## Student performance prediction

### Importing necessary libraries

In [91]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

### Loading the Dataset

In [None]:
# Read the student records from student_data.csv (path is relative to the
# current working directory).
df = pd.read_csv("student_data.csv")
# Bare expression: displays the frame when run as a notebook cell.
df

### Removing Unnecessary Data

In [None]:
# Identifier columns are removed before any encoding or modelling.
identifier_columns = ["id", "first_name", "last_name", "email"]
df = df.drop(columns=identifier_columns)
# Summary of the remaining columns, their dtypes and non-null counts.
df.info()

### Changing (part_time_job, extracurricular_activities, gender) to 1,0

In [None]:
# Replace two-valued columns with 1/0 codes.
# Series.map with a dict turns any value missing from the dict into NaN, so
# values outside these mappings would show up as missing data afterwards.
flag_codes = {True: 1, False: 0}
gender_codes = {"male": 1, "female": 0}
for column, codes in (
    ("part_time_job", flag_codes),
    ("extracurricular_activities", flag_codes),
    ("gender", gender_codes),
):
    df[column] = df[column].map(codes)
df

### One Hot Encoding (career_aspiration)

In [None]:
# One-hot encode career_aspiration. set_output(transform="pandas") makes the
# encoder return a DataFrame instead of a NumPy array, so the new columns
# carry names and the same index as df.
ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False).set_output(transform="pandas")
data = ohe.fit_transform(df[["career_aspiration"]])
# concat(axis=1) matches rows by index label, so no sorting is needed to line
# the encoded columns up with df. The rows are deliberately left in their
# original order so that index labels and row positions keep agreeing for any
# later code that assigns values by position.
df = pd.concat([df, data], axis=1).drop(columns=["career_aspiration"])
df
# NOTE(review): the author observed a ~60% higher score after adding this
# cell; re-check that figure against the columns actually used as the target.

### Converting students grades to GPA

In [None]:
# Print the column summary (names, non-null counts, dtypes) to check the
# frame after the encoding steps.
df.info()

In [97]:

# "GPA" here is the mean of the seven subject scores, rounded to the nearest
# whole number.
score_columns = [
    "math_score",
    "history_score",
    "physics_score",
    "chemistry_score",
    "biology_score",
    "english_score",
    "geography_score",
]
# Computing the mean row-wise keeps every value attached to its own index
# label, so the result is correct for any number of rows and any row order.
# skipna=False: a missing subject score yields a missing GPA rather than an
# average over the remaining subjects.
GPA = df[score_columns].mean(axis=1, skipna=False).round(0)
df["GPA"] = GPA

### Preparing for AI model

In [None]:
# Print the column summary (names, non-null counts, dtypes) before choosing
# the feature and target columns.
df.info()

In [99]:
# Columns the model has to predict: the rounded average plus the individual
# subject scores it is derived from.
target_columns = [
    "GPA",
    "math_score",
    "history_score",
    "physics_score",
    "biology_score",
    "chemistry_score",
    "english_score",
    "geography_score",
]
# Features are every remaining column; targets are selected explicitly so
# that no column can appear on both sides. (Building Y by dropping a fixed
# list of features lets any feature missing from that list, such as gender
# or the one-hot career columns, leak into the target and inflate the score.)
X = df.drop(columns=target_columns)
Y = df[target_columns]
# 80/20 split; random_state fixes the shuffle so runs are reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=45)

### The AI Model

In [100]:
# Ordinary least-squares regression fitted on the training split; fit()
# returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, Y_train)
# Predictions for the held-out rows, scored in the metrics cell.
predicted_data = model.predict(X_test)

### MAE,MSE,R2

In [None]:
# Score the predictions against the held-out targets.
mse_raw = mean_squared_error(Y_test, predicted_data)
mae_raw = mean_absolute_error(Y_test, predicted_data)
r2_raw = r2_score(Y_test, predicted_data)
# round() without a digit count gives whole numbers; R2 is shown as a
# whole-number percentage.
MSE, MAE, R2 = round(mse_raw), round(mae_raw), round(r2_raw * 100)
print(f"MAE: {MAE}", f"MSE: {MSE}", f"R2: {R2}%", sep="\n")