In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score


In [3]:
df = pd.read_csv("StudentPerformanceFactors.csv")
df.head()


Unnamed: 0,Hours_Studied,Attendance,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Sleep_Hours,Previous_Scores,Motivation_Level,Internet_Access,Tutoring_Sessions,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Physical_Activity,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender,Exam_Score
0,23,84,Low,High,No,7,73,Low,Yes,0,Low,Medium,Public,Positive,3,No,High School,Near,Male,67
1,19,64,Low,Medium,No,8,59,Low,Yes,2,Medium,Medium,Public,Negative,4,No,College,Moderate,Female,61
2,24,98,Medium,Medium,Yes,7,91,Medium,Yes,2,Medium,Medium,Public,Neutral,4,No,Postgraduate,Near,Male,74
3,29,89,Low,Medium,Yes,8,98,Medium,Yes,1,Medium,Medium,Public,Negative,4,No,High School,Moderate,Male,71
4,19,92,Medium,Medium,Yes,6,65,Medium,Yes,3,Medium,High,Public,Neutral,4,No,College,Near,Female,70


In [4]:
df = df[[
    "Hours_Studied",
    "Attendance",
    "Previous_Scores",
    "Motivation_Level",
    "Parental_Involvement",
    "Exam_Score"
]]

df.head()


Unnamed: 0,Hours_Studied,Attendance,Previous_Scores,Motivation_Level,Parental_Involvement,Exam_Score
0,23,84,73,Low,Low,67
1,19,64,59,Low,Low,61
2,24,98,91,Medium,Medium,74
3,29,89,98,Medium,Low,71
4,19,92,65,Medium,Medium,70


In [5]:
motivation_map = {"Low": 0, "Medium": 1, "High": 2}
parental_map = {"Low": 0, "Medium": 1, "High": 2}

df["Motivation_Level"] = df["Motivation_Level"].map(motivation_map)
df["Parental_Involvement"] = df["Parental_Involvement"].map(parental_map)

df.head()


Unnamed: 0,Hours_Studied,Attendance,Previous_Scores,Motivation_Level,Parental_Involvement,Exam_Score
0,23,84,73,0,0,67
1,19,64,59,0,0,61
2,24,98,91,1,1,74
3,29,89,98,1,0,71
4,19,92,65,1,1,70


In [6]:
X = df.drop("Exam_Score", axis=1)
y = df["Exam_Score"]


In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [8]:
model = LinearRegression()
model.fit(X_train, y_train)


In [9]:
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("✅ Model Performance:")
print("MAE:", round(mae, 2))
print("R2 Score:", round(r2, 2))


✅ Model Performance:
MAE: 1.19
R2 Score: 0.67


In [10]:
importance = pd.DataFrame({
    "Feature": X.columns,
    "Importance": model.coef_
})

importance


Unnamed: 0,Feature,Importance
0,Hours_Studied,0.2906
1,Attendance,0.200276
2,Previous_Scores,0.04862
3,Motivation_Level,0.526101
4,Parental_Involvement,0.985706


In [11]:
joblib.dump(model, "student_model.pkl")
print("✅ Model saved successfully!")


✅ Model saved successfully!
