In [3]:
import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler


In [5]:
# Read the student performance CSV into a DataFrame
file_path = 'Student Performance Prediction Data.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows (rendered because it is the cell's last expression)
df.head(n=5)


Unnamed: 0,Age,Gender,Education_Level,Number_of_Sessions_Completed,Total_Time_Spent_hrs,Time_Spent_Per_Session_mins,Interactions_Clicks,Student_Feedback_Rating
0,22.0,Female,Undergraduate,15,20,40,120,4.0
1,24.0,Male,Graduate,10,18,50,110,3.0
2,19.0,Female,High School,20,25,60,150,5.0
3,21.0,Male,Undergraduate,18,22,45,140,4.0
4,23.0,Female,Graduate,12,15,55,100,2.0


In [6]:
# Report the number of missing values per column. The result is printed
# explicitly: a bare expression is only rendered by the notebook when it is the
# last statement of a cell, so the original check produced no visible output.
print(df.isnull().sum())

# Drop every row that contains at least one missing value
df = df.dropna()  # Alternatively, you can fill missing values with df.fillna() if needed

# Encode categorical features with ONE encoder per column.
# fit_transform() refits the encoder each time it is called, so sharing a
# single LabelEncoder across columns keeps only the mapping of the last column
# and makes it impossible to inverse-transform (or consistently encode new
# data for) the earlier ones.
# NOTE(review): LabelEncoder assigns integer codes in sorted label order, which
# imposes an arbitrary ordering on 'Education_Level' for a linear model --
# consider one-hot encoding if that ordering is not intended.
categorical_columns = ['Gender', 'Education_Level']
label_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    df[column] = column_encoder.fit_transform(df[column])
    label_encoders[column] = column_encoder

# Backward compatibility: the original cell left `label_encoder` fitted on
# 'Education_Level' (the last column encoded); keep that name pointing at the
# same fitted encoder for any later code that still refers to it.
label_encoder = label_encoders['Education_Level']


In [7]:
# The feedback rating is the regression target; all remaining columns are features
y = df['Student_Feedback_Rating']
X = df.drop(columns=['Student_Feedback_Rating'])

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted transformation to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [9]:
# Create a linear regression model and fit it on the scaled training features
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict the target for the scaled held-out test features
y_pred = model.predict(X_test_scaled)


In [10]:
# Compare the test-set predictions against the true ratings
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the target

# Emit the report, one metric per line.
# Note: R^2 can be negative, which means the model predicts worse than a
# constant that always outputs the mean of y_test.
metric_lines = [
    f"Mean Absolute Error: {mae}",
    f"Mean Squared Error: {mse}",
    f"Root Mean Squared Error: {rmse}",
    f"R^2 Score: {r2}",
]
print("\n".join(metric_lines))


Mean Absolute Error: 1.0921862579122692
Mean Squared Error: 1.4179445000878188
Root Mean Squared Error: 1.19077474783765
R^2 Score: -1.8358890001756376


In [11]:
# Persist the fitted regression model to disk
joblib.dump(model, 'student_performance_predictor.pkl')

# Persist the fitted scaler as well: the model was trained on features that
# went through `scaler`, so new inputs must be transformed by the same fitted
# scaler before calling model.predict(). Without it the saved model cannot be
# used correctly outside this notebook.
# NOTE: joblib files are pickle-based -- only load them from trusted sources.
joblib.dump(scaler, 'student_performance_scaler.pkl')


['student_performance_predictor.pkl']