# Student Performance Prediction (Using KaggleHub)

In [None]:
#!pip install pandas scikit-learn kagglehub

In [None]:
import pandas as pd
import kagglehub

In [None]:
# Fetch the Kaggle dataset identified by the handle below through kagglehub
# and keep whatever dataset_download() returns in `path`; it is printed so
# the location is visible in the cell output.
path = kagglehub.dataset_download("spscientist/students-performance-in-exams")
print("Dataset Path:", path)

In [None]:
import os

# Load the exam results from the downloaded dataset directory. The file path
# is assembled with os.path.join rather than a hard-coded "/" so the
# separator is always the one the host platform expects.
df = pd.read_csv(os.path.join(path, "StudentsPerformance.csv"))

# Preview the first rows as the cell output.
df.head()

In [None]:
# Quick look at the loaded data: info() prints its summary of the frame,
# and describe() is left as the last expression so its table is rendered
# as the cell output.
df.info()
df.describe()

## Encode Categorical Variables

In [None]:
from pandas.api.types import is_object_dtype, is_string_dtype
from sklearn.preprocessing import LabelEncoder

# Integer-encode every text column of `df` in place so that only numeric
# data is handed to the estimator later on.
#
# Each column gets its OWN fitted LabelEncoder, stored in `encoders` under
# the column name. Refitting one shared encoder would discard the mapping of
# every column except the last, making it impossible to decode predictions
# inputs (encoders[col].inverse_transform) or to encode new rows the same
# way (encoders[col].transform).
encoders = {}

for column in df.columns:
    values = df[column]
    # Accept the legacy `object` dtype as well as pandas' dedicated string
    # dtypes; comparing the dtype against 'object' alone misses the latter.
    if is_object_dtype(values) or is_string_dtype(values):
        encoder = LabelEncoder()
        df[column] = encoder.fit_transform(values)
        encoders[column] = encoder

# Preview the encoded frame as the cell output.
df.head()

## Feature / Target Split

In [None]:
# The regression target is the math score; every remaining column of `df`
# is used as an input feature.
y = df['math score']
X = df.drop(columns=['math score'])

## Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

## Model Training

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Build a 300-tree random forest with a fixed seed and train it on the
# training split. fit() returns the estimator itself, so construction and
# training are chained into a single assignment.
model = RandomForestRegressor(
    n_estimators=300,
    random_state=42,
).fit(X_train, y_train)

print('Model trained.')

## Evaluation

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Predict on the held-out test split and report three regression metrics.
y_pred = model.predict(X_test)

print('MAE:', mean_absolute_error(y_test, y_pred))
# RMSE is the square root of the mean squared error, which expresses the
# error in the same units as the target.
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
# The superscript two is written as a Unicode escape so the label prints as
# R-squared correctly no matter which encoding the source file is saved or
# read with (the previous literal had been corrupted into mojibake).
print('R\u00b2 Score:', r2_score(y_test, y_pred))

## Feature Importance

In [None]:
# Pair each input column with the importance reported by the fitted model,
# then order the table from most to least important.
importance = pd.DataFrame(
    {'Feature': X.columns, 'Importance': model.feature_importances_}
)
importance = importance.sort_values('Importance', ascending=False)

# Show at most the ten highest-ranked features as the cell output.
importance.head(10)

## Save Model

In [None]:
import pickle

# Serialize the fitted model to a pickle file in the working directory.
# NOTE: pickle files execute code when loaded, so only unpickle this file
# from a source you trust.
with open('student_performance_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

print('Model saved.')