In [139]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold, train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

In [140]:
# Load the dataset
import kagglehub
path = kagglehub.dataset_download("jayaantanaath/student-habits-vs-academic-performance")
df = pd.read_csv(path + '/student_habits_performance.csv')

# Separate features and target
df = df.drop(columns=['student_id'])
X = df.drop(columns=['exam_score'])
y = df['exam_score'].values

# Identify column types
cat_cols = X.select_dtypes(include='object').columns.tolist()
num_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Split data
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   age                            1000 non-null   int64  
 1   gender                         1000 non-null   object 
 2   study_hours_per_day            1000 non-null   float64
 3   social_media_hours             1000 non-null   float64
 4   netflix_hours                  1000 non-null   float64
 5   part_time_job                  1000 non-null   object 
 6   attendance_percentage          1000 non-null   float64
 7   sleep_hours                    1000 non-null   float64
 8   diet_quality                   1000 non-null   object 
 9   exercise_frequency             1000 non-null   int64  
 10  parental_education_level       909 non-null    object 
 11  internet_quality               1000 non-null   object 
 12  mental_health_rating           1000 non-null   in

In [141]:
# Build preprocessing pipeline
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), cat_cols)
])

# Apply preprocessing
X_train_val_processed = preprocessor.fit_transform(X_train_val)
X_test_processed = preprocessor.transform(X_test)

# Save preprocessor
joblib.dump(preprocessor, 'preprocessor.joblib')

# Convert to tensors
X_train_val_tensor = torch.tensor(X_train_val_processed, dtype=torch.float32)
y_train_val_tensor = torch.tensor(y_train_val.reshape(-1, 1), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32)

print("Processed X_train_val shape:", X_train_val_tensor.shape)
print("Processed X_test shape:", X_test_tensor.shape)
for col in ['gender', 'part_time_job', 'diet_quality', 'parental_education_level', 'internet_quality', 'extracurricular_participation']:
    print(f"{col} unique values:", df[col].unique())

Processed X_train_val shape: torch.Size([800, 25])
Processed X_test shape: torch.Size([200, 25])
gender unique values: ['Female' 'Male' 'Other']
part_time_job unique values: ['No' 'Yes']
diet_quality unique values: ['Fair' 'Good' 'Poor']
parental_education_level unique values: ['Master' 'High School' 'Bachelor' nan]
internet_quality unique values: ['Average' 'Poor' 'Good']
extracurricular_participation unique values: ['Yes' 'No']


In [142]:
# Define models
class SimpleNet(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

class DeepNet(nn.Module):
    def __init__(self, input_dim):
        super(DeepNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

class DeeperNet(nn.Module):
    def __init__(self, input_dim):
        super(DeeperNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.model(x)

In [143]:
def train_model(model_class, X_tensor, y_tensor, learning_rate, batch_size, epochs=150, k_folds=5):
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    mses, rmses, maes, mapes, r2s = [], [], [], [], []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X_tensor)):
        X_train, X_val = X_tensor[train_idx], X_tensor[val_idx]
        y_train, y_val = y_tensor[train_idx], y_tensor[val_idx]

        train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)

        model = model_class(X_tensor.shape[1])
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

        for epoch in range(epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                pred = model(xb)
                loss = criterion(pred, yb)
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            val_preds = model(X_val)
            val_preds_np = val_preds.numpy().flatten()
            y_val_np = y_val.numpy().flatten()

            # Compute metrics
            mse = mean_squared_error(y_val_np, val_preds_np)
            rmse = np.sqrt(mse)
            mae = mean_absolute_error(y_val_np, val_preds_np)
            mape = mean_absolute_percentage_error(y_val_np, val_preds_np) * 100
            r2 = r2_score(y_val_np, val_preds_np)

            mses.append(mse)
            rmses.append(rmse)
            maes.append(mae)
            mapes.append(mape)
            r2s.append(r2)

    return np.mean(mses), np.mean(rmses), np.mean(maes), np.mean(mapes), np.mean(r2), model

In [144]:
# Test the best model on the test set
def test_model(model, X_test_tensor, y_test_tensor):
    model.eval()
    with torch.no_grad():
        test_preds = model(X_test_tensor)
        test_preds_np = test_preds.numpy().flatten()
        y_test_np = y_test_tensor.numpy().flatten()

        test_mse = mean_squared_error(y_test_np, test_preds_np)
        test_rmse = np.sqrt(test_mse)
        test_mae = mean_absolute_error(y_test_np, test_preds_np)
        test_mape = mean_absolute_percentage_error(y_test_np, test_preds_np) * 100
        test_r2 = r2_score(y_test_np, test_preds_np)

    return test_mse, test_rmse, test_mae, test_mape, test_r2

In [145]:
# Hyperparameter tuning
learning_rates = [0.0001, 0.00005, 0.00001]
batch_sizes = [16, 32, 64]
architectures = [SimpleNet, DeepNet, DeeperNet]
k_folds_list = [3, 5]

best_results = {
    "mse": (float("inf"), None, None),
    "rmse": (float("inf"), None, None),
    "mae": (float("inf"), None, None),
    "mape": (float("inf"), None, None),
    "r2": (-float("inf"), None, None),
}

for k_folds in k_folds_list:
    for lr in learning_rates:
        for bs in batch_sizes:
            for arch in architectures:
                print(f"Evaluating {arch.__name__} with lr={lr}, batch_size={bs}, k_folds={k_folds}")
                avg_mse, avg_rmse, avg_mae, avg_mape, avg_r2, trained_model = train_model(
                    arch, X_train_val_tensor, y_train_val_tensor, lr, bs, epochs=150, k_folds=k_folds
                )
                test_mse, test_rmse, test_mae, test_mape, test_r2 = test_model(trained_model, X_test_tensor, y_test_tensor)

                print(f"Validation | MSE: {avg_mse:.4f}, RMSE: {avg_rmse:.4f}, MAE: {avg_mae:.4f}, MAPE: {avg_mape:.2f}%, R2: {avg_r2:.4f}")
                print(f"Test | MSE: {test_mse:.4f}, RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%, R2: {test_r2:.4f}\n")

                # Update bests
                metrics = {
                    "mse": avg_mse,
                    "rmse": avg_rmse,
                    "mae": avg_mae,
                    "mape": avg_mape,
                    "r2": avg_r2
                }
                test_results = (test_mse, test_rmse, test_mae, test_mape, test_r2)
                params = {'model': arch.__name__, 'lr': lr, 'batch_size': bs, 'k_folds': k_folds}

                for metric in best_results:
                    if (metric != "r2" and metrics[metric] < best_results[metric][0]) or (metric == "r2" and metrics[metric] > best_results[metric][0]):
                        best_results[metric] = (metrics[metric], params, test_results)

# Print best configurations and test results
for metric in best_results:
    val_score, params, test_scores = best_results[metric]
    print(f"\nBest configuration for Validation {metric.upper()}: {params}")
    print(f"Validation {metric.upper()}: {val_score:.4f}")
    print(f"Test MSE: {test_scores[0]:.4f}, RMSE: {test_scores[1]:.4f}, MAE: {test_scores[2]:.4f}, MAPE: {test_scores[3]:.2f}%, R2: {test_scores[4]:.4f}")

Evaluating SimpleNet with lr=0.0001, batch_size=16, k_folds=3
Validation | MSE: 34.6189, RMSE: 5.8816, MAE: 4.6339, MAPE: 7.23%, R2: 0.8691
Test | MSE: 27.0533, RMSE: 5.2013, MAE: 4.2137, MAPE: 6.86%, R2: 0.8945

Evaluating DeepNet with lr=0.0001, batch_size=16, k_folds=3
Validation | MSE: 48.8134, RMSE: 6.9788, MAE: 5.5530, MAPE: 8.73%, R2: 0.8052
Test | MSE: 39.6475, RMSE: 6.2966, MAE: 5.1025, MAPE: 8.21%, R2: 0.8454

Evaluating DeeperNet with lr=0.0001, batch_size=16, k_folds=3
Validation | MSE: 47.5831, RMSE: 6.8969, MAE: 5.4734, MAPE: 8.42%, R2: 0.8243
Test | MSE: 44.1342, RMSE: 6.6434, MAE: 5.4873, MAPE: 8.67%, R2: 0.8279

Evaluating SimpleNet with lr=0.0001, batch_size=32, k_folds=3
Validation | MSE: 31.8983, RMSE: 5.6443, MAE: 4.4526, MAPE: 6.89%, R2: 0.8772
Test | MSE: 26.8286, RMSE: 5.1796, MAE: 4.2267, MAPE: 6.95%, R2: 0.8954

Evaluating DeepNet with lr=0.0001, batch_size=32, k_folds=3
Validation | MSE: 41.5368, RMSE: 6.4374, MAE: 5.0989, MAPE: 7.98%, R2: 0.8340
Test | MSE: 

In [147]:
# Higher learning rates don't work with Deeper networks
learning_rates = [0.002, 0.001, 0.0005]
batch_sizes = [16, 32, 64]
architectures = [SimpleNet]
k_folds_list = [3, 5]

best_results = {
    "mse": (float("inf"), None, None),
    "rmse": (float("inf"), None, None),
    "mae": (float("inf"), None, None),
    "mape": (float("inf"), None, None),
    "r2": (-float("inf"), None, None),
}

for k_folds in k_folds_list:
    for lr in learning_rates:
        for bs in batch_sizes:
            for arch in architectures:
                print(f"Evaluating {arch.__name__} with lr={lr}, batch_size={bs}, k_folds={k_folds}")
                avg_mse, avg_rmse, avg_mae, avg_mape, avg_r2, trained_model = train_model(
                    arch, X_train_val_tensor, y_train_val_tensor, lr, bs, epochs=150, k_folds=k_folds
                )
                test_mse, test_rmse, test_mae, test_mape, test_r2 = test_model(trained_model, X_test_tensor, y_test_tensor)

                print(f"Validation | MSE: {avg_mse:.4f}, RMSE: {avg_rmse:.4f}, MAE: {avg_mae:.4f}, MAPE: {avg_mape:.2f}%, R2: {avg_r2:.4f}")
                print(f"Test | MSE: {test_mse:.4f}, RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%, R2: {test_r2:.4f}\n")

                # Update bests
                metrics = {
                    "mse": avg_mse,
                    "rmse": avg_rmse,
                    "mae": avg_mae,
                    "mape": avg_mape,
                    "r2": avg_r2
                }
                test_results = (test_mse, test_rmse, test_mae, test_mape, test_r2)
                params = {'model': arch.__name__, 'lr': lr, 'batch_size': bs, 'k_folds': k_folds}

                for metric in best_results:
                    if (metric != "r2" and metrics[metric] < best_results[metric][0]) or (metric == "r2" and metrics[metric] > best_results[metric][0]):
                        best_results[metric] = (metrics[metric], params, test_results)

# Print best configurations and test results
for metric in best_results:
    val_score, params, test_scores = best_results[metric]
    print(f"\nBest configuration for Validation {metric.upper()}: {params}")
    print(f"Validation {metric.upper()}: {val_score:.4f}")
    print(f"Test MSE: {test_scores[0]:.4f}, RMSE: {test_scores[1]:.4f}, MAE: {test_scores[2]:.4f}, MAPE: {test_scores[3]:.2f}%, R2: {test_scores[4]:.4f}")

Evaluating SimpleNet with lr=0.002, batch_size=16, k_folds=3
Validation | MSE: 58.4014, RMSE: 7.6173, MAE: 5.8981, MAPE: 9.28%, R2: 0.7545
Test | MSE: 62.6831, RMSE: 7.9173, MAE: 6.4027, MAPE: 10.17%, R2: 0.7556

Evaluating SimpleNet with lr=0.002, batch_size=32, k_folds=3
Validation | MSE: 56.0416, RMSE: 7.4419, MAE: 5.8110, MAPE: 9.07%, R2: 0.7699
Test | MSE: 60.8894, RMSE: 7.8032, MAE: 6.2631, MAPE: 9.99%, R2: 0.7625

Evaluating SimpleNet with lr=0.002, batch_size=64, k_folds=3
Validation | MSE: 47.1942, RMSE: 6.8681, MAE: 5.5342, MAPE: 8.46%, R2: 0.8313
Test | MSE: 39.2007, RMSE: 6.2610, MAE: 5.2247, MAPE: 8.12%, R2: 0.8471

Evaluating SimpleNet with lr=0.001, batch_size=16, k_folds=3
Validation | MSE: 51.3335, RMSE: 7.1425, MAE: 5.7393, MAPE: 8.94%, R2: 0.7821
Test | MSE: 51.4371, RMSE: 7.1720, MAE: 5.8943, MAPE: 9.54%, R2: 0.7994

Evaluating SimpleNet with lr=0.001, batch_size=32, k_folds=3
Validation | MSE: 45.8527, RMSE: 6.7702, MAE: 5.4244, MAPE: 8.45%, R2: 0.8315
Test | MSE: 

In [148]:
# Retrain best model using best hyperparameters
print(f"Retraining with: lr={5e-05}, batch_size={32}, k_folds={5}")

# Retrain model
_, _, _, _, _, simplenet_model = train_model(
    SimpleNet,
    X_train_val_tensor,
    y_train_val_tensor,
    5e-05,
    32,
    k_folds=5
)

# Save the trained model
torch.save(simplenet_model.state_dict(), 'simplenet_model.pth')

Retraining with: lr=5e-05, batch_size=64, k_folds=5
