# Student Academic Performance Prediction with Gradient Descent
This notebook fits a linear regression model that predicts a student's average semester grade, training it with our C++ gradient descent implementation (the `gradientdescent` module).

## Setup
Import necessary libraries and load the student dataset.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gradientdescent as gd

# Only reached when every import above succeeded: a failing import raises
# before this line runs, so the message doubles as a quick sanity check that
# the `gradientdescent` module is importable from this kernel.
print("Gradient Descent module loaded successfully!")

In [None]:
# Read the student records into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
dataset_path = '../../data/dataset.csv'
data = pd.read_csv(dataset_path)

# Report the (rows, columns) shape, then preview the first five rows as the
# cell's rich output.
print(f"Dataset shape: {data.shape}")
print("\nFirst few rows:")
data.head(5)

## Data Exploration and Preprocessing
Let's explore the dataset and prepare it for regression.

In [None]:
# Structural overview: info() prints column dtypes and non-null counts itself.
print("Dataset info:")
data.info()

# Missing cells summed over every column, reported as a single number.
print("\nMissing values:")
total_missing = data.isna().sum().sum()
print(total_missing)

# How often each distinct value occurs in the 'Target' column.
print("\nTarget distribution:")
target_counts = data['Target'].value_counts()
print(target_counts)

In [None]:
# Build a continuous regression target: the mean of the 1st- and 2nd-semester
# grades. The new column is written onto `data` itself.
first_sem_grade = data['Curricular units 1st sem (grade)']
second_sem_grade = data['Curricular units 2nd sem (grade)']
data['avg_grade'] = (first_sem_grade + second_sem_grade) / 2

# Keep only rows whose average grade is strictly positive, working on a copy so
# later edits to `data_clean` never touch `data`.
# NOTE(review): a row with a zero grade in just one semester still has a
# positive average and is therefore kept - confirm that this is intended.
has_positive_avg = data['avg_grade'] > 0
data_clean = data.loc[has_positive_avg].copy()

print(f"Clean dataset shape: {data_clean.shape}")
print(f"Average grade statistics:")
print(data_clean['avg_grade'].describe())

In [None]:
# Predictor columns used by the regression: enrollment age, the per-semester
# counts of enrolled/approved curricular units, and three economic indicators.
feature_cols = [
    'Age at enrollment',
    'Curricular units 1st sem (enrolled)',
    'Curricular units 1st sem (approved)',
    'Curricular units 2nd sem (enrolled)',
    'Curricular units 2nd sem (approved)',
    'Unemployment rate',
    'Inflation rate',
    'GDP'
]

# Pull predictors and target out of the cleaned frame as NumPy arrays;
# the columns of X follow the order of feature_cols.
features_frame = data_clean.loc[:, feature_cols]
target_series = data_clean.loc[:, 'avg_grade']
X = features_frame.values
y = target_series.values

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"\nFeature names: {feature_cols}")

In [None]:
# Split data (80% train, 20% test).
#
# The rows are shuffled with a fixed seed before splitting. A purely positional
# split (first 80% / last 20%) is only representative when the CSV happens to
# be in random order; any ordering in the file would otherwise bias the test
# set. The fixed seed keeps the split reproducible across runs.
TRAIN_FRACTION = 0.8
SPLIT_SEED = 42

# A local Generator is used so the global np.random state is left untouched.
split_rng = np.random.default_rng(SPLIT_SEED)
shuffled_idx = split_rng.permutation(len(X))

train_size = int(TRAIN_FRACTION * len(X))
train_idx, test_idx = shuffled_idx[:train_size], shuffled_idx[train_size:]

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")

In [None]:
# Normalize features to zero mean / unit variance. The statistics come from the
# training split only and are re-used for the test split, so nothing about the
# test data influences the preprocessing.
X_mean = np.mean(X_train, axis=0)
X_std = np.std(X_train, axis=0)
# A feature that is constant over the training split has std == 0; dividing by
# it would fill that column with NaN/inf. Scale such columns by 1 instead,
# which leaves them at exactly 0 after centering on the training split.
X_std = np.where(X_std == 0, 1.0, X_std)
X_train_norm = (X_train - X_mean) / X_std
X_test_norm = (X_test - X_mean) / X_std

# Normalize target with the same train-only convention.
y_mean = np.mean(y_train)
y_std = np.std(y_train)
if y_std == 0:
    # Constant target: avoid a division by zero. Every normalized target on
    # the training split is then exactly 0.
    y_std = 1.0
y_train_norm = (y_train - y_mean) / y_std
y_test_norm = (y_test - y_mean) / y_std

print(f"Feature means: {X_mean}")
print(f"Feature stds: {X_std}")
print(f"Target mean: {y_mean:.4f}, std: {y_std:.4f}")

## Model Training with C++ Gradient Descent
Train a linear regression model using our C++ implementation.

In [None]:
# The training call receives plain Python lists, so convert the normalized
# NumPy arrays (nested list for the features, flat list for the target).
X_train_list = X_train_norm.tolist()
y_train_list = y_train_norm.tolist()

n_features = X_train.shape[1]


def _small_random_variable():
    """Create a gd.Variable from one standard-normal draw scaled by 0.1.

    The second argument to ``gd.Variable.create`` is passed as ``True``.
    NOTE(review): presumably this marks the variable as trainable
    (requires-grad) - confirm against the binding.
    """
    return gd.Variable.create(np.random.randn() * 0.1, True)


# Seed the global NumPy RNG so the initial parameters are reproducible, then
# draw one value per feature weight followed by one for the bias.
np.random.seed(42)
w = [_small_random_variable() for _ in range(n_features)]
b = _small_random_variable()  # bias term

print(f"Number of features: {n_features}")
print(f"Initial weights: {[w_i.value for w_i in w]}")
print(f"Initial bias: {b.value}")

# Loss function and optimizer objects provided by the gradientdescent module
loss_fn = gd.MSE()
optimizer = gd.Vanilla()

In [None]:
# Hyper-parameters of the optimization run
learning_rate = 0.01
n_epochs = 1000

# Per-epoch records, filled inside the loop
losses = []
weights_history = []

# The optimizer is given a single flat parameter list: the feature weights
# followed by the bias as the last entry.
all_weights = w + [b]

for epoch in range(n_epochs):
    # One optimizer step over the full training set
    optimizer.train(all_weights, X_train_list, y_train_list, loss_fn, learning_rate)

    # Recompute the training predictions with the updated parameters so the
    # current loss can be recorded.
    # NOTE(review): this pass creates a gd.Variable for every feature value of
    # every sample on every epoch, which is probably the dominant cost of the
    # loop - worth confirming with a profiler.
    y_pred = []
    for row in X_train_list:
        pred = gd.Variable.create(0.0)
        for w_j, x_val in zip(w, row):
            pred = pred + w_j * gd.Variable.create(x_val)
        y_pred.append(pred + b)  # bias is added last

    loss = loss_fn.compute(y_pred, y_train_list)
    losses.append(loss.value)

    # Snapshot of the parameters after this epoch: weights first, bias last
    weights_history.append([w_i.value for w_i in w] + [b.value])

    # Report every 100th epoch and always the final one
    is_last_epoch = epoch == n_epochs - 1
    if epoch % 100 == 0 or is_last_epoch:
        print(f"Epoch {epoch}: Loss = {loss.value:.6f}")

print(f"\nFinal weights: {[w_i.value for w_i in w]}")
print(f"Final bias: {b.value}")

## Model Evaluation
Evaluate the trained model on the test set.

In [None]:
# Make predictions on the test set in a single vectorized step, X @ w + b.
# This replaces a per-sample, per-feature Python loop with one matrix-vector
# product over the already-normalized test features.
w_values = np.array([w_i.value for w_i in w], dtype=float)
y_test_pred_norm = X_test_norm @ w_values + b.value

# Convert back to the original grade scale (undo the target normalization)
y_test_pred = y_test_pred_norm * y_std + y_mean

# Calculate metrics on the original scale
test_errors = y_test - y_test_pred
mse = np.mean(test_errors ** 2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(test_errors))

# R² = 1 - SS_res / SS_tot. It is undefined when the test targets have no
# variance (SS_tot == 0); report NaN in that case instead of dividing by zero.
ss_res = np.sum(test_errors ** 2)
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float('nan')

print("Test Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")

In [None]:
# Two side-by-side panels: residuals on the left, weight magnitudes on the right
fig, (ax_resid, ax_weights) = plt.subplots(1, 2, figsize=(10, 4))

# Left panel: prediction error against the predicted grade. The dashed red line
# at zero marks a perfect prediction.
residuals = y_test - y_test_pred
ax_resid.scatter(y_test_pred, residuals, alpha=0.6)
ax_resid.axhline(y=0, color='r', linestyle='--', lw=2)
ax_resid.set_xlabel('Predicted Grade')
ax_resid.set_ylabel('Prediction Error')
ax_resid.set_title(f'Residuals Plot (RMSE = {rmse:.3f})')
ax_resid.grid(True, alpha=0.3)

# Right panel: absolute value of each learned feature weight (bias excluded).
# Tick labels drop the 'Curricular units ' prefix and break before '(' so the
# long column names fit next to the bars.
feature_importance = np.abs([w_i.value for w_i in w])
bar_positions = range(len(feature_cols))
bar_labels = [col.replace('Curricular units ', '').replace(' (', '\n(') for col in feature_cols]
ax_weights.barh(bar_positions, feature_importance)
ax_weights.set_yticks(bar_positions)
ax_weights.set_yticklabels(bar_labels)
ax_weights.set_xlabel('|Weight|')
ax_weights.set_title('Feature Importance')
ax_weights.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## Training Visualization
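Plot the training loss and the trajectory of each weight over the epochs, then list the learned feature weights ranked by absolute value.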

In [None]:
# Turn the per-epoch snapshots into a 2-D array: one row per epoch, one column
# per parameter (feature weights first, bias in the last column).
weights_history = np.array(weights_history)

fig, (ax_loss, ax_params) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: recorded training loss for every epoch
ax_loss.plot(losses, 'b-', linewidth=2)
ax_loss.set_title('Training Loss Over Time')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss (MSE)')
ax_loss.grid(True, alpha=0.3)

# Right panel: one line per feature weight, plus the bias as a dashed black line
for i, weight_trajectory in enumerate(weights_history[:, :n_features].T):
    ax_params.plot(weight_trajectory, label=f'w[{i}]')
ax_params.plot(weights_history[:, -1], 'k--', label='bias')
ax_params.set_title('Weight Convergence')
ax_params.set_xlabel('Epoch')
ax_params.set_ylabel('Weight Value')
ax_params.legend()
ax_params.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()


In [None]:
# Read the learned weights once; both tables below use the same values.
weights_values = [w_i.value for w_i in w]

# Table 1: every feature with its weight, in feature_cols order, then the bias
print("Feature Analysis:")
print("=" * 50)
for feature, weight in zip(feature_cols, weights_values):
    print(f"{feature:35s}: {weight:8.4f}")
print(f"{'Bias':35s}: {b.value:8.4f}")

# Table 2: the five features with the largest absolute weight. argsort sorts
# ascending, so the index array is reversed to put the largest first.
print("\nMost important features (by absolute weight):")
importance_idx = np.argsort(np.abs(weights_values))[::-1]
top_indices = importance_idx[:5]
for i in top_indices:
    print(f"{feature_cols[i]:35s}: {weights_values[i]:8.4f}")