In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Load the raw data and drop every row that contains a missing value.
# The index is reset right away so that the target `y` carries the same
# 0..n-1 row labels as the feature frames rebuilt below; without this, `y`
# keeps the gappy post-dropna labels and any label-based operation between
# `y` and `X_processed` (concat, boolean masks, joins) silently misaligns.
df = (
    pd.read_csv('student_depression_dataset.csv')
    .dropna()
    .reset_index(drop=True)
)

# Separate target and features
y = df['Depression Score']
X = df.drop('Depression Score', axis=1)

# Separate numerical and categorical features
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

# One-hot encode categorical features (first level of each column dropped).
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old spelling was removed in 1.4, so try the current
# name first and fall back only for older installations.
try:
    ohe = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(drop='first', sparse=False)
X_encoded = pd.DataFrame(
    ohe.fit_transform(X[categorical_cols]),
    columns=ohe.get_feature_names_out(),
    index=X.index,
)

# Standardize numerical features
scaler = StandardScaler()
X_scaled = pd.DataFrame(
    scaler.fit_transform(X[numerical_cols]),
    columns=numerical_cols,
    index=X.index,
)

# Combine: scaled numeric columns first, then the one-hot columns.
# Both frames share X's index, so a plain column-wise concat lines up row for row.
X_processed = pd.concat([X_scaled, X_encoded], axis=1)

: 

In [None]:
# === Train-Test Split ===
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# === Ridge Regression ===
# `fit` returns the estimator itself, so construction and training can be
# written as a single expression.
model = Ridge(alpha=1.0).fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

In [None]:
# === Evaluation ===
# Apply each metric to the same (actual, predicted) pair, in the order
# MSE, MAE, R-squared.
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report every metric rounded to two decimals.
print(f"Mean Squared Error: {mse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print(f"R-squared: {r2:.2f}")

# Cross-validation: 5-fold R2 of the same estimator over the full processed data.
# NOTE(review): X_processed was scaled and one-hot encoded using all rows
# before any fold is formed, so each fold's validation rows influenced the
# preprocessing -- consider moving those steps into a Pipeline; confirm
# whether this matters for the reported score.
cv_scores = cross_val_score(
    estimator=model,
    X=X_processed,
    y=y,
    scoring='r2',
    cv=5,
)
print(f"Cross-validated R2: {cv_scores.mean():.2f} (+/- {cv_scores.std():.2f})")

# === Residuals Plot ===
# Residual = actual minus predicted for each held-out row.
residuals = y_test - y_pred

# Scatter the residuals against the actual scores on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, residuals, alpha=0.6)
ax.axhline(0, color='red', linestyle='--')  # zero-error reference line
ax.set_xlabel('Actual Depression Scores')
ax.set_ylabel('Residuals')
ax.set_title('Residuals Plot')
ax.grid(True)
plt.show()