<a href="https://colab.research.google.com/github/2303a51884/2303a51884-pyds-b-02/blob/main/student_stress.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

from sklearn.linear_model import Ridge
from sklearn.ensemble import StackingRegressor

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


In [5]:
# Load dataset
df = pd.read_csv("StressLevelDataset.csv")

# Encode categorical columns
# The column 'academic_pressure' was not found in the dataset, causing a KeyError.
# It has been removed from the list of categorical columns to encode.
cat_cols = ['sleep_quality']
for col in cat_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# Select features & target
X = df.drop('stress_level', axis=1)
y = df['stress_level']

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)


In [7]:
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

rf_pred = rf.predict(X_test)

rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
rf_r2 = r2_score(y_test, rf_pred)


In [8]:
xgb = XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8
)
xgb.fit(X_train, y_train)

xgb_pred = xgb.predict(X_test)

xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_pred))
xgb_r2 = r2_score(y_test, xgb_pred)


In [9]:
estimators = [
    ('rf', rf),
    ('xgb', xgb)
]

stack = StackingRegressor(
    estimators=estimators,
    final_estimator=Ridge()
)

stack.fit(X_train, y_train)
stack_pred = stack.predict(X_test)

stack_rmse = np.sqrt(mean_squared_error(y_test, stack_pred))
stack_r2 = r2_score(y_test, stack_pred)


In [10]:
# Reshape into 3D format
X_train_lstm = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


In [11]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(1, X_train.shape[1])))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train_lstm, y_train, epochs=50, batch_size=16, verbose=0)

lstm_pred = model.predict(X_test_lstm).reshape(-1)

lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_pred))
lstm_r2 = r2_score(y_test, lstm_pred)


  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step


In [12]:
print("Random Forest  -> RMSE:", rf_rmse, " | R²:", rf_r2)
print("XGBoost        -> RMSE:", xgb_rmse, " | R²:", xgb_r2)
print("Stack Ensemble -> RMSE:", stack_rmse, " | R²:", stack_r2)
print("LSTM Model     -> RMSE:", lstm_rmse, " | R²:", lstm_r2)


Random Forest  -> RMSE: 0.3565414676889887  | R²: 0.8096024756305121
XGBoost        -> RMSE: 0.3735408807453002  | R²: 0.7910138368606567
Stack Ensemble -> RMSE: 0.3572511497771738  | R²: 0.8088437633783613
LSTM Model     -> RMSE: 0.37958603276291925  | R²: 0.7841948866844177
