In [85]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [87]:
# Read the student-stress records from the CSV (path is relative to the
# kernel's working directory).
data = pd.read_csv("student_stress_dataset.csv")

# Plain-text preview of the first five rows to sanity-check the columns.
print(data.head(n=5))

   sleep_hours sleep_quality  study_hours  attendance_percent  \
0     6.957076       Average     2.908580           70.610913   
1     4.940024       Average     3.501950           83.711346   
2     7.625677       Average     5.026387          100.000000   
3     7.910847       Average     5.566285           81.271656   
4     3.573447          Poor     3.749992           75.401523   

   assignment_deadlines_per_week  exercise_hours junk_food_freq  \
0                              0        2.985007          Often   
1                              3        0.000000           Rare   
2                              4        3.486158          Often   
3                              4        0.000000           Rare   
4                              6        1.631350           Rare   

   caffeine_intake  screen_time_hours  social_media_hours  family_support  \
0                2           7.479738            0.000000               1   
1                7           4.663863            1.2

In [88]:
# Label -> integer code tables; codes follow the order in which labels are listed.
sleep_map = dict(Poor=0, Average=1, Good=2)
junk_map = {
    label: code
    for code, label in enumerate(("Never", "Rare", "Sometimes", "Often"))
}

In [89]:
def _encode_ordinal(column, codes):
    """Return `column` with its labels replaced by the integer codes in `codes`.

    A plain ``column.map(codes)`` turns every value that is not a key of
    `codes` into NaN. That silently wipes the column when this cell is run a
    second time (the values are already integers) and hides unexpected labels.
    Here, values that are already valid codes pass through unchanged, missing
    values stay missing, and any other value raises.

    Parameters
    ----------
    column : pandas.Series
        Column holding labels (keys of `codes`) and/or already-encoded values.
    codes : dict
        Mapping from label to integer code.

    Returns
    -------
    pandas.Series
        The encoded column.

    Raises
    ------
    ValueError
        If the column contains a non-null value that is neither a key nor a
        value of `codes`.
    """
    known = set(codes) | set(codes.values())
    unknown = [value for value in column.dropna().unique() if value not in known]
    if unknown:
        raise ValueError(
            f"Column {column.name!r} has values with no encoding: {unknown!r}"
        )
    # dict.get with the value itself as fallback leaves codes and NaN untouched.
    return column.map(lambda value: codes.get(value, value))


# Encode both categorical columns in place; safe to re-run.
data["sleep_quality"] = _encode_ordinal(data["sleep_quality"], sleep_map)
data["junk_food_freq"] = _encode_ordinal(data["junk_food_freq"], junk_map)

In [93]:
# Model input columns, in the order they are fed to the regressor.
# This same list is stored alongside the model when it is saved.
FEATURES = [
    "sleep_hours",
    "sleep_quality",
    "study_hours",
    "attendance_percent",
    "assignment_deadlines_per_week",
    "exercise_hours",
    "caffeine_intake",
    "screen_time_hours",
    "social_media_hours",
    "family_support",
    "peer_pressure",
    "financial_concerns",
    "junk_food_freq",
]

# Regression target, then the input matrix restricted to the FEATURES columns
# (kept in FEATURES order).
y = data["stress_score"]
X = data.loc[:, FEATURES]

In [95]:
# One seed shared by the split and the forest so both are reproducible and
# cannot drift apart when the value is changed.
SEED = 42

# Hold out 20% of the rows for evaluation.
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Fit a 300-tree random forest on the training portion only.
model = RandomForestRegressor(n_estimators=300, random_state=SEED)
model.fit(Xtr, ytr)

# Predictions on the held-out rows are computed in the evaluation cell, so the
# duplicate `model.predict(Xte)` that used to end this cell has been removed.

In [96]:
# Score the model on the held-out rows.
pred = model.predict(Xte)

# RMSE: square root of the mean squared residual.
residuals = pred - yte
squared_error = residuals ** 2
rmse = np.sqrt(squared_error.mean())

# Coefficient of determination on the same held-out rows.
r2 = r2_score(yte, pred)

print(f"RMSE: {rmse:.2f}  R²: {r2:.2f}")

RMSE: 5.67  R²: 0.86


In [58]:
# Persist the fitted model together with the feature order and the label
# encodings in a single file.
artifact = {
    "model": model,
    "features": FEATURES,
    "sleep_map": sleep_map,
    "junk_map": junk_map,
}
joblib.dump(artifact, "stress_model.pkl")
print("Saved stress_model.pkl")

Saved stress_model.pkl
