In [8]:
# pip install sqlalchemy pymysql pandas scikit-learn imbalanced-learn joblib fastapi uvicorn pydantic

In [9]:
# Import pandas for data manipulation and analysis
import pandas as pd  

# Import create_engine from SQLAlchemy to connect Python with MySQL
from sqlalchemy import create_engine  

# Create a MySQL connection (user=root, password=******, host=localhost, port=3306, database=demo)
engine = create_engine("mysql+pymysql://root:050901@localhost:3306/demo")  

# Read all data from the table 'StressIndicators' into a Pandas DataFrame
df = pd.read_sql("SELECT * FROM StressIndicators;", engine)  

# Print the first 5 rows of the DataFrame to check a sample of the data
print(df.head())  

print(df.columns)
# For machine learning: separate features (X) and target (y)
# Here, 'stress_level' is the target column we want to predict
X = df.drop(columns = ['stress_level'], axis=1)

y = df["stress_level"]
            


   indicator_id  student_id  anxiety_level  self_esteem  depression  \
0             1           1             14           20          11   
1             2           2             15            8          15   
2             3           3             12           18          14   
3             4           4             16           12          15   
4             5           5             16           28           7   

   sleep_quality  social_support  peer_pressure  bullying  stress_level  
0              2               2              3         2             1  
1              1               1              4         5             2  
2              2               2              3         2             1  
3              1               1              4         5             2  
4              5               1              5         5             1  
Index(['indicator_id', 'student_id', 'anxiety_level', 'self_esteem',
       'depression', 'sleep_quality', 'social_support', 'pee

In [10]:
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# ---------- Handle Imbalance ----------
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# ---------- Train-Test Split ----------
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# ---------- Feature Scaling ----------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# ---------- Model Training ----------
model = RandomForestClassifier(
    class_weight="balanced",
    n_estimators=100,
    max_depth=None,
    random_state=42
)
model.fit(X_train, y_train)

# ---------- Evaluation ----------
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# ---------- Save Model + Scaler ----------
joblib.dump(model, "rf_model.pkl")
joblib.dump(scaler, "scaler.pkl")


              precision    recall  f1-score   support

           1       0.00      0.00      0.00         1
           2       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


['scaler.pkl']

In [11]:
# Before balancing
print("Before SMOTE:", y.value_counts())

# After balancing
print("After SMOTE:", y_resampled.value_counts())


Before SMOTE: stress_level
1    3
2    2
Name: count, dtype: int64
After SMOTE: stress_level
1    3
2    3
Name: count, dtype: int64
