In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import joblib

In [2]:
# Load dataset
df = pd.read_csv("datasets\data.csv", sep=None, engine="python")

# Define question groups
depression_qs = ["Q3A", "Q5A", "Q10A", "Q13A", "Q16A", "Q21A",
                 "Q24A", "Q26A", "Q31A", "Q34A", "Q37A", "Q38A", "Q42A"]

anxiety_qs = ["Q2A", "Q4A", "Q7A", "Q9A", "Q15A", "Q19A",
              "Q20A", "Q23A", "Q25A", "Q28A", "Q30A", "Q36A", "Q40A", "Q41A"]

stress_qs = ["Q1A", "Q6A", "Q8A", "Q11A", "Q12A", "Q14A",
             "Q17A", "Q18A", "Q22A", "Q27A", "Q29A", "Q32A", "Q33A", "Q35A"]

# Drop missing values for all
all_qs = depression_qs + anxiety_qs + stress_qs
df = df[all_qs].dropna()

print("Data ready:", df.shape)

  df = pd.read_csv("datasets\data.csv", sep=None, engine="python")


Data ready: (39775, 41)


In [3]:
# Calculate subscale scores
df["depression_score"] = df[depression_qs].sum(axis=1)
df["anxiety_score"] = df[anxiety_qs].sum(axis=1)
df["stress_score"] = df[stress_qs].sum(axis=1)

# Label mapping functions
def label_depression(score):
    if score <= 9:
        return "normal"
    elif score <= 13:
        return "mild"
    elif score <= 20:
        return "moderate"
    elif score <= 27:
        return "severe"
    else:
        return "extremely severe"

def label_anxiety(score):
    if score <= 7:
        return "normal"
    elif score <= 9:
        return "mild"
    elif score <= 14:
        return "moderate"
    elif score <= 19:
        return "severe"
    else:
        return "extremely severe"

def label_stress(score):
    if score <= 14:
        return "normal"
    elif score <= 18:
        return "mild"
    elif score <= 25:
        return "moderate"
    elif score <= 33:
        return "severe"
    else:
        return "extremely severe"

# Apply labels
df["depression_label"] = df["depression_score"].apply(label_depression)
df["anxiety_label"] = df["anxiety_score"].apply(label_anxiety)
df["stress_label"] = df["stress_score"].apply(label_stress)

print("Labeled data preview:")
print(df[["depression_score", "depression_label", "anxiety_score", "anxiety_label", "stress_score", "stress_label"]].head())


Labeled data preview:
   depression_score  depression_label  anxiety_score     anxiety_label  \
0                38  extremely severe             48  extremely severe   
1                34  extremely severe             31  extremely severe   
2                49  extremely severe             26  extremely severe   
3                27            severe             31  extremely severe   
4                42  extremely severe             54  extremely severe   

   stress_score      stress_label  
0            53  extremely severe  
1            43  extremely severe  
2            33            severe  
3            30            severe  
4            44  extremely severe  


In [4]:
# Train individual models
for label_name, questions in zip(
    ["depression", "anxiety", "stress"],
    [depression_qs, anxiety_qs, stress_qs]
):
    print(f"\nTraining model for {label_name}...")
    X = df[questions]
    y = df[f"{label_name}_label"]

    # Encode label
    encoder = LabelEncoder()
    y_encoded = encoder.fit_transform(y)

    # Split
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Train model
    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    # Evaluate
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred, target_names=encoder.classes_))

    # Save model and encoder
    joblib.dump(model, f"models/model_{label_name}.pkl")
    joblib.dump(encoder, f"models/encoder_{label_name}.pkl")

print("\nAll models and encoders saved successfully.")




Training model for depression...
                  precision    recall  f1-score   support

extremely severe       0.99      0.99      0.99      4961
            mild       1.00      1.00      1.00       192
        moderate       0.96      0.97      0.96      1321
          severe       0.93      0.93      0.93      1481

        accuracy                           0.97      7955
       macro avg       0.97      0.97      0.97      7955
    weighted avg       0.97      0.97      0.97      7955


Training model for anxiety...
                  precision    recall  f1-score   support

extremely severe       1.00      0.98      0.99      6593
        moderate       1.00      1.00      1.00       182
          severe       0.91      0.97      0.94      1180

        accuracy                           0.98      7955
       macro avg       0.97      0.99      0.98      7955
    weighted avg       0.98      0.98      0.98      7955


Training model for stress...
                  precision  

In [5]:
# Load models and encoders
models = {
    "depression": joblib.load("models/model_depression.pkl"),
    "anxiety": joblib.load("models/model_anxiety.pkl"),
    "stress": joblib.load("models/model_stress.pkl"),
}
encoders = {
    "depression": joblib.load("models/encoder_depression.pkl"),
    "anxiety": joblib.load("models/encoder_anxiety.pkl"),
    "stress": joblib.load("models/encoder_stress.pkl"),
}

# Define question sets
qs = {
    "depression": ["Q3A", "Q5A", "Q10A", "Q13A", "Q16A", "Q21A", "Q24A", "Q26A", "Q31A", "Q34A", "Q37A", "Q38A", "Q42A"],
    "anxiety": ["Q2A", "Q4A", "Q7A", "Q9A", "Q15A", "Q19A", "Q20A", "Q23A", "Q25A", "Q28A", "Q30A", "Q36A", "Q40A", "Q41A"],
    "stress": ["Q1A", "Q6A", "Q8A", "Q11A", "Q12A", "Q14A", "Q17A", "Q18A", "Q22A", "Q27A", "Q29A", "Q32A", "Q33A", "Q35A"],
}

def predict_all(user_input: dict):
    result = {}
    for label, qlist in qs.items():
        model = models[label]
        encoder = encoders[label]
        input_df = pd.DataFrame([[user_input[q] for q in qlist]], columns=qlist)
        pred = model.predict(input_df)[0]
        result[label] = encoder.inverse_transform([pred])[0]
    return result

# 🔍 Example usage:
# print(predict_all(user_input))