In [1]:
import pandas as pd
mental_df = pd.read_csv("mental_health.csv")

In [2]:
mental_df.head()
mental_df = mental_df[['Symptoms', 'Diagnosis / Condition', 'Previous Diagnosis', 'Self-care Advice', 'Suggested Therapy', 'Duration (weeks)']]

In [3]:
mental_df = mental_df.rename(columns= {
    'Diagnosis / Condition' : 'Diagnosis',
    'Self-care Advice': 'Self_Care_Advice',
    'Suggested Therapy': 'Suggested_Therapy',
    'Duration (weeks)': 'Duration',
    'Previous Diagnosis': 'Prev_Diagnosis'
})

In [4]:
therapy_unique_values = mental_df['Suggested_Therapy'].unique()
diagnosis_unique_values = mental_df['Diagnosis'].unique()
print(therapy_unique_values)
print(diagnosis_unique_values)

['Support Groups' 'Cognitive Behavioral Therapy' 'Psychotherapy'
 'Mindfulness-Based Therapy' 'No Therapy Needed']
['Panic Disorder' 'Depression' 'Anxiety' 'Burnout' 'Stress']


A label encoder is a technique that converts non-numerical data into numerical values, 
which is useful for machine learning and data analysis. It's often used when working with categorical data, such as ordinal data, where there's a hierarchy among the values

In [5]:
mental_df.columns
print(len(mental_df['Symptoms']))
print(len(mental_df['Diagnosis']))
print(len(mental_df['Self_Care_Advice']))
print(len(mental_df['Suggested_Therapy']))
print(len(mental_df['Duration']))
print(len(mental_df['Prev_Diagnosis']))

5000
5000
5000
5000
5000
5000


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

def build_model():
    le_diagnosis = LabelEncoder()
    le_symtoms = LabelEncoder()
    le_self_care = LabelEncoder()
    le_therapy = LabelEncoder()
    # mental_df['Duration'] is already int style, we don't have to encode it
    
    mental_df['Diagnosis_encoded'] = le_diagnosis.fit_transform(mental_df['Diagnosis'])
    mental_df['Symptoms_encoded'] = le_symtoms.fit_transform(mental_df['Symptoms'])
    mental_df['Self_Care_Advice_encoded'] = le_self_care.fit_transform(mental_df['Self_Care_Advice'])
    mental_df['Suggested_Therapy_encoded'] = le_therapy.fit_transform(mental_df['Suggested_Therapy'])

    # Training Data
    X = mental_df[['Diagnosis_encoded', 'Symptoms_encoded']]
    y_self_care = mental_df['Self_Care_Advice_encoded']
    y_therapy = mental_df['Suggested_Therapy_encoded']

    X_train, X_test, y_self_care_train, y_self_care_test, y_therapy_train, y_therapy_test = train_test_split(X, y_self_care, y_therapy, test_size=0.2, random_state=42)
    # Train models for Self Care Advice and Suggested Therapy
    model_self_care = RandomForestClassifier()
    model_therapy = RandomForestClassifier()

    model_self_care.fit(X_train, y_self_care_train)
    model_therapy.fit(X_train, y_therapy_train)

    # Make predictions
    self_care_pred = model_self_care.predict(X_test)
    therapy_pred = model_therapy.predict(X_test)

    # Display classification reports
    print("Self Care Advice Classification Report:")
    self_care_report = classification_report(y_self_care_test, self_care_pred, target_names=le_self_care.classes_)
    print(self_care_report)
    print("\nSuggested Therapy Classification Report:")
    self_therapy_report = classification_report(y_therapy_test, therapy_pred, target_names=le_therapy.classes_)
    print(self_therapy_report)
    return model_self_care , model_therapy, le_diagnosis, le_symtoms, le_self_care, le_therapy, self_care_report, self_therapy_report

# Initial Reports

### Self Care Advice Classification Report

| Self Care Advice       | Precision | Recall | F1-Score | Support |
|------------------------|-----------|--------|----------|---------|
| Breathing Exercises    | 0.20      | 0.17   | 0.18     | 166     |
| Exercise               | 0.17      | 0.24   | 0.20     | 181     |
| Journaling             | 0.32      | 0.14   | 0.20     | 191     |
| Meditation             | 0.11      | 0.03   | 0.04     | 116     |
| Take Breaks            | 0.17      | 0.33   | 0.23     | 164     |
| Talk to a Friend       | 0.22      | 0.21   | 0.21     | 182     |
| **Accuracy**           |           |        | 0.19     | 1000    |
| **Macro Avg**          | 0.20      | 0.19   | 0.18     | 1000    |
| **Weighted Avg**       | 0.21      | 0.19   | 0.18     | 1000    |

### Suggested Therapy Classification Report

| Suggested Therapy               | Precision | Recall | F1-Score | Support |
|---------------------------------|-----------|--------|----------|---------|
| Cognitive Behavioral Therapy    | 0.16      | 0.13   | 0.14     | 200     |
| Mindfulness-Based Therapy       | 0.14      | 0.03   | 0.05     | 189     |
| No Therapy Needed               | 0.17      | 0.09   | 0.11     | 187     |
| Psychotherapy                   | 0.17      | 0.31   | 0.22     | 202     |
| Support Groups                  | 0.24      | 0.34   | 0.28     | 222     |
| **Accuracy**                    |           |        | 0.19     | 1000    |
| **Macro Avg**                   | 0.17      | 0.18   | 0.16     | 1000    |
| **Weighted Avg**                | 0.18      | 0.19   | 0.17     | 1000    |



In [9]:
# model_self_care , model_therapy, le_diagnosis, le_symtoms, le_self_care, le_therapy, self_care_report, self_therapy_report = build_model()
# Save model for purpose
import joblib
def save_model(model_self_care , model_therapy, le_diagnosis, le_symtoms, le_self_care, le_therapy ):
    joblib.dump(model_self_care, "model_self_care.pkl")
    joblib.dump(model_therapy, "model_therapy.pkl")
    joblib.dump(le_diagnosis, "le_diagnosis.pkl")
    joblib.dump(le_symtoms, "le_symtoms.pkl")
    joblib.dump(le_self_care, "le_self_care.pkl")
    joblib.dump(le_therapy, "le_therapy.pkl")

# save_model(model_self_care , model_therapy, le_diagnosis, le_symtoms, le_self_care, le_therapy)

In [12]:

def test_build_model(mental_df):
    
    le_diagnosis = LabelEncoder()
    le_symtoms = LabelEncoder()
    le_self_care = LabelEncoder()
    le_therapy = LabelEncoder()
    le_prev_diagnosis = LabelEncoder()

    mental_df['Diagnosis_encoded'] = le_diagnosis.fit_transform(mental_df['Diagnosis'])
    mental_df['Symptoms_encoded'] = le_symtoms.fit_transform(mental_df['Symptoms'])
    mental_df['Self_Care_Advice_encoded'] = le_self_care.fit_transform(mental_df['Self_Care_Advice'])
    mental_df['Suggested_Therapy_encoded'] = le_therapy.fit_transform(mental_df['Suggested_Therapy'])
    mental_df['Prev_Diagnosis_encoded'] = le_prev_diagnosis.fit_transform(mental_df['Prev_Diagnosis'])
    
    # mental_df['Duration'] It is already mental_df we don't have to label codes it

    # Training Data with duration
    
    X = mental_df[['Diagnosis_encoded', 'Symptoms_encoded', 'Prev_Diagnosis_encoded']] 
    y_self_care = mental_df['Self_Care_Advice_encoded']
    y_therapy = mental_df['Suggested_Therapy_encoded']

    X_train, X_test, y_self_care_train, y_self_care_test, y_therapy_train, y_therapy_test = train_test_split(X, y_self_care, y_therapy, test_size=0.2, random_state=42)
    # Train models for Self Care Advice and Suggested Therapy
    model_self_care = RandomForestClassifier()
    model_therapy = RandomForestClassifier()

    model_self_care.fit(X_train, y_self_care_train)
    model_therapy.fit(X_train, y_therapy_train)

    # Make predictions
    self_care_pred = model_self_care.predict(X_test)
    therapy_pred = model_therapy.predict(X_test)

    # Display classification reports
    print("Self Care Advice Classification Report:")
    self_care_report = classification_report(y_self_care_test, self_care_pred, target_names=le_self_care.classes_)
    print(self_care_report)
    print("\nSuggested Therapy Classification Report:")
    self_therapy_report = classification_report(y_therapy_test, therapy_pred, target_names=le_therapy.classes_)
    print(self_therapy_report)
    return model_self_care , model_therapy, le_diagnosis, le_symtoms, le_self_care, le_therapy, self_care_report, self_therapy_report

test_build_model(mental_df)

Self Care Advice Classification Report:
                     precision    recall  f1-score   support

Breathing Exercises       0.22      0.19      0.20       166
           Exercise       0.17      0.22      0.19       181
         Journaling       0.21      0.19      0.20       191
         Meditation       0.06      0.03      0.04       116
        Take Breaks       0.17      0.24      0.20       164
   Talk to a Friend       0.18      0.16      0.17       182

           accuracy                           0.18      1000
          macro avg       0.17      0.17      0.17      1000
       weighted avg       0.17      0.18      0.17      1000


Suggested Therapy Classification Report:
                              precision    recall  f1-score   support

Cognitive Behavioral Therapy       0.22      0.17      0.19       200
   Mindfulness-Based Therapy       0.18      0.16      0.17       189
           No Therapy Needed       0.18      0.15      0.16       187
               Psychothe

(RandomForestClassifier(),
 RandomForestClassifier(),
 LabelEncoder(),
 LabelEncoder(),
 LabelEncoder(),
 LabelEncoder(),
 '                     precision    recall  f1-score   support\n\nBreathing Exercises       0.22      0.19      0.20       166\n           Exercise       0.17      0.22      0.19       181\n         Journaling       0.21      0.19      0.20       191\n         Meditation       0.06      0.03      0.04       116\n        Take Breaks       0.17      0.24      0.20       164\n   Talk to a Friend       0.18      0.16      0.17       182\n\n           accuracy                           0.18      1000\n          macro avg       0.17      0.17      0.17      1000\n       weighted avg       0.17      0.18      0.17      1000\n',
 '                              precision    recall  f1-score   support\n\nCognitive Behavioral Therapy       0.22      0.17      0.19       200\n   Mindfulness-Based Therapy       0.18      0.16      0.17       189\n           No Therapy Needed    

# Self Care Advice Classification Report with Based with durations.

| Self Care Advice       | Precision | Recall | F1-Score | Support |
|------------------------|-----------|--------|----------|---------|
| Breathing Exercises    | 0.16      | 0.15   | 0.15     | 166     |
| Exercise               | 0.18      | 0.18   | 0.18     | 181     |
| Journaling             | 0.20      | 0.20   | 0.20     | 191     |
| Meditation             | 0.09      | 0.07   | 0.08     | 116     |
| Take Breaks            | 0.14      | 0.14   | 0.14     | 164     |
| Talk to a Friend       | 0.16      | 0.18   | 0.17     | 182     |
| **Accuracy**           |           |        | 0.16     | 1000    |
| **Macro Avg**          | 0.15      | 0.15   | 0.15     | 1000    |
| **Weighted Avg**       | 0.16      | 0.16   | 0.16     | 1000    |

# Suggested Therapy Classification Report

| Suggested Therapy               | Precision | Recall | F1-Score | Support |
|---------------------------------|-----------|--------|----------|---------|
| Cognitive Behavioral Therapy    | 0.20      | 0.23   | 0.21     | 200     |
| Mindfulness-Based Therapy       | 0.17      | 0.16   | 0.16     | 189     |
| No Therapy Needed               | 0.21      | 0.17   | 0.19     | 187     |
| Psychotherapy                   | 0.22      | 0.23   | 0.22     | 202     |
| Support Groups                  | 0.26      | 0.27   | 0.26     | 222     |
| **Accuracy**                    |           |        | 0.21     | 1000    |
| **Macro Avg**                   | 0.21      | 0.21   | 0.21     | 1000    |
| **Weighted Avg**
