In [None]:
# Download the Kaggle dataset archive and unpack it (--unzip) into the working directory.
!kaggle datasets download -d cid007/mental-disorder-classification --unzip


Dataset URL: https://www.kaggle.com/datasets/cid007/mental-disorder-classification
License(s): CC0-1.0
Downloading mental-disorder-classification.zip to /content
  0% 0.00/2.17k [00:00<?, ?B/s]
100% 2.17k/2.17k [00:00<00:00, 3.79MB/s]


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer



In [None]:
# Read the dataset CSV from the working directory into a DataFrame.
csv_path = 'Dataset-Mental-Disorders.csv'
data = pd.read_csv(csv_path)

In [None]:
# Separate the feature matrix from the label column.
#
# 'Expert Diagnose' is the classification target. 'Patient Number' is also
# removed from the features: it looks like a per-patient identifier rather
# than a symptom (confirm against the CSV), and once label-encoded it would
# become an arbitrary integer per row that tree ensembles can memorise.
TARGET_COLUMN = 'Expert Diagnose'
ID_COLUMN = 'Patient Number'

X = data.drop(columns=[TARGET_COLUMN, ID_COLUMN])
y = data[TARGET_COLUMN]



In [None]:
# Print the column labels of the loaded DataFrame.
print(data.columns)

Index(['Patient Number', 'Sadness', 'Euphoric', 'Exhausted', 'Sleep dissorder',
       'Mood Swing', 'Suicidal thoughts', 'Anorxia', 'Authority Respect',
       'Try-Explanation', 'Aggressive Response', 'Ignore & Move-On',
       'Nervous Break-down', 'Admit Mistakes', 'Overthinking',
       'Sexual Activity', 'Concentration', 'Optimisim', 'Expert Diagnose'],
      dtype='object')


In [None]:
# Work out which feature columns are numeric and which are not. These two
# lists were never defined anywhere in the notebook, so a fresh
# "Restart & Run All" stopped at this cell with a NameError.
numeric_columns = X.select_dtypes(include='number').columns.tolist()
categorical_columns = X.select_dtypes(exclude='number').columns.tolist()

# NOTE(review): the imputers and encoder below are fitted on every row of X,
# i.e. before any train/test split is applied to it.

# Handle missing values for numeric columns
imputer = SimpleImputer(strategy='mean')  # For numeric columns
if len(numeric_columns) > 0:
    # Coerce anything unparsable to NaN so the mean imputer can fill it.
    X[numeric_columns] = X[numeric_columns].apply(pd.to_numeric, errors='coerce')
    X[numeric_columns] = imputer.fit_transform(X[numeric_columns])
else:
    print("No numeric columns found for imputation.")

# Handle missing values for categorical columns
imputer_cat = SimpleImputer(strategy='most_frequent')  # For categorical columns
if len(categorical_columns) > 0:
    # Guarded like the numeric branch: fitting an imputer on zero columns
    # raises, which would break a re-run of this cell after encoding.
    X[categorical_columns] = imputer_cat.fit_transform(X[categorical_columns])
else:
    print("No categorical columns found for imputation.")

# Encode categorical variables if any.
# The single encoder is refitted per column, so after the loop `le` only
# holds the classes of the last categorical column.
le = LabelEncoder()
for col in categorical_columns:
    X[col] = le.fit_transform(X[col])

No numeric columns found for imputation.


In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 2: Bagging Model
# Build a 50-estimator bagging ensemble, fit it on the training split and
# report its accuracy on the held-out split.
bagging_model = BaggingClassifier(
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator itself
bagging_pred = bagging_model.predict(X_test)
bagging_accuracy = accuracy_score(y_true=y_test, y_pred=bagging_pred)
print("Bagging Model Accuracy:", bagging_accuracy)

Bagging Model Accuracy: 0.8055555555555556


In [None]:
# Step 3: Boosting Model
# Build a 50-estimator AdaBoost ensemble, fit it on the training split and
# report its accuracy on the held-out split.
boosting_model = AdaBoostClassifier(
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator itself
boosting_pred = boosting_model.predict(X_test)
boosting_accuracy = accuracy_score(y_true=y_test, y_pred=boosting_pred)
print("Boosting Model Accuracy:", boosting_accuracy)

Boosting Model Accuracy: 0.5555555555555556




In [None]:
# Step 4: Random Forest Model
# Build a 100-tree random forest, fit it on the training split and report
# its accuracy on the held-out split.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator itself
rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_pred)
print("Random Forest Model Accuracy:", rf_accuracy)

Random Forest Model Accuracy: 0.8333333333333334


In [None]:
from sklearn.metrics import classification_report

# Print the per-class precision / recall / F1 table for each model's
# predictions on the held-out split, in the order the models were trained.
for report_title, predictions in (
    ("Bagging Model Report:", bagging_pred),
    ("Boosting Model Report:", boosting_pred),
    ("Random Forest Model Report:", rf_pred),
):
    print(report_title)
    print(classification_report(y_test, predictions))


Bagging Model Report:
                precision    recall  f1-score   support

Bipolar Type-1       0.67      0.86      0.75         7
Bipolar Type-2       1.00      0.86      0.92         7
    Depression       0.78      0.78      0.78         9
        Normal       0.83      0.77      0.80        13

      accuracy                           0.81        36
     macro avg       0.82      0.82      0.81        36
  weighted avg       0.82      0.81      0.81        36

Boosting Model Report:
                precision    recall  f1-score   support

Bipolar Type-1       1.00      0.14      0.25         7
Bipolar Type-2       0.54      1.00      0.70         7
    Depression       0.47      1.00      0.64         9
        Normal       1.00      0.23      0.38        13

      accuracy                           0.56        36
     macro avg       0.75      0.59      0.49        36
  weighted avg       0.78      0.56      0.48        36

Random Forest Model Report:
                precision

In [None]:
# List the working directory (the saved output shows the extracted dataset CSV).
!ls


Dataset-Mental-Disorders.csv  sample_data


In [None]:
# Encode the diagnosis labels as integers. The encoder is fitted on the whole
# target column, before splitting, so it knows every class.
# NOTE(review): the saved output of this cell shows integer codes rather than
# diagnosis names, which suggests `y` was already encoded when it last ran;
# confirm with a fresh Restart & Run All.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Redo the split with the encoded target (same test_size and seed as before).
# This overwrites X_train / X_test / y_train / y_test.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
)

# Refit the three existing ensembles on the encoded labels.
for ensemble in (bagging_model, boosting_model, rf_model):
    ensemble.fit(X_train, y_train)

# Predict on the held-out split (overwrites the earlier *_pred arrays).
bagging_pred, boosting_pred, rf_pred = (
    estimator.predict(X_test)
    for estimator in (bagging_model, boosting_model, rf_model)
)

# Map the predictions back through the encoder.
bagging_pred_labels, boosting_pred_labels, rf_pred_labels = (
    le.inverse_transform(encoded_pred)
    for encoded_pred in (bagging_pred, boosting_pred, rf_pred)
)

# Show the predicted diagnoses for every test row, one line per model.
print("Bagging Model Predictions (Mental Health Diagnoses):", bagging_pred_labels)
print("Boosting Model Predictions (Mental Health Diagnoses):", boosting_pred_labels)
print("Random Forest Model Predictions (Mental Health Diagnoses):", rf_pred_labels)




Bagging Model Predictions (Mental Health Diagnoses): [2 0 3 2 2 3 3 3 2 0 2 0 0 0 3 1 2 1 3 1 2 1 2 2 0 1 0 3 3 3 1 3 3 0 0 3]
Boosting Model Predictions (Mental Health Diagnoses): [2 1 3 2 2 2 3 2 2 2 2 1 1 1 3 1 2 1 2 1 2 1 2 2 1 1 1 2 2 2 1 2 2 0 1 2]
Random Forest Model Predictions (Mental Health Diagnoses): [2 0 3 2 2 2 3 3 2 2 2 0 0 0 3 1 2 1 3 1 2 1 2 2 1 1 0 3 2 3 1 3 3 0 0 3]
