In [44]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [45]:
def run_all_models(X_train_scaled, X_test_scaled, y_train_encoded, y_test_encoded):
    models = {
        "LogisticRegression": LogisticRegression(),
        "SVC": SVC(),
        "DecisionTreeClassifier": DecisionTreeClassifier(random_state=42),
        "RandomForestClassifier": RandomForestClassifier(random_state=42),
        "KNeighborsClassifier": KNeighborsClassifier(),
        "GradientBoostingClassifier": GradientBoostingClassifier(random_state=42),
        "GaussianNB": GaussianNB(),
    }

    # Create an empty DataFrame to store accuracy
    model_performance = []

    for name, model in models.items():
        model.fit(X_train_scaled, y_train_encoded)
        predictions = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test_encoded, predictions)
        print(f"{name} Accuracy:", accuracy)
        model_performance.append({"Model": name, "Accuracy": accuracy})

    # For the Sequential model
    sequential_model = Sequential()
    sequential_model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))
    sequential_model.add(Dense(32, activation='relu'))
    sequential_model.add(Dense(1, activation='sigmoid'))
    sequential_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    sequential_model.fit(X_train_scaled, y_train_encoded, epochs=50, batch_size=10, verbose=1)
    loss, accuracy = sequential_model.evaluate(X_test_scaled, y_test_encoded)
    print(f"Sequential Model Accuracy: {accuracy}")
    print(f"Sequential Model Loss: {loss}")
    model_performance.append({"Model": "Sequential", "Accuracy": accuracy})

    return pd.DataFrame(model_performance)


def hot_encode_binary_features(df:pd.DataFrame, columns:list)->pd.DataFrame:
    """_summary_

    Args:
        df (pd.DataFrame): _description_
        colums (list): _description_

    Returns:
        pd.DataFrame: _description_
    """
    
    for col in columns:
        df[col] = df[col].apply(lambda x: 1 if x == "YES" else 0)
    
    return df

In [46]:
df = pd.read_csv("Dataset-Mental-Disorders.csv")
df

Unnamed: 0,Patient Number,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
0,Patiant-01,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,YES,NO,NO,YES,YES,YES,3 From 10,3 From 10,4 From 10,Bipolar Type-2
1,Patiant-02,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,NO,4 From 10,2 From 10,5 From 10,Depression
2,Patiant-03,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,YES,NO,YES,YES,NO,6 From 10,5 From 10,7 From 10,Bipolar Type-1
3,Patiant-04,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,YES,NO,NO,NO,NO,NO,3 From 10,2 From 10,2 From 10,Bipolar Type-2
4,Patiant-05,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,NO,YES,YES,YES,5 From 10,5 From 10,6 From 10,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,Patiant-116,Most-Often,Seldom,Usually,Sometimes,NO,YES,NO,NO,YES,NO,YES,NO,NO,YES,2 From 10,5 From 10,3 From 10,Depression
116,Patiant-117,Sometimes,Sometimes,Sometimes,Seldom,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,6 From 10,7 From 10,8 From 10,Bipolar Type-1
117,Patiant-118,Usually,Sometimes,Usually,Sometimes,YES,NO,YES,YES,NO,NO,NO,YES,NO,YES,1 From 10,5 From 10,3 From 10,Bipolar Type-2
118,Patiant-119,Usually,Sometimes,Seldom,Seldom,NO,YES,YES,NO,YES,YES,YES,NO,YES,YES,7 From 10,7 From 10,7 From 10,Depression


In [47]:
df.drop("Patient Number", axis=1, inplace=True)
df["Suicidal thoughts"] =df["Suicidal thoughts"].apply(lambda x: x.strip())

In [48]:
binary_features, from_columns = [], []

for col in df.columns[:-1]:
  col_values = df[col].nunique()
  print(f"--------------------------------------")
  print(f"Col is: {col}")
  print(f" Unique values are: {df[col].unique()}")
  print(f" Number of unique values are: {col_values}")
  
  if col_values == 2:
    binary_features.append(col)

--------------------------------------
Col is: Sadness
 Unique values are: ['Usually' 'Sometimes' 'Seldom' 'Most-Often']
 Number of unique values are: 4
--------------------------------------
Col is: Euphoric
 Unique values are: ['Seldom' 'Most-Often' 'Usually' 'Sometimes']
 Number of unique values are: 4
--------------------------------------
Col is: Exhausted
 Unique values are: ['Sometimes' 'Usually' 'Seldom' 'Most-Often']
 Number of unique values are: 4
--------------------------------------
Col is: Sleep dissorder
 Unique values are: ['Sometimes' 'Most-Often' 'Usually' 'Seldom']
 Number of unique values are: 4
--------------------------------------
Col is: Mood Swing
 Unique values are: ['YES' 'NO']
 Number of unique values are: 2
--------------------------------------
Col is: Suicidal thoughts
 Unique values are: ['YES' 'NO']
 Number of unique values are: 2
--------------------------------------
Col is: Anorxia
 Unique values are: ['NO' 'YES']
 Number of unique values are: 2
----

In [49]:
from_columns = ["Sexual Activity", "Concentration", "Optimisim"]

for col in from_columns:
  df[col] = df[col].apply(lambda x: int(x.strip()[0]))

In [50]:
df = hot_encode_binary_features(df, binary_features)

In [51]:
freq_column_values = {"Seldom":0, "Sometimes":1, "Usually":2, "Most-Often":3}
freq_column = ["Sadness", "Euphoric","Exhausted", "Sleep dissorder"]
for col in freq_column:
  df[col] = df[col].map(freq_column_values).astype(int)
  


In [52]:
mapping_dict = {'Normal': 0, 'Bipolar Type-1': 1, 'Bipolar Type-2': 2, 'Depression': 3}
df['Expert Diagnose'] = df['Expert Diagnose'].map(mapping_dict).astype(int)
df.head()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
0,2,0,1,1,1,1,0,0,1,0,0,1,1,1,3,3,4,2
1,2,0,2,1,0,1,0,0,0,0,0,0,0,0,4,2,5,3
2,1,3,1,1,1,0,0,0,1,1,0,1,1,0,6,5,7,1
3,2,0,2,3,1,1,1,0,1,0,0,0,0,0,3,2,2,2
4,2,2,1,1,0,0,0,0,0,0,0,1,1,1,5,5,6,0


In [53]:
X, y = df.drop("Expert Diagnose", axis =1), df["Expert Diagnose"]
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()

X_train_scaled, X_test_scaled = scaler.fit_transform(X_train), scaler.transform(X_test)

models = run_all_models(X_train_scaled, X_test_scaled, y_train_encoded, y_test_encoded)
models

LogisticRegression Accuracy: 0.8333333333333334
SVC Accuracy: 0.8333333333333334
DecisionTreeClassifier Accuracy: 0.75
RandomForestClassifier Accuracy: 0.7916666666666666
KNeighborsClassifier Accuracy: 0.6666666666666666
GradientBoostingClassifier Accuracy: 0.7916666666666666
GaussianNB Accuracy: 0.7083333333333334
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Sequential Model Accuracy: 0.25
Sequential Model Loss: -2655.577392578125


Unnamed: 0,Model,Accuracy
0,LogisticRegression,0.833333
1,SVC,0.833333
2,DecisionTreeClassifier,0.75
3,RandomForestClassifier,0.791667
4,KNeighborsClassifier,0.666667
5,GradientBoostingClassifier,0.791667
6,GaussianNB,0.708333
7,Sequential,0.25
