In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Step up one directory so that the relative "Data" folder used by the next
# cell is reachable (presumably the parent is the project root - confirm).
# Guarded on the folder's presence so that re-running this cell, or starting
# the kernel from the parent directory already, does not climb further up.
if not os.path.isdir("Data"):
    os.chdir('..')

In [3]:
# Relative location of the dataset CSV. Built with os.path.join so the path
# separator is right on every OS; a literal backslash only works on Windows.
FILE_PATH = os.path.join("Data", "Dataset-Mental-Disorders.csv")

In [4]:
# Read the CSV at FILE_PATH into a DataFrame and preview its first rows.
df = pd.read_csv(FILE_PATH)
df.head()

Unnamed: 0,Patient Number,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
0,Patiant-01,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,YES,NO,NO,YES,YES,YES,3 From 10,3 From 10,4 From 10,Bipolar Type-2
1,Patiant-02,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,NO,4 From 10,2 From 10,5 From 10,Depression
2,Patiant-03,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,YES,NO,YES,YES,NO,6 From 10,5 From 10,7 From 10,Bipolar Type-1
3,Patiant-04,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,YES,NO,NO,NO,NO,NO,3 From 10,2 From 10,2 From 10,Bipolar Type-2
4,Patiant-05,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,NO,YES,YES,YES,5 From 10,5 From 10,6 From 10,Normal


### Dropping the Patient Number column

In [5]:
# Remove the 'Patient Number' column.
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell safe to re-run: a second execution is a no-op rather than a
# KeyError for the column that is already gone.
df = df.drop(columns='Patient Number', errors='ignore')

## Preprocessing 

- As we know, there are two different spellings of YES, namely 'YES' and 'YES ' (with a trailing space); let's remove the space so they become one value

In [6]:
# Remove every space from the answers so that 'YES ' and 'YES' collapse into
# one category. The vectorised .str accessor replaces the list(map(lambda ...))
# round-trip and leaves missing values as NaN instead of raising
# AttributeError on them.
df['Suicidal thoughts'] = df['Suicidal thoughts'].str.replace(" ", "", regex=False)
df['Suicidal thoughts'].value_counts()

NO     63
YES    57
Name: Suicidal thoughts, dtype: int64

### Converting the Sexual Activity, Concentration and Optimisim columns to numeric columns

In [7]:
def convert_to_number(string):
    """Extract the leading integer from a rating such as ``'3 From 10'``.

    The whole leading run of digits is parsed, so ``'10 From 10'`` yields 10
    rather than 1. Values that are already numeric are converted through
    their string form, which makes the function safe to apply a second time
    to a column that has already been converted.

    Parameters
    ----------
    string : object
        Usually a string of the form ``'<score> From 10'``; any other object
        is accepted and handled through ``str()``.

    Returns
    -------
    int
        The leading unsigned integer, or 0 when the value does not start
        with a digit (empty string, ``None``, NaN, free text, ...).
    """
    text = str(string).strip()

    # Collect the leading decimal digits only; stop at the first other character.
    digits = ""
    for char in text:
        if not char.isdecimal():
            break
        digits += char

    return int(digits) if digits else 0

In [8]:
# Run convert_to_number over every entry of the column.
df['Sexual Activity'] = df['Sexual Activity'].map(convert_to_number)

In [9]:
df['Sexual Activity']

0      3
1      4
2      6
3      3
4      5
      ..
115    2
116    6
117    1
118    7
119    7
Name: Sexual Activity, Length: 120, dtype: int64

In [10]:
# Same element-wise conversion for the two remaining rating columns,
# then display the frame.
df['Concentration'] = df['Concentration'].map(convert_to_number)
df['Optimisim'] = df['Optimisim'].map(convert_to_number)
df

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
0,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,YES,NO,NO,YES,YES,YES,3,3,4,Bipolar Type-2
1,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,NO,4,2,5,Depression
2,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,YES,NO,YES,YES,NO,6,5,7,Bipolar Type-1
3,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,YES,NO,NO,NO,NO,NO,3,2,2,Bipolar Type-2
4,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,NO,YES,YES,YES,5,5,6,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,Most-Often,Seldom,Usually,Sometimes,NO,YES,NO,NO,YES,NO,YES,NO,NO,YES,2,5,3,Depression
116,Sometimes,Sometimes,Sometimes,Seldom,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,6,7,8,Bipolar Type-1
117,Usually,Sometimes,Usually,Sometimes,YES,NO,YES,YES,NO,NO,NO,YES,NO,YES,1,5,3,Bipolar Type-2
118,Usually,Sometimes,Seldom,Seldom,NO,YES,YES,NO,YES,YES,YES,NO,YES,YES,7,7,7,Depression


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Sadness              120 non-null    object
 1   Euphoric             120 non-null    object
 2   Exhausted            120 non-null    object
 3   Sleep dissorder      120 non-null    object
 4   Mood Swing           120 non-null    object
 5   Suicidal thoughts    120 non-null    object
 6   Anorxia              120 non-null    object
 7   Authority Respect    120 non-null    object
 8   Try-Explanation      120 non-null    object
 9   Aggressive Response  120 non-null    object
 10  Ignore & Move-On     120 non-null    object
 11  Nervous Break-down   120 non-null    object
 12  Admit Mistakes       120 non-null    object
 13  Overthinking         120 non-null    object
 14  Sexual Activity      120 non-null    int64 
 15  Concentration        120 non-null    int64 
 16  Optimisi

In [12]:
def convert_to_number2(string):
    """Encode a YES/NO answer as 1/0.

    String answers are compared after trimming surrounding whitespace and
    ignoring case, so variants such as ``'YES '`` still count as YES.
    A value that is already equal to 1 stays 1, which keeps the encoding
    stable if the function is applied again to an already-encoded column.

    Parameters
    ----------
    string : object
        Usually ``'YES'`` or ``'NO'``; any other object is accepted.

    Returns
    -------
    int
        1 for a YES answer (or an existing 1), 0 for everything else,
        including ``None`` and NaN.
    """
    if isinstance(string, str):
        return 1 if string.strip().upper() == 'YES' else 0

    # Non-string input: keep an existing 1, map anything else to 0.
    try:
        return 1 if string == 1 else 0
    except (TypeError, ValueError):
        # The comparison has no single truth value (e.g. array-like input).
        return 0

In [13]:
df.select_dtypes(include= ['O']).columns

Index(['Sadness', 'Euphoric', 'Exhausted', 'Sleep dissorder', 'Mood Swing',
       'Suicidal thoughts', 'Anorxia', 'Authority Respect', 'Try-Explanation',
       'Aggressive Response', 'Ignore & Move-On', 'Nervous Break-down',
       'Admit Mistakes', 'Overthinking', 'Expert Diagnose'],
      dtype='object')

In [14]:
# Columns whose values are passed through convert_to_number2.
columns_to_change = [
    'Suicidal thoughts',
    'Anorxia',
    'Authority Respect',
    'Try-Explanation',
    'Aggressive Response',
    'Ignore & Move-On',
    'Nervous Break-down',
    'Admit Mistakes',
    'Overthinking',
    'Mood Swing',
]

for col in columns_to_change:
    df[col] = df[col].map(convert_to_number2)

df

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep dissorder,Mood Swing,Suicidal thoughts,Anorxia,Authority Respect,Try-Explanation,Aggressive Response,Ignore & Move-On,Nervous Break-down,Admit Mistakes,Overthinking,Sexual Activity,Concentration,Optimisim,Expert Diagnose
0,Usually,Seldom,Sometimes,Sometimes,1,1,0,0,1,0,0,1,1,1,3,3,4,Bipolar Type-2
1,Usually,Seldom,Usually,Sometimes,0,1,0,0,0,0,0,0,0,0,4,2,5,Depression
2,Sometimes,Most-Often,Sometimes,Sometimes,1,0,0,0,1,1,0,1,1,0,6,5,7,Bipolar Type-1
3,Usually,Seldom,Usually,Most-Often,1,1,1,0,1,0,0,0,0,0,3,2,2,Bipolar Type-2
4,Usually,Usually,Sometimes,Sometimes,0,0,0,0,0,0,0,1,1,1,5,5,6,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,Most-Often,Seldom,Usually,Sometimes,0,1,0,0,1,0,1,0,0,1,2,5,3,Depression
116,Sometimes,Sometimes,Sometimes,Seldom,1,0,0,0,0,1,0,0,0,1,6,7,8,Bipolar Type-1
117,Usually,Sometimes,Usually,Sometimes,1,0,1,1,0,0,0,1,0,1,1,5,3,Bipolar Type-2
118,Usually,Sometimes,Seldom,Seldom,0,1,1,0,1,1,1,0,1,1,7,7,7,Depression


In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Sadness              120 non-null    object
 1   Euphoric             120 non-null    object
 2   Exhausted            120 non-null    object
 3   Sleep dissorder      120 non-null    object
 4   Mood Swing           120 non-null    int64 
 5   Suicidal thoughts    120 non-null    int64 
 6   Anorxia              120 non-null    int64 
 7   Authority Respect    120 non-null    int64 
 8   Try-Explanation      120 non-null    int64 
 9   Aggressive Response  120 non-null    int64 
 10  Ignore & Move-On     120 non-null    int64 
 11  Nervous Break-down   120 non-null    int64 
 12  Admit Mistakes       120 non-null    int64 
 13  Overthinking         120 non-null    int64 
 14  Sexual Activity      120 non-null    int64 
 15  Concentration        120 non-null    int64 
 16  Optimisi

In [16]:
# Separate the target column from the remaining feature columns.
y = df['Expert Diagnose']
X = df.drop(columns='Expert Diagnose')
X, y

(        Sadness    Euphoric  Exhausted Sleep dissorder  Mood Swing  \
 0       Usually      Seldom  Sometimes       Sometimes           1   
 1       Usually      Seldom    Usually       Sometimes           0   
 2     Sometimes  Most-Often  Sometimes       Sometimes           1   
 3       Usually      Seldom    Usually      Most-Often           1   
 4       Usually     Usually  Sometimes       Sometimes           0   
 ..          ...         ...        ...             ...         ...   
 115  Most-Often      Seldom    Usually       Sometimes           0   
 116   Sometimes   Sometimes  Sometimes          Seldom           1   
 117     Usually   Sometimes    Usually       Sometimes           1   
 118     Usually   Sometimes     Seldom          Seldom           0   
 119   Sometimes     Usually     Seldom         Usually           0   
 
      Suicidal thoughts  Anorxia  Authority Respect  Try-Explanation  \
 0                    1        0                  0                1   
 1

In [17]:
# Split the feature column names by dtype: object columns form one group,
# all other columns the second group.
cat_features = X.select_dtypes(include=['object']).columns
num_features = X.select_dtypes(exclude=['object']).columns

In [18]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [19]:
# Transformer instances, both created with their default settings.
numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()

In [20]:
# Route the object-dtype columns through the one-hot encoder and the other
# columns through the scaler. Each entry is (name, transformer, columns).
preprocessor = ColumnTransformer(
    transformers=[
        ("OneHotEncoder", oh_transformer, cat_features),
        ("StandardScaler", numeric_transformer, num_features),
    ],
)

In [21]:
# Fit the preprocessor and encode the features.
# The input is rebuilt from df rather than taken from X: X is overwritten
# here with the transformed array, so a second run of `fit_transform(X)` would
# receive that array and fail, because the columns are selected by name.
# NOTE(review): the preprocessor is fitted on all rows before the train/test
# split made later in the notebook, so test rows contribute to the fitted
# statistics - consider fitting on the training split only.
X = preprocessor.fit_transform(df.drop(columns='Expert Diagnose'))


In [22]:
X.shape

(120, 29)

### Converting the target variable (Expert Diagnose) into numeric labels for classification

In [23]:
from sklearn.preprocessing import LabelEncoder

In [24]:
# Encoder that maps the target's class names to integer codes.
label_encoder = LabelEncoder()

In [25]:
# Fit the encoder on the target and replace y with its integer codes.
y = label_encoder.fit_transform(y)

In [26]:
y

array([1, 2, 0, 1, 3, 2, 0, 3, 1, 2, 3, 0, 1, 3, 2, 0, 3, 0, 2, 3, 1, 0,
       3, 2, 1, 3, 3, 2, 1, 3, 0, 2, 1, 0, 3, 2, 0, 1, 3, 1, 2, 1, 3, 0,
       2, 1, 3, 1, 0, 2, 3, 1, 2, 0, 1, 3, 0, 0, 2, 3, 1, 2, 0, 1, 3, 2,
       3, 0, 1, 0, 2, 1, 0, 3, 1, 2, 2, 1, 3, 2, 0, 1, 2, 3, 0, 1, 2, 0,
       0, 3, 2, 1, 3, 0, 2, 1, 0, 3, 2, 1, 0, 2, 0, 3, 1, 2, 3, 2, 1, 2,
       3, 1, 0, 1, 3, 2, 0, 1, 2, 3])

### Train-Test Split

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [34]:
# dt = DecisionTreeClassifier()

In [35]:
# dt.fit(X_train, y_train)
# preds = dt.predict(X_test)
# score = accuracy_score(y_test, preds)
# print(score)


0.75


In [39]:
# Candidate classifiers, keyed by the name used in the report.
# The tree-based models get a fixed random_state so their scores are
# repeatable from run to run; without it the reported accuracies change each
# time the cell is executed.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Naive Bayes': GaussianNB(),
    'KNN': KNeighborsClassifier()
}

# Parallel lists filled in the loop: model names and their test accuracies.
models_list = []
acc_score_list = []

# Iterate over (name, estimator) pairs directly instead of rebuilding
# list(models.keys()) / list(models.values()) on every access.
for model_name, model in models.items():

    model.fit(X_train, y_train)

    # Making predictions on both splits
    y_train_preds = model.predict(X_train)
    y_test_preds = model.predict(X_test)

    # Evaluating the model: training accuracy next to test accuracy shows how
    # far the two diverge.
    acc_score_train = accuracy_score(y_train, y_train_preds)
    acc_score_test = accuracy_score(y_test, y_test_preds)

    print(f">>>>>>>>>>{model_name}<<<<<<<<<")
    models_list.append(model_name)
    print('Model performance for Training set')
    print(f'Accuracy Score is Train Set {acc_score_train}')
    print('----------------------------------')

    print('Model performance for Test set')
    print(f'Accuracy Score is Test Set {acc_score_test}')

    acc_score_list.append(acc_score_test)
    print("="*30)
    print("\n")

>>>>>>>>>>Decision Tree<<<<<<<<<
Model performance for Training set
Accuracy Score is Train Set 1.0
----------------------------------
Model performance for Test set
Accuracy Score is Test Set 0.8333333333333334


>>>>>>>>>>Random Forest<<<<<<<<<
Model performance for Training set
Accuracy Score is Train Set 1.0
----------------------------------
Model performance for Test set
Accuracy Score is Test Set 0.8333333333333334


>>>>>>>>>>Naive Bayes<<<<<<<<<
Model performance for Training set
Accuracy Score is Train Set 0.78125
----------------------------------
Model performance for Test set
Accuracy Score is Test Set 0.75


>>>>>>>>>>KNN<<<<<<<<<
Model performance for Training set
Accuracy Score is Train Set 0.8645833333333334
----------------------------------
Model performance for Test set
Accuracy Score is Test Set 0.625




In [40]:
acc_score_list

[0.8333333333333334, 0.8333333333333334, 0.75, 0.625]

In [45]:
# Summary table of test accuracy per model, best score first.
(
    pd.DataFrame({'Model Name': models_list, 'Accuracy Score': acc_score_list})
    .sort_values(by='Accuracy Score', ascending=False)
)

Unnamed: 0,Model Name,Accuracy Score
0,Decision Tree,0.833333
1,Random Forest,0.833333
2,Naive Bayes,0.75
3,KNN,0.625


## As we can see, Decision Tree and Random Forest have the best test accuracy of the four models

In [None]:
##