## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import seaborn as sb
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.model_selection import GridSearchCV,cross_val_score




## Import the dataset

In [None]:
# Load the dataset from a path relative to the current working directory
# and preview its first rows.
data = pd.read_csv("Data/dataset.csv")
data.head()

In [None]:
# List the distinct values found in the 'Disorder' column.
data['Disorder'].unique()

## Converting the dataset to a numpy array

In [None]:
# Independent NumPy copy of the frame's values as they are at this point.
df = data.to_numpy(copy=True)

## Labeling the categorical data using label encoder

In [None]:
from sklearn.preprocessing import LabelEncoder

# A single encoder is re-fitted on each column in turn and the column is
# overwritten with its integer codes. After the loop `le` only remembers the
# mapping learned from the last column it saw.
le = LabelEncoder()
for column_name in data.columns:
    data[column_name] = le.fit_transform(data[column_name])



## Reading the Column names

In [None]:
# Column labels of the frame, in their original order.
col = list(data.columns)

## Checking for null values

In [None]:
# Count missing values per column.
# NOTE(review): in the cell order shown, this runs after every column has been
# label-encoded, so it inspects the encoded frame rather than the raw CSV --
# confirm whether the check should happen before encoding.
data.isnull().sum()

## Splitting data into features (X) and target (Y)

In [None]:
# Features are the first 24 columns; the target is the 'Disorder' column.
X = data[col[:24]]
Y = data['Disorder']

## Splitting into training and testing data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

## Decision tree classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree (criterion left at the library default).
# max_features: 'sqrt' replaces the former 'auto'. For classifiers 'auto' was
# an alias of 'sqrt'; it was deprecated in scikit-learn 1.1 and is rejected as
# an invalid parameter from 1.3 onwards, so 'sqrt' keeps the same behaviour
# while working on current releases.
clf_gini = DecisionTreeClassifier(
    max_depth=10,
    max_features='sqrt',
    min_samples_leaf=1,
    min_samples_split=2,
)
clf_gini.fit(X_train, Y_train)

In [None]:
# Predict encoded class labels for the held-out split with the fitted tree.
y_pred_gini = clf_gini.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Share of held-out rows whose predicted label matches the true label.
gini_accuracy = accuracy_score(y_test, y_pred_gini)
print('Model accuracy score with criterion gini index: {0:0.4f}'.format(gini_accuracy))

In [None]:
# Decision-tree predictions for the held-out split, used by the plots below.
prediction_test = clf_gini.predict(X_test)

# Tick labels for the confusion-matrix heatmap, indexed by encoded label.
# LabelEncoder assigns integer codes in sorted order of the original values,
# so 'Normal' (3) must come before 'Stress' (4). This is the same mapping the
# Test Case 1 cell uses when it decodes a prediction.
classes = ['Anxiety', 'Depression', 'Loneliness', 'Normal', 'Stress']
def plot_confusionmatrix(pred, test, dom):
    """Print a title and draw a confusion matrix as an annotated heatmap.

    The two label sequences are forwarded to ``confusion_matrix`` in the
    order received, so ``pred`` fills its first argument and ``test`` its
    second. The calls visible in this notebook pass the true labels first
    and the predictions second.

    Parameters
    ----------
    pred : first label sequence handed to ``confusion_matrix``.
    test : second label sequence handed to ``confusion_matrix``.
    dom : text placed in front of "Confusion matrix" in the printed title.
    """
    print(f'{dom} Confusion matrix')
    matrix = confusion_matrix(pred, test)
    # Both axes use the module-level `classes` list as tick labels.
    heatmap_options = dict(
        annot=True,
        fmt='g',
        cmap='Blues',
        xticklabels=classes,
        yticklabels=classes,
    )
    sb.heatmap(matrix, **heatmap_options)
    plt.tight_layout()
    plt.show()

In [None]:
# Score the fitted decision tree on the training split and on the held-out
# split, then report both.
tree_train_accuracy = clf_gini.score(X_train, Y_train)
tree_test_accuracy = clf_gini.score(X_test, y_test)
print("Training Accuracy is: ", tree_train_accuracy)
print("Testing Accuracy is: ", tree_test_accuracy)

# Confusion matrix of the tree's held-out predictions (true labels first).
plot_confusionmatrix(y_test, prediction_test, dom='Test')


## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Small random forest of 20 single-split trees (max_depth=1).
# max_features: 'sqrt' replaces the former 'auto'. For classifiers 'auto' was
# an alias of 'sqrt'; it was deprecated in scikit-learn 1.1 and is rejected as
# an invalid parameter from 1.3 onwards.
model = RandomForestClassifier(
    bootstrap=True,
    max_depth=1,
    max_features='sqrt',
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=20,
)
model.fit(X_train, Y_train)

In [None]:
# Held-out predictions from the fitted forest and their accuracy.
random_predict = model.predict(X_test)
forest_accuracy = accuracy_score(y_test, random_predict)
print('Model accuracy score with criterion gini index: {0:0.4f}'.format(forest_accuracy))

In [None]:
#Training Accuracy
print("Training Accuracy is: ", model.score(X_train, Y_train))
#Test Accuracy
print("Testing Accuracy is: ", model.score(X_test, y_test))

# Plot the forest's own held-out predictions. `prediction_test` still holds
# the decision tree's predictions at this point, so passing it here would
# simply redraw the tree's confusion matrix.
plot_confusionmatrix(y_test, model.predict(X_test), dom='Test')


## Test Case 1

In [None]:
# Column names that are switched on for the hand-built test case.
t1 = [
    'panic',
    'trouble.in.concentration',
    'having.trouble.in.sleeping',
    'feeling.nervous',
    'sweating',
]

In [None]:
# One row with a 1 for every feature column listed in `t1` and a 0 for the
# rest, in the same order as the first 24 column names.
r = [int(feature in t1) for feature in col[0:24]]
b = [r]
test = pd.DataFrame(b)
test


In [None]:
# Encoded label -> disorder name, same mapping as the original if/elif chain.
DISORDER_NAMES = {
    0: "Anxiety",
    1: "Depression",
    2: "Loneliness",
    3: "Normal",
    4: "Stress",
}

# Classify the hand-built symptom row. Reading y_pred_gini[0] instead would
# report the prediction for the first held-out row, not for this test case.
# The row is re-labelled with the training column names so the fitted tree
# receives the feature names it was trained with.
# NOTE(review): assumes the encoded features use 1 for "symptom present" and
# 0 for "absent" -- confirm against the raw dataset.
case_features = pd.DataFrame(test.to_numpy(), columns=X_train.columns)
case_label = int(clf_gini.predict(case_features)[0])

# Fall back to an explicit message rather than leaving `disorder` undefined
# when the label is outside the known range.
disorder = DISORDER_NAMES.get(case_label, f"Unknown (label {case_label})")

print(disorder)

# Hyperparameter Tuning

In [None]:
# Re-derive the feature frame (first 24 columns) and the target column.
X = data[col[:24]]
Y = data['Disorder']

In [None]:
# Candidate values for each random-forest hyperparameter explored by the
# grid search.

# Number of trees in random forest: 10 evenly spaced values from 20 to 100,
# truncated to integers.
n_estimators = [int(x) for x in np.linspace(start=20, stop=100, num=10)]
# Number of features to consider at every split.
# 'auto' is not offered: for classifiers it was only an alias of 'sqrt'
# (so it doubled the grid without adding a distinct setting), it was
# deprecated in scikit-learn 1.1 and it is rejected from 1.3 onwards.
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree
max_depth = [10, 20]
# Minimum number of samples required to split a node
min_samples_split = [2, 5]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2]
# Method of selecting samples for training each tree
bootstrap = [True, False]

In [None]:
# Create the param grid
import random

# Keys are the RandomForestClassifier argument names; values are the
# candidate lists defined in the previous cell.
param_grid = dict(
    n_estimators=n_estimators,
    max_features=max_features,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    bootstrap=bootstrap,
)
print(param_grid)

In [None]:
# Unfitted forest with library-default settings, used as the base estimator
# for the grid search. This rebinds `model`, replacing the forest fitted earlier.
model = RandomForestClassifier()

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over `param_grid` with 20-fold cross-validation, run on
# 4 parallel workers with progress output enabled.
rf_Grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=20,
    n_jobs=4,
    verbose=2,
)

In [None]:
# Run the search on the training split only; the held-out split is not used here.
rf_Grid.fit(X_train, Y_train)

In [None]:
# Parameter combination selected by the search.
rf_Grid.best_params_

In [None]:
# Score the fitted search object on both splits and report to three decimals.
grid_train_accuracy = rf_Grid.score(X_train, Y_train)
grid_test_accuracy = rf_Grid.score(X_test, y_test)
print (f'Train Accuracy - : {grid_train_accuracy:.3f}')
print (f'Test Accuracy - : {grid_test_accuracy:.3f}')

In [None]:
# Held-out predictions from the fitted grid search. This rebinds
# `prediction_test`, which previously held the decision tree's predictions.
prediction_test = rf_Grid.predict(X_test)

In [None]:
# Confusion matrix for the grid-search predictions (true labels passed first).
plot_confusionmatrix(y_test,prediction_test,dom='Test')