# **Mental Health Prediction**

## Importing the libraries

In [20]:
import numpy as np
import pandas as pd

## Importing the dataset

In [21]:
# Read the raw table from disk.
dataset = pd.read_csv('dataset[1].csv')
# Every column except the last is a feature; the last column is the label.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values
# m = number of rows, n = number of feature columns in X.
m = X.shape[0]
n = X.shape[1]

In [22]:
# Show the raw feature matrix; NumPy abbreviates large arrays with '...'.
print(X)

[['yes' 'yes' 'yes' ... 'no' 'no' 'no']
 ['no' 'no' 'no' ... 'no' 'no' 'no']
 ['no' 'no' 'no' ... 'no' 'no' 'no']
 ...
 ['no' 'no' 'no' ... 'no' 'no' 'no']
 ['no' 'no' 'no' ... 'yes' 'yes' 'yes']
 ['no' 'no' 'no' ... 'no' 'no' 'no']]


In [23]:
# Show the raw target labels (still strings at this point).
print(y)

['Anxiety' 'Depression' 'Loneliness' ... 'Loneliness' 'Stress' 'Normal']


## Encoding Categorical Data

### Encoding The Independent Variable

In [24]:
from sklearn.preprocessing import LabelEncoder

# Encode every feature column of X as integers. X already excludes the target
# column, so all n columns are processed.
# Each column gets its own encoder, kept in `feature_encoders` (same order as the
# columns), so raw values can be encoded or decoded consistently later on.
feature_encoders = []
encoded_columns = []
for i in range(n):
    column_encoder = LabelEncoder()
    encoded_columns.append(column_encoder.fit_transform(X[:, i]))
    feature_encoders.append(column_encoder)

# Build a fresh integer matrix instead of assigning into X in place: the original
# array may share memory with `dataset`, and in-place writes would also leave X
# with object dtype.
if encoded_columns:
    X = np.column_stack(encoded_columns)

# Kept under its original name: the target-encoding cell below refits this encoder
# on y, and the final cell reads le.classes_.
le = LabelEncoder()

In [25]:
# Show the feature matrix again, now that the encoding cell has run.
print(X)

[[1 1 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 1 1 1]
 [0 0 0 ... 0 0 0]]


### Encoding The Dependent Variable

In [26]:
#converting y into a one D array
# Make sure the labels are a 1-D NumPy array, then map each class name to an integer.
# Refitting `le` here replaces whatever it was fitted on before, so from this point
# le.classes_ lists the target class names in index order.
y = le.fit_transform(np.array(y))

In [27]:
# Show the integer-encoded target labels.
print(y)

[0 1 2 ... 2 4 3]


## Splitting the dataset into the Training set and Test set

In [28]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [29]:
# Show the training features.
print(X_train)

[[1 1 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 1 1]
 [0 0 0 ... 1 1 1]
 [0 0 0 ... 0 0 0]]


In [30]:
# Show the training labels.
print(y_train)

[0 1 2 ... 4 4 2]


In [31]:
# Show the held-out test features.
print(X_test)

[[0 0 0 ... 1 1 1]
 [0 0 0 ... 1 1 1]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 1 1 1]]


In [32]:
# Show the held-out test labels.
print(y_test)

[4 4 2 ... 1 2 4]


## Training the Logistic Regression model on the Training set

In [33]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with library defaults; only the seed is pinned.
# Drop-in alternatives noted by the original author (all with random_state=0):
#   sklearn.ensemble.RandomForestClassifier(n_estimators=100, criterion='entropy')
#   sklearn.tree.DecisionTreeClassifier(criterion='entropy')
#   sklearn.svm.SVC(kernel='linear')
classifier = LogisticRegression(random_state=0)

# Left as the last expression so the fitted estimator is shown as the cell output.
classifier.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Saving The Model


In [34]:
import joblib

# Path (relative to the working directory) the fitted classifier is written to.
file_name = 'mentalHealthFinalised'

# Persist the model; joblib.dump returns the list of files written, which is
# shown as the cell output.
joblib.dump(classifier, file_name)


['mentalHealthFinalised']

### Loading The Model From The Disk

In [35]:
import joblib

# Reload the classifier from the exact path it was dumped to in the saving cell.
# The previous hard-coded 'mentalHealthFinalised.sav' did not match the file that
# was written ('mentalHealthFinalised'), so a fresh run would either fail with
# FileNotFoundError or pick up a stale file left by an earlier session.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
loaded_model = joblib.load(file_name)

## Predicting the Test set results

In [36]:
# Predict the class index for every held-out row using the reloaded model.
y_pred = loaded_model.predict(X_test)

# Two-column view: predicted class on the left, true class on the right.
print(np.column_stack((y_pred, y_test)))

[[4 4]
 [4 4]
 [2 2]
 ...
 [1 1]
 [2 2]
 [4 4]]


## Making the Confusion Matrix

In [37]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Rows are the true classes and columns the predicted classes, in label order.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Fraction of test rows classified correctly; left as the last expression so it
# is displayed as the cell output.
# NOTE(review): a perfect score on held-out data is unusual - worth checking for
# duplicated rows or features that encode the label directly.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1965    0    0    0    0]
 [   0 2058    0    0    0]
 [   0    0 2086    0    0]
 [   0    0    0 2069    0]
 [   0    0    0    0 2062]]


1.0

## Predicting a new result

In [38]:
#testing the model on a random input
print(loaded_model.predict([[0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0]]))
print(le.classes_)

[3]
['Anxiety' 'Depression' 'Loneliness' 'Normal' 'Stress']
