In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score 

In [4]:
kesehatan_mental_dataset = pd.read_csv('Depression-Professional-Dataset.csv')

In [5]:
kesehatan_mental_dataset.head()

Unnamed: 0,Gender,Age,Work Pressure,Job Satisfaction,Sleep Duration,Dietary Habits,Have you ever had suicidal thoughts,Work Hours,Financial Stress,Family History of Mental Illness,Depression
0,Female,37,2,4,7-8 hours,Moderate,1,6,2,No,1
1,Male,60,4,3,5-6 hours,Unhealthy,0,0,4,Yes,1
2,Female,42,2,3,5-6 hours,Moderate,1,0,2,No,1
3,Female,44,3,5,7-8 hours,Healthy,0,1,2,Yes,1
4,Male,48,4,3,7-8 hours,Moderate,0,6,5,Yes,1


In [6]:
kesehatan_mental_dataset.shape

(2054, 11)

In [7]:
kesehatan_mental_dataset['Depression'].value_counts()

Depression
1    1851
0     203
Name: count, dtype: int64

In [8]:
features = ['Age','Work Pressure','Job Satisfaction','Have you ever had suicidal thoughts','Work Hours','Financial Stress']
x = kesehatan_mental_dataset[features]
y = kesehatan_mental_dataset['Depression']
x.shape, y.shape

((2054, 6), (2054,))

In [9]:
print(x)

      Age  Work Pressure  Job Satisfaction  \
0      37              2                 4   
1      60              4                 3   
2      42              2                 3   
3      44              3                 5   
4      48              4                 3   
...   ...            ...               ...   
2049   45              1                 3   
2050   25              1                 5   
2051   23              3                 1   
2052   24              1                 4   
2053   56              2                 3   

      Have you ever had suicidal thoughts  Work Hours  Financial Stress  
0                                       1           6                 2  
1                                       0           0                 4  
2                                       1           0                 2  
3                                       0           1                 2  
4                                       0           6                 5  
...

In [10]:
print(y)

0       1
1       1
2       1
3       1
4       1
       ..
2049    1
2050    1
2051    0
2052    1
2053    1
Name: Depression, Length: 2054, dtype: int64


In [11]:
scaler = StandardScaler()

In [12]:
scaler.fit(x)

In [13]:
standarized_data = scaler.transform(x)

In [14]:
print(standarized_data)

[[-0.45135923 -0.72119407  0.69453264  0.96365891  0.01832306 -0.69254486]
 [ 1.55589979  0.69027103 -0.01064286 -1.03771157 -1.57191237  0.72286523]
 [-0.01499857 -0.72119407 -0.01064286  0.96365891 -1.57191237 -0.69254486]
 ...
 [-1.67316906 -0.01546152 -1.42099387 -1.03771157  0.54840154  0.01516019]
 [-1.58589693 -1.42692662  0.69453264 -1.03771157 -0.51175541  0.72286523]
 [ 1.20681127 -0.72119407 -0.01064286  0.96365891 -0.51175541  1.43057028]]


In [15]:
x = standarized_data
y = kesehatan_mental_dataset['Depression']

In [16]:
print(x)
print(y)

[[-0.45135923 -0.72119407  0.69453264  0.96365891  0.01832306 -0.69254486]
 [ 1.55589979  0.69027103 -0.01064286 -1.03771157 -1.57191237  0.72286523]
 [-0.01499857 -0.72119407 -0.01064286  0.96365891 -1.57191237 -0.69254486]
 ...
 [-1.67316906 -0.01546152 -1.42099387 -1.03771157  0.54840154  0.01516019]
 [-1.58589693 -1.42692662  0.69453264 -1.03771157 -0.51175541  0.72286523]
 [ 1.20681127 -0.72119407 -0.01064286  0.96365891 -0.51175541  1.43057028]]
0       1
1       1
2       1
3       1
4       1
       ..
2049    1
2050    1
2051    0
2052    1
2053    1
Name: Depression, Length: 2054, dtype: int64


In [17]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, stratify=y, random_state=4)

In [18]:
print(x.shape, x_train.shape, x_test.shape)

(2054, 6) (1232, 6) (822, 6)


In [19]:
classifier = svm.SVC(kernel='linear')

In [20]:
classifier.fit(x_train, y_train)

In [21]:
x_train_prediction = classifier.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)

In [22]:
print('Akurasi data training adalah = ', training_data_accuracy)

Akurasi data training adalah =  0.976461038961039


In [23]:
x_test_prediction = classifier.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)

In [24]:
print('Akurasi data testing adalah = ', test_data_accuracy)

Akurasi data testing adalah =  0.9793187347931873


In [25]:
input_data = (18, 3, 1, 0, 7, 3)

input_data_as_numpy_array = np.array(input_data)

input_data_reshape = input_data_as_numpy_array.reshape(1,-1)

std_data = scaler.transform(input_data_reshape)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if (prediction[0] == 0):
    print('pasien tidak depresi')
else :
    print('pasien depresi')

[[-2.10952972 -0.01546152 -1.42099387 -1.03771157  0.2833623   0.01516019]]
[0]
pasien tidak depresi




In [26]:
import pickle

In [27]:
filename = 'depresi_model.sav'
pickle.dump(classifier, open(filename,'wb'))