### Data Preparation

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Load the raw survey data and tidy it in a single, non-mutating chain:
#   1. read the CSV,
#   2. discard the two columns that are not used as features or label,
#   3. rename headers containing spaces/punctuation to identifier-friendly
#      names (the diagnosis column becomes "Klasifikasi", the target label).
# One rename mapping replaces ten separate in-place renames, so the whole
# header translation is visible in one place and no intermediate state of
# `df` is ever exposed.
df = (
    pd.read_csv("data/mental-disorder.csv")
    .drop(columns=["Patient Number", "Try-Explanation"])
    .rename(
        columns={
            "Sleep dissorder": "Sleep_dissorder",
            "Mood Swing": "Mood_Swing",
            "Suicidal thoughts": "Suicidal_thoughts",
            "Authority Respect": "Authority_Respect",
            "Aggressive Response": "Aggressive_Response",
            "Ignore & Move-On": "Ignore_MoveOn",
            "Nervous Break-down": "Nervous_Breakdown",
            "Admit Mistakes": "Admit_Mistakes",
            "Sexual Activity": "Sexual_Activity",
            "Expert Diagnose": "Klasifikasi",
        }
    )
)

# Preview the first rows to confirm the new headers.
df.head()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep_dissorder,Mood_Swing,Suicidal_thoughts,Anorxia,Authority_Respect,Aggressive_Response,Ignore_MoveOn,Nervous_Breakdown,Admit_Mistakes,Overthinking,Sexual_Activity,Concentration,Optimisim,Klasifikasi
0,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,NO,NO,YES,YES,YES,3 From 10,3 From 10,4 From 10,Bipolar Type-2
1,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,4 From 10,2 From 10,5 From 10,Depression
2,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,NO,YES,YES,NO,6 From 10,5 From 10,7 From 10,Bipolar Type-1
3,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,NO,NO,NO,NO,NO,3 From 10,2 From 10,2 From 10,Bipolar Type-2
4,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,YES,YES,YES,5 From 10,5 From 10,6 From 10,Normal


In [None]:
# Separate the diagnosis label from the predictors, then hold out 20% of the
# rows for evaluation. The split is stratified on the label and seeded via
# random_state so it is repeatable.
y = df["Klasifikasi"]
X = df.drop(columns=["Klasifikasi"])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

# Show the shape of each partition as a quick sanity check.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((96, 16), (24, 16), (96,), (24,))

### PREPROCESSOR

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

In [None]:
# Preprocessing applied to categorical columns: fill missing entries with the
# column's most frequent value, then one-hot encode. With
# handle_unknown="ignore" the encoder does not raise on categories that were
# absent during fit.
categorical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encode", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [None]:
# Route every feature column through the categorical pipeline. All sixteen
# predictors are listed explicitly, including the "N From 10" rating columns,
# which are therefore treated as categories rather than numbers.
preprocessor = ColumnTransformer(
    transformers=[
        (
            "categoric",
            categorical_pipeline,
            [
                "Sadness",
                "Euphoric",
                "Exhausted",
                "Sleep_dissorder",
                "Mood_Swing",
                "Suicidal_thoughts",
                "Anorxia",
                "Authority_Respect",
                "Aggressive_Response",
                "Ignore_MoveOn",
                "Nervous_Breakdown",
                "Admit_Mistakes",
                "Overthinking",
                "Sexual_Activity",
                "Concentration",
                "Optimisim",
            ],
        ),
    ]
)

### PIPELINE RANDOM FOREST

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Baseline model: the shared preprocessor followed by a random forest.
# random_state is pinned (same seed as the train/test split) because an
# unseeded forest draws different bootstrap samples on every run, which makes
# the reported scores and the grid-search winner irreproducible.
# NOTE: outputs recorded before the seed was added may differ after a re-run.
pipeline1 = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algoforest", RandomForestClassifier(n_estimators=56, random_state=0)),
    ]
)

In [None]:
# Fit the preprocessing steps and the forest on the training partition only.
pipeline1.fit(X=X_train, y=y_train)

In [None]:
# Score the baseline pipeline on the held-out test partition.
pipeline1.score(X=X_test, y=y_test)

0.8333333333333334

### RANDOM FOREST WITH GRID SEARCH CV

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Tune the forest size: try every n_estimators value from 1 to 100 using
# 3-fold cross-validation on the training partition, ranked by accuracy.
# error_score="raise" surfaces any failing fit instead of recording a score.
parameter1 = {"algoforest__n_estimators": range(1, 101)}

modelrf = GridSearchCV(
    estimator=pipeline1,
    param_grid=parameter1,
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
    verbose=1,
    error_score="raise",
)
modelrf.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


In [None]:
# Tabulate the cross-validation results, best-ranked candidates first.
pd.DataFrame(data=modelrf.cv_results_).sort_values(by="rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_algoforest__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
86,0.224055,0.011213,0.015196,0.000247,87,{'algoforest__n_estimators': 87},0.93750,0.84375,0.90625,0.895833,0.038976,1
48,0.127994,0.001337,0.011670,0.000035,49,{'algoforest__n_estimators': 49},0.90625,0.81250,0.93750,0.885417,0.053115,2
69,0.177299,0.001753,0.014086,0.000407,70,{'algoforest__n_estimators': 70},0.90625,0.78125,0.93750,0.875000,0.067508,3
50,0.131306,0.001535,0.012199,0.000331,51,{'algoforest__n_estimators': 51},0.87500,0.84375,0.90625,0.875000,0.025516,3
31,0.094089,0.003191,0.010769,0.000372,32,{'algoforest__n_estimators': 32},0.84375,0.81250,0.93750,0.864583,0.053115,5
...,...,...,...,...,...,...,...,...,...,...,...,...
4,0.026918,0.001301,0.008066,0.000095,5,{'algoforest__n_estimators': 5},0.81250,0.56250,0.71875,0.697917,0.103120,96
5,0.028442,0.000968,0.008002,0.000044,6,{'algoforest__n_estimators': 6},0.71875,0.59375,0.71875,0.677083,0.058926,97
3,0.023279,0.000231,0.008763,0.001245,4,{'algoforest__n_estimators': 4},0.71875,0.50000,0.71875,0.645833,0.103120,98
1,0.020422,0.002672,0.008635,0.001505,2,{'algoforest__n_estimators': 2},0.34375,0.53125,0.71875,0.531250,0.153093,99


In [None]:
# Parameter setting of the top-ranked candidate from the grid search.
modelrf.best_params_

{'algoforest__n_estimators': 87}

In [None]:
# Score the tuned model on the training and test partitions, in that order,
# so the two figures can be compared side by side.
tuple(
    modelrf.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

(1.0, 0.875)

### DATA PREDICT

In [None]:
# Two hand-written example respondents. Values are positional and must follow
# the column order of X exactly.
datapred = (
    [
        "Most-Often", "Seldom", "Sometimes", "Sometimes",
        "YES", "NO", "YES", "YES", "NO", "NO", "YES", "YES", "YES",
        "2 From 10", "7 From 10", "3 From 10",
    ],
    [
        "Sometimes", "Sometimes", "Sometimes", "Sometimes",
        "NO", "NO", "YES", "YES", "NO", "NO", "NO", "NO", "NO",
        "9 From 10", "9 From 10", "9 From 10",
    ],
)

# Label the rows by respondent name and reuse the training feature headers.
X_pred = pd.DataFrame(data=datapred, columns=X.columns, index=["Rudi", "Budi"])
X_pred

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep_dissorder,Mood_Swing,Suicidal_thoughts,Anorxia,Authority_Respect,Aggressive_Response,Ignore_MoveOn,Nervous_Breakdown,Admit_Mistakes,Overthinking,Sexual_Activity,Concentration,Optimisim
Rudi,Most-Often,Seldom,Sometimes,Sometimes,YES,NO,YES,YES,NO,NO,YES,YES,YES,2 From 10,7 From 10,3 From 10
Budi,Sometimes,Sometimes,Sometimes,Sometimes,NO,NO,YES,YES,NO,NO,NO,NO,NO,9 From 10,9 From 10,9 From 10


In [None]:
# Predict a diagnosis for each example respondent with the tuned model.
modelrf.predict(X=X_pred)

array(['Bipolar Type-2', 'Normal'], dtype=object)

### PICKLE MODEL

In [None]:
import pickle

In [None]:
# Persist the fitted grid-search object (preprocessing + tuned forest).
# The context manager guarantees the file handle is flushed and closed even
# if pickling raises; the bare open() call previously left it to the garbage
# collector.
# NOTE: pickle files can execute arbitrary code on load; only unpickle this
# artifact from a trusted location.
with open("data/modelml.pkl", "wb") as model_file:
    pickle.dump(modelrf, model_file)