### Data Preparation 

In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split 

In [2]:
# Load the raw dataset, drop the "Patient Number" and "Try-Explanation"
# columns, and rename the remaining multi-word columns so they contain no
# spaces or punctuation. "Expert Diagnose" becomes "Klasifikasi", which is
# the column used as the label further down.
df = (
    pd.read_csv("data/mental-disorder.csv")
    .drop(columns=["Patient Number", "Try-Explanation"])
    .rename(
        columns={
            "Sleep dissorder": "Sleep_dissorder",
            "Mood Swing": "Mood_Swing",
            "Suicidal thoughts": "Suicidal_thoughts",
            "Authority Respect": "Authority_Respect",
            "Aggressive Response": "Aggressive_Response",
            "Ignore & Move-On": "Ignore_MoveOn",
            "Nervous Break-down": "Nervous_Breakdown",
            "Admit Mistakes": "Admit_Mistakes",
            "Sexual Activity": "Sexual_Activity",
            "Expert Diagnose": "Klasifikasi",
        }
    )
)

# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep_dissorder,Mood_Swing,Suicidal_thoughts,Anorxia,Authority_Respect,Aggressive_Response,Ignore_MoveOn,Nervous_Breakdown,Admit_Mistakes,Overthinking,Sexual_Activity,Concentration,Optimisim,Klasifikasi
0,Usually,Seldom,Sometimes,Sometimes,YES,YES,NO,NO,NO,NO,YES,YES,YES,3 From 10,3 From 10,4 From 10,Bipolar Type-2
1,Usually,Seldom,Usually,Sometimes,NO,YES,NO,NO,NO,NO,NO,NO,NO,4 From 10,2 From 10,5 From 10,Depression
2,Sometimes,Most-Often,Sometimes,Sometimes,YES,NO,NO,NO,YES,NO,YES,YES,NO,6 From 10,5 From 10,7 From 10,Bipolar Type-1
3,Usually,Seldom,Usually,Most-Often,YES,YES,YES,NO,NO,NO,NO,NO,NO,3 From 10,2 From 10,2 From 10,Bipolar Type-2
4,Usually,Usually,Sometimes,Sometimes,NO,NO,NO,NO,NO,NO,YES,YES,YES,5 From 10,5 From 10,6 From 10,Normal


In [3]:
# Separate the label column ("Klasifikasi") from the feature columns.
y = df["Klasifikasi"]
X = df.drop(columns=["Klasifikasi"])

# Hold out 20% of the rows for testing. The split is stratified on the label
# and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

# Show the shape of each of the four pieces.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((96, 16), (24, 16), (96,), (24,))

### PREPROCESSOR 

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

In [5]:
# Steps applied to categorical columns: fill missing values with the most
# frequent value of the column, then one-hot encode. With
# handle_unknown="ignore", categories not seen during fit do not raise at
# transform time.
categorical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encode", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [6]:
# Route the sixteen listed feature columns through the categorical pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        (
            "categoric",
            categorical_pipeline,
            [
                "Sadness",
                "Euphoric",
                "Exhausted",
                "Sleep_dissorder",
                "Mood_Swing",
                "Suicidal_thoughts",
                "Anorxia",
                "Authority_Respect",
                "Aggressive_Response",
                "Ignore_MoveOn",
                "Nervous_Breakdown",
                "Admit_Mistakes",
                "Overthinking",
                "Sexual_Activity",
                "Concentration",
                "Optimisim",
            ],
        ),
    ]
)

### PIPELINE RANDOM FOREST

In [7]:
from sklearn.ensemble import RandomForestClassifier

In [22]:
# End-to-end model: the column preprocessor followed by a random forest.
# random_state is pinned (same seed as the train/test split) so that the
# forest -- and any search that clones this pipeline -- gives the same
# result on every Restart & Run All. Without it, scores and the selected
# hyper-parameters change from run to run.
pipeline1 = Pipeline([
    ("prep", preprocessor),
    ("algoforest", RandomForestClassifier(n_estimators=56, random_state=0)),
])

In [23]:
# Fit the preprocessing + random-forest pipeline on the training split.
pipeline1.fit(X_train,y_train)

Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('categoric',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('encode',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  ['Sadness', 'Euphoric',
                                                   'Exhausted',
                                                   'Sleep_dissorder',
                                                   'Mood_Swing',
                                                   'Suicidal_thoughts',
                                                   'Anorxia',
                                                   'Authority_Respect',
                                                   'Aggress

In [24]:
# Score the fitted pipeline on the held-out test split.
pipeline1.score(X_test,y_test)

0.875

### RANDOM FOREST WITH GRID SEARCH CV

In [11]:
from sklearn.model_selection import GridSearchCV

In [25]:
# Candidate values for the forest size: every integer from 1 to 100, applied
# to the "algoforest" step of the pipeline.
parameter1 = {"algoforest__n_estimators": range(1, 101)}

# Grid search with 3-fold cross-validation, scored by accuracy and run on all
# available cores. error_score="raise" makes a failing fit raise immediately.
modelrf = GridSearchCV(
    estimator=pipeline1,
    param_grid=parameter1,
    scoring="accuracy",
    n_jobs=-1,
    cv=3,
    verbose=1,
    error_score="raise",
)
modelrf.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


GridSearchCV(cv=3, error_score='raise',
             estimator=Pipeline(steps=[('prep',
                                        ColumnTransformer(transformers=[('categoric',
                                                                         Pipeline(steps=[('imputer',
                                                                                          SimpleImputer(strategy='most_frequent')),
                                                                                         ('encode',
                                                                                          OneHotEncoder(handle_unknown='ignore'))]),
                                                                         ['Sadness',
                                                                          'Euphoric',
                                                                          'Exhausted',
                                                                          'Sleep_dissorder',
         

In [26]:
# Tabulate the cross-validation results of every candidate, best rank first.
pd.DataFrame(modelrf.cv_results_).sort_values("rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_algoforest__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
94,0.237020,0.002944,0.017668,4.706415e-04,95,{'algoforest__n_estimators': 95},0.84375,0.87500,0.93750,0.885417,0.038976,1
33,0.096674,0.003091,0.016670,7.320307e-03,34,{'algoforest__n_estimators': 34},0.87500,0.84375,0.90625,0.875000,0.025516,2
42,0.122677,0.004992,0.012667,4.722735e-04,43,{'algoforest__n_estimators': 43},0.87500,0.81250,0.93750,0.875000,0.051031,2
85,0.212016,0.005657,0.020336,5.437068e-03,86,{'algoforest__n_estimators': 86},0.87500,0.81250,0.93750,0.875000,0.051031,2
67,0.179014,0.000814,0.015668,4.701344e-04,68,{'algoforest__n_estimators': 68},0.87500,0.81250,0.90625,0.864583,0.038976,5
...,...,...,...,...,...,...,...,...,...,...,...,...
3,0.032001,0.001413,0.010001,2.158730e-03,4,{'algoforest__n_estimators': 4},0.71875,0.62500,0.87500,0.739583,0.103120,95
9,0.041337,0.000472,0.012000,7.370010e-07,10,{'algoforest__n_estimators': 10},0.65625,0.75000,0.78125,0.729167,0.053115,97
2,0.037009,0.010819,0.016993,6.474254e-03,3,{'algoforest__n_estimators': 3},0.71875,0.46875,0.87500,0.687500,0.167316,98
1,0.024002,0.000818,0.009004,8.215979e-04,2,{'algoforest__n_estimators': 2},0.75000,0.62500,0.56250,0.645833,0.077951,99


In [14]:
# Show the hyper-parameter combination selected by the grid search.
# NOTE(review): this cell's execution count (In [14]) is older than the
# surrounding cells (In [25]-In [27]), so the saved output may be stale --
# re-run after the search to confirm.
modelrf.best_params_

{'algoforest__n_estimators': 56}

In [27]:
# Compare the searched model's score on the training split and the test split.
modelrf.score(X_train,y_train), modelrf.score(X_test,y_test)

(1.0, 0.875)

### PREDICT NEW DATA

In [28]:
# Two hand-written respondents, one list each; the values are positional and
# are labelled with X.columns when the frame is built below.
datapred = (
    [
        "Most-Often", "Seldom", "Sometimes", "Sometimes",
        "YES", "NO", "YES", "YES", "NO", "NO", "YES", "YES", "YES",
        "2 From 10", "7 From 10", "3 From 10",
    ],
    [
        "Sometimes", "Sometimes", "Sometimes", "Sometimes",
        "NO", "NO", "YES", "YES", "NO", "NO", "NO", "NO", "NO",
        "9 From 10", "9 From 10", "9 From 10",
    ],
)

# One row per respondent, indexed by name, with the training feature columns.
X_pred = pd.DataFrame(data=datapred, index=["Rudi", "Budi"], columns=X.columns)
X_pred

Unnamed: 0,Sadness,Euphoric,Exhausted,Sleep_dissorder,Mood_Swing,Suicidal_thoughts,Anorxia,Authority_Respect,Aggressive_Response,Ignore_MoveOn,Nervous_Breakdown,Admit_Mistakes,Overthinking,Sexual_Activity,Concentration,Optimisim
Rudi,Most-Often,Seldom,Sometimes,Sometimes,YES,NO,YES,YES,NO,NO,YES,YES,YES,2 From 10,7 From 10,3 From 10
Budi,Sometimes,Sometimes,Sometimes,Sometimes,NO,NO,YES,YES,NO,NO,NO,NO,NO,9 From 10,9 From 10,9 From 10


In [29]:
# Predict a class label for each row of X_pred with the searched model.
modelrf.predict(X_pred)

array(['Bipolar Type-2', 'Normal'], dtype=object)

### PICKLE MODEL

In [33]:
import pickle

In [34]:
# Serialise the fitted grid-search object to disk. The context manager
# guarantees the file handle is flushed and closed even if pickling fails;
# the original passed a bare open() whose handle was never closed.
with open("data/modelml.pkl", "wb") as model_file:
    pickle.dump(modelrf, model_file)