In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
from sklearn.preprocessing import scale, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score,roc_curve
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from warnings import filterwarnings
from skompiler import skompile
filterwarnings('ignore')

In [2]:
diabetes = pd.read_csv('diabetes.csv')
df = diabetes.copy()
df = df.dropna()
y = df['Outcome']
X = df.drop(['Outcome'],axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.30, random_state = 44)

In [3]:
ctb = CatBoostClassifier()
ctb_model = ctb.fit(X_train, y_train)

Learning rate set to 0.0079
0:	learn: 0.6886863	total: 271ms	remaining: 4m 30s
1:	learn: 0.6852782	total: 273ms	remaining: 2m 16s
2:	learn: 0.6818015	total: 275ms	remaining: 1m 31s
3:	learn: 0.6781808	total: 278ms	remaining: 1m 9s
4:	learn: 0.6743971	total: 279ms	remaining: 55.6s
5:	learn: 0.6707236	total: 281ms	remaining: 46.5s
6:	learn: 0.6668372	total: 283ms	remaining: 40.1s
7:	learn: 0.6634877	total: 285ms	remaining: 35.3s
8:	learn: 0.6595892	total: 287ms	remaining: 31.6s
9:	learn: 0.6568778	total: 288ms	remaining: 28.6s
10:	learn: 0.6529774	total: 290ms	remaining: 26.1s
11:	learn: 0.6494063	total: 292ms	remaining: 24s
12:	learn: 0.6457360	total: 293ms	remaining: 22.3s
13:	learn: 0.6429862	total: 295ms	remaining: 20.8s
14:	learn: 0.6400116	total: 297ms	remaining: 19.5s
15:	learn: 0.6365383	total: 299ms	remaining: 18.4s
16:	learn: 0.6341990	total: 301ms	remaining: 17.4s
17:	learn: 0.6315444	total: 303ms	remaining: 16.5s
18:	learn: 0.6285903	total: 305ms	remaining: 15.7s
19:	learn: 0

In [4]:
y_pred = ctb_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7619047619047619

In [5]:
#TUNING

In [7]:
ctb_params = {
    'iterations':[200, 500],
    'learning_rate':[0.01, 0.05, 0.1],
    'depth':[3,5,8]
}
ctb = CatBoostClassifier()

In [8]:
lgbm_cv_model = GridSearchCV(ctb, ctb_params, cv = 10, n_jobs = -1, verbose = 2).fit(X_train, y_train)

Fitting 10 folds for each of 18 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   49.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:  1.4min finished


0:	learn: 0.6874959	total: 2.02ms	remaining: 1.01s
1:	learn: 0.6828716	total: 3.72ms	remaining: 926ms
2:	learn: 0.6782000	total: 5.37ms	remaining: 890ms
3:	learn: 0.6739374	total: 7.08ms	remaining: 878ms
4:	learn: 0.6697008	total: 8.72ms	remaining: 864ms
5:	learn: 0.6660428	total: 10.7ms	remaining: 884ms
6:	learn: 0.6617083	total: 12.7ms	remaining: 893ms
7:	learn: 0.6576917	total: 14.4ms	remaining: 887ms
8:	learn: 0.6532995	total: 15.9ms	remaining: 868ms
9:	learn: 0.6496044	total: 17.4ms	remaining: 852ms
10:	learn: 0.6464125	total: 18.9ms	remaining: 842ms
11:	learn: 0.6416357	total: 20.4ms	remaining: 832ms
12:	learn: 0.6382670	total: 21.8ms	remaining: 816ms
13:	learn: 0.6342218	total: 23.9ms	remaining: 829ms
14:	learn: 0.6304498	total: 25.4ms	remaining: 823ms
15:	learn: 0.6277213	total: 27ms	remaining: 818ms
16:	learn: 0.6253428	total: 28.4ms	remaining: 808ms
17:	learn: 0.6213522	total: 29.9ms	remaining: 800ms
18:	learn: 0.6180509	total: 31.3ms	remaining: 792ms
19:	learn: 0.6152436	tot

In [9]:
lgbm_cv_model.best_params_

{'depth': 5, 'iterations': 500, 'learning_rate': 0.01}

In [12]:
lgbm_tuned = CatBoostClassifier(
     learning_rate = 0.01,
     iterations = 500,
     depth = 5).fit(X_train, y_train)

0:	learn: 0.6874959	total: 1.55ms	remaining: 774ms
1:	learn: 0.6828716	total: 2.92ms	remaining: 728ms
2:	learn: 0.6782000	total: 4.2ms	remaining: 695ms
3:	learn: 0.6739374	total: 5.4ms	remaining: 669ms
4:	learn: 0.6697008	total: 6.62ms	remaining: 655ms
5:	learn: 0.6660428	total: 7.92ms	remaining: 653ms
6:	learn: 0.6617083	total: 9.54ms	remaining: 672ms
7:	learn: 0.6576917	total: 10.9ms	remaining: 668ms
8:	learn: 0.6532995	total: 12.4ms	remaining: 679ms
9:	learn: 0.6496044	total: 13.9ms	remaining: 683ms
10:	learn: 0.6464125	total: 15.2ms	remaining: 676ms
11:	learn: 0.6416357	total: 16.5ms	remaining: 671ms
12:	learn: 0.6382670	total: 17.9ms	remaining: 670ms
13:	learn: 0.6342218	total: 19.3ms	remaining: 669ms
14:	learn: 0.6304498	total: 21ms	remaining: 679ms
15:	learn: 0.6277213	total: 22.5ms	remaining: 682ms
16:	learn: 0.6253428	total: 23.9ms	remaining: 678ms
17:	learn: 0.6213522	total: 25.4ms	remaining: 680ms
18:	learn: 0.6180509	total: 27.3ms	remaining: 691ms
19:	learn: 0.6152436	total

In [14]:
y_pred = lgbm_cv_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7792207792207793