In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score,roc_curve
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV

In [2]:
diabetes = pd.read_csv('diabetes.csv')
df = diabetes.copy()
y = df['Outcome']
X = df.drop(['Outcome'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [3]:
from catboost import CatBoostClassifier

cat = CatBoostClassifier()
cat_model = cat.fit(X_train, y_train)

Learning rate set to 0.0079
0:	learn: 0.6879744	total: 167ms	remaining: 2m 46s
1:	learn: 0.6843414	total: 170ms	remaining: 1m 24s
2:	learn: 0.6805760	total: 173ms	remaining: 57.4s
3:	learn: 0.6765782	total: 175ms	remaining: 43.6s
4:	learn: 0.6720829	total: 178ms	remaining: 35.3s
5:	learn: 0.6681617	total: 180ms	remaining: 29.8s
6:	learn: 0.6641912	total: 182ms	remaining: 25.8s
7:	learn: 0.6602881	total: 184ms	remaining: 22.8s
8:	learn: 0.6562626	total: 186ms	remaining: 20.5s
9:	learn: 0.6528483	total: 188ms	remaining: 18.6s
10:	learn: 0.6487823	total: 190ms	remaining: 17s
11:	learn: 0.6454980	total: 191ms	remaining: 15.8s
12:	learn: 0.6418953	total: 193ms	remaining: 14.7s
13:	learn: 0.6374431	total: 195ms	remaining: 13.8s
14:	learn: 0.6339676	total: 197ms	remaining: 13s
15:	learn: 0.6304252	total: 199ms	remaining: 12.3s
16:	learn: 0.6266998	total: 201ms	remaining: 11.6s
17:	learn: 0.6237660	total: 204ms	remaining: 11.1s
18:	learn: 0.6204394	total: 206ms	remaining: 10.6s
19:	learn: 0.61

In [4]:
y_pred = cat_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7402597402597403

In [5]:
catb_params = {
    'iterations': [200,500],
    'learning_rate': [0.01,0.05, 0.1],
    'depth': [3,5,8]}

In [6]:
cat = CatBoostClassifier()
catb_cv = GridSearchCV(cat, catb_params, cv=10, verbose=2)
catb_cv.fit(X_train, y_train)

Fitting 10 folds for each of 18 candidates, totalling 180 fits
0:	learn: 0.6883388	total: 1.41ms	remaining: 280ms
1:	learn: 0.6843200	total: 3.01ms	remaining: 298ms
2:	learn: 0.6812977	total: 4.26ms	remaining: 280ms
3:	learn: 0.6771244	total: 5.5ms	remaining: 270ms
4:	learn: 0.6726807	total: 6.7ms	remaining: 261ms
5:	learn: 0.6685590	total: 8.03ms	remaining: 260ms
6:	learn: 0.6652424	total: 9.36ms	remaining: 258ms
7:	learn: 0.6619640	total: 10.5ms	remaining: 253ms
8:	learn: 0.6580245	total: 11.8ms	remaining: 250ms
9:	learn: 0.6545874	total: 13ms	remaining: 248ms
10:	learn: 0.6509705	total: 14.2ms	remaining: 244ms
11:	learn: 0.6467622	total: 15.4ms	remaining: 241ms
12:	learn: 0.6432900	total: 17ms	remaining: 244ms
13:	learn: 0.6405843	total: 18.1ms	remaining: 241ms
14:	learn: 0.6380430	total: 19.3ms	remaining: 238ms
15:	learn: 0.6345099	total: 20.5ms	remaining: 235ms
16:	learn: 0.6315432	total: 21.6ms	remaining: 233ms
17:	learn: 0.6280526	total: 22.7ms	remaining: 230ms
18:	learn: 0.6257

In [7]:
catb_cv.best_params_

{'depth': 8, 'iterations': 200, 'learning_rate': 0.01}

In [8]:
catb_tuned = CatBoostClassifier(depth=catb_cv.best_params_['depth'],
                                iterations=catb_cv.best_params_['iterations'],
                                learning_rate=catb_cv.best_params_['learning_rate'])
catb_tuned.fit(X_train, y_train)

0:	learn: 0.6863312	total: 4.17ms	remaining: 830ms
1:	learn: 0.6811157	total: 8.63ms	remaining: 855ms
2:	learn: 0.6752970	total: 12.4ms	remaining: 815ms
3:	learn: 0.6697432	total: 16ms	remaining: 783ms
4:	learn: 0.6641758	total: 19.6ms	remaining: 766ms
5:	learn: 0.6576656	total: 23.3ms	remaining: 752ms
6:	learn: 0.6521870	total: 27ms	remaining: 745ms
7:	learn: 0.6465564	total: 30.7ms	remaining: 737ms
8:	learn: 0.6407595	total: 34.5ms	remaining: 733ms
9:	learn: 0.6357725	total: 38.2ms	remaining: 727ms
10:	learn: 0.6299942	total: 41.9ms	remaining: 720ms
11:	learn: 0.6240480	total: 45.4ms	remaining: 711ms
12:	learn: 0.6196095	total: 49.1ms	remaining: 706ms
13:	learn: 0.6148715	total: 53.1ms	remaining: 706ms
14:	learn: 0.6092626	total: 56.8ms	remaining: 700ms
15:	learn: 0.6046764	total: 60.6ms	remaining: 697ms
16:	learn: 0.6012725	total: 64.1ms	remaining: 690ms
17:	learn: 0.5958121	total: 67.7ms	remaining: 684ms
18:	learn: 0.5903243	total: 71.6ms	remaining: 682ms
19:	learn: 0.5866024	total

<catboost.core.CatBoostClassifier at 0x2587921d670>

In [9]:
y_pred_tuned = catb_tuned.predict(X_test)
accuracy_score(y_test, y_pred_tuned)

0.7532467532467533