# CatBoost

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier

from warnings import filterwarnings
filterwarnings('ignore')

## Model & Tahmin

In [3]:
diabetes = pd.read_csv("diabetes.csv")
df = diabetes.copy()
df = df.dropna()
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
y = df["Outcome"]
X = df.drop(['Outcome'], axis=1)
X = pd.DataFrame(X)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.30,
                                                    random_state=42)

In [5]:
cat_model = CatBoostClassifier().fit(X_train, y_train)

Learning rate set to 0.0079
0:	learn: 0.6879744	total: 51ms	remaining: 51s
1:	learn: 0.6843414	total: 53.4ms	remaining: 26.6s
2:	learn: 0.6805760	total: 55.1ms	remaining: 18.3s
3:	learn: 0.6765782	total: 56.1ms	remaining: 14s
4:	learn: 0.6720829	total: 57.2ms	remaining: 11.4s
5:	learn: 0.6681617	total: 58.1ms	remaining: 9.62s
6:	learn: 0.6641912	total: 59ms	remaining: 8.38s
7:	learn: 0.6602881	total: 60ms	remaining: 7.44s
8:	learn: 0.6562626	total: 60.9ms	remaining: 6.71s
9:	learn: 0.6528483	total: 61.9ms	remaining: 6.13s
10:	learn: 0.6487823	total: 62.9ms	remaining: 5.65s
11:	learn: 0.6454980	total: 63.8ms	remaining: 5.25s
12:	learn: 0.6418953	total: 64.7ms	remaining: 4.91s
13:	learn: 0.6374431	total: 65.7ms	remaining: 4.62s
14:	learn: 0.6339676	total: 66.7ms	remaining: 4.38s
15:	learn: 0.6304252	total: 67.6ms	remaining: 4.16s
16:	learn: 0.6266998	total: 68.6ms	remaining: 3.96s
17:	learn: 0.6237660	total: 69.5ms	remaining: 3.79s
18:	learn: 0.6204394	total: 70.4ms	remaining: 3.63s
19:	

In [6]:
y_pred = cat_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7402597402597403

## Model Tuning

In [7]:
catb_params = {
    'iterations': [200,500],
    'learning_rate': [0.01,0.05, 0.1],
    'depth': [3,5,8] }

In [8]:
catb = CatBoostClassifier()
catb_cv_model = GridSearchCV(catb, catb_params, cv=5, n_jobs = -1, verbose = 2)
catb_cv_model.fit(X_train, y_train)
catb_cv_model.best_params_

Fitting 5 folds for each of 18 candidates, totalling 90 fits
0:	learn: 0.6884954	total: 48.2ms	remaining: 9.6s
1:	learn: 0.6829677	total: 49ms	remaining: 4.85s
2:	learn: 0.6799672	total: 49.7ms	remaining: 3.26s
3:	learn: 0.6760577	total: 50.3ms	remaining: 2.46s
4:	learn: 0.6703680	total: 51ms	remaining: 1.99s
5:	learn: 0.6657474	total: 51.9ms	remaining: 1.68s
6:	learn: 0.6620801	total: 53.7ms	remaining: 1.48s
7:	learn: 0.6588847	total: 56ms	remaining: 1.34s
8:	learn: 0.6551782	total: 58.5ms	remaining: 1.24s
9:	learn: 0.6511625	total: 59.2ms	remaining: 1.12s
10:	learn: 0.6479557	total: 59.9ms	remaining: 1.03s
11:	learn: 0.6445218	total: 68.7ms	remaining: 1.08s
12:	learn: 0.6409399	total: 69.5ms	remaining: 999ms
13:	learn: 0.6373448	total: 72.2ms	remaining: 959ms
14:	learn: 0.6343412	total: 74.6ms	remaining: 920ms
15:	learn: 0.6308872	total: 75.3ms	remaining: 866ms
16:	learn: 0.6259389	total: 77.8ms	remaining: 837ms
17:	learn: 0.6225832	total: 81.1ms	remaining: 820ms
18:	learn: 0.6194172

{'depth': 8, 'iterations': 200, 'learning_rate': 0.01}

In [9]:
catb = CatBoostClassifier(iterations = 200,
                          learning_rate = 0.01,
                          depth = 8)

catb_tuned = catb.fit(X_train, y_train)
y_pred = catb_tuned.predict(X_test)

0:	learn: 0.6863312	total: 4.23ms	remaining: 841ms
1:	learn: 0.6811157	total: 8.08ms	remaining: 800ms
2:	learn: 0.6752970	total: 11.3ms	remaining: 739ms
3:	learn: 0.6697432	total: 13.9ms	remaining: 682ms
4:	learn: 0.6641758	total: 16.8ms	remaining: 654ms
5:	learn: 0.6576656	total: 20ms	remaining: 647ms
6:	learn: 0.6521870	total: 23.2ms	remaining: 641ms
7:	learn: 0.6465564	total: 25.6ms	remaining: 615ms
8:	learn: 0.6407595	total: 28.3ms	remaining: 600ms
9:	learn: 0.6357725	total: 30.8ms	remaining: 584ms
10:	learn: 0.6299942	total: 33.1ms	remaining: 568ms
11:	learn: 0.6240480	total: 35.8ms	remaining: 560ms
12:	learn: 0.6196095	total: 38.6ms	remaining: 555ms
13:	learn: 0.6148715	total: 41.6ms	remaining: 552ms
14:	learn: 0.6092626	total: 43.7ms	remaining: 539ms
15:	learn: 0.6046764	total: 46.3ms	remaining: 532ms
16:	learn: 0.6012725	total: 48.8ms	remaining: 525ms
17:	learn: 0.5958121	total: 52.2ms	remaining: 528ms
18:	learn: 0.5903243	total: 54.8ms	remaining: 522ms
19:	learn: 0.5866024	tot

In [10]:
y_pred = catb_tuned.predict(X_test)
accuracy_score(y_test, y_pred)

0.7532467532467533