In [255]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [256]:
df_original = pd.read_csv("car_acceptability.txt", sep = ",", header = None)
df_original.head()

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,?,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,?,unacc


In [257]:
df = df_original.copy()

In [258]:
df.columns = ["price", "maint", "doors", "capacity", "lugg_capacity", "safety", "situation"] # sütunları yeniden adlandırdık

In [259]:
df.head()

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety,situation
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,?,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,?,unacc


In [260]:
df.info() # datamız genel olarak temiz gözüküyor lakin bütün sütunlar object değerinde sayısal sütunların içinde str değerleri olabilir.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1729 entries, 0 to 1728
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   price          1728 non-null   object
 1   maint          1729 non-null   object
 2   doors          1729 non-null   object
 3   capacity       1729 non-null   object
 4   lugg_capacity  1729 non-null   object
 5   safety         1728 non-null   object
 6   situation      1729 non-null   object
dtypes: object(7)
memory usage: 94.7+ KB


In [261]:
# sütünlardaki bütün kategoilere bakıyoruz
for col in df.columns:
    print("****************************")
    print(f"{col} sütunu")
    print("****************************")
    print(df[col].value_counts())

****************************
price sütunu
****************************
high     432
med      431
vhigh    430
low      430
?          3
-          1
düsük      1
Name: price, dtype: int64
****************************
maint sütunu
****************************
high     432
med      432
low      432
vhigh    429
?          4
Name: maint, dtype: int64
****************************
doors sütunu
****************************
3        432
2        430
4        430
5more    430
?          3
-          1
44         1
iki        1
5+         1
Name: doors, dtype: int64
****************************
capacity sütunu
****************************
4       576
more    576
2       574
?         3
Name: capacity, dtype: int64
****************************
lugg_capacity sütunu
****************************
small    576
big      576
med      575
?          2
Name: lugg_capacity, dtype: int64
****************************
safety sütunu
****************************
high    576
med     574
low     573
?         2


In [262]:
# Target sütunu dışında verimiz dengeli dağılmış gözüküyor, ayrıyeten verimizde bir kaç tane geçersiz 
# değerler var bu değerleri uygun bir şekilde ele alacağız.

In [263]:
# geçersiz değerleri geçerli olabilecek değerlere dönüştürüyoruz 
df.replace({"?": np.nan, "*": np.nan, "-": np.nan, "düsük": "low", "5more": "5", "5+": "5", "iki": "2", "44":"4", "more": "5"}, inplace = True)

In [264]:
for col in df.columns:
    print("****************************")
    print(f"{col} sütunu")
    print("****************************")
    print(df[col].value_counts())

****************************
price sütunu
****************************
high     432
med      431
low      431
vhigh    430
Name: price, dtype: int64
****************************
maint sütunu
****************************
high     432
med      432
low      432
vhigh    429
Name: maint, dtype: int64
****************************
doors sütunu
****************************
3    432
2    431
4    431
5    431
Name: doors, dtype: int64
****************************
capacity sütunu
****************************
4    576
5    576
2    574
Name: capacity, dtype: int64
****************************
lugg_capacity sütunu
****************************
small    576
big      576
med      575
Name: lugg_capacity, dtype: int64
****************************
safety sütunu
****************************
high    576
med     574
low     573
Name: safety, dtype: int64
****************************
situation sütunu
****************************
unacc    1209
acc       384
good       69
vgood      65
Name: situation, dtyp

In [358]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1729 entries, 0 to 1728
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   price          1724 non-null   object
 1   maint          1725 non-null   object
 2   doors          1725 non-null   object
 3   capacity       1726 non-null   object
 4   lugg_capacity  1727 non-null   object
 5   safety         1723 non-null   object
 6   situation      1727 non-null   object
dtypes: object(7)
memory usage: 94.7+ KB


In [359]:
df.head()

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety,situation
0,vhigh,vhigh,2,2.0,small,low,unacc
1,vhigh,vhigh,2,2.0,small,med,unacc
2,vhigh,vhigh,2,,small,high,unacc
3,vhigh,vhigh,2,2.0,med,low,unacc
4,vhigh,vhigh,2,2.0,med,,unacc


In [360]:
from sklearn.model_selection import train_test_split

In [361]:
y = df["situation"]
X = df.drop("situation", axis= 1)

In [362]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42) 

In [363]:
X_train

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety
107,vhigh,vhigh,5,5,big,high
901,med,vhigh,3,4,small,med
1710,low,low,5,4,small,low
706,high,med,4,2,med,med
678,high,med,3,2,med,low
...,...,...,...,...,...,...
1130,med,med,3,5,med,high
1294,med,low,5,5,big,med
860,high,low,5,5,med,high
1459,low,high,4,2,small,med


In [364]:
y_train

107     unacc
901     unacc
1710    unacc
706     unacc
678     unacc
        ...  
1130    vgood
1294     good
860       acc
1459    unacc
1126      acc
Name: situation, Length: 1383, dtype: object

In [316]:
y_test

599     unacc
1201      acc
628     unacc
1711      acc
1263    unacc
        ...  
100     unacc
274     unacc
1206    unacc
101     unacc
1084    unacc
Name: situation, Length: 346, dtype: object

In [270]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1383 entries, 107 to 1126
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   price          1378 non-null   object
 1   maint          1379 non-null   object
 2   doors          1380 non-null   object
 3   capacity       1380 non-null   object
 4   lugg_capacity  1381 non-null   object
 5   safety         1377 non-null   object
dtypes: object(6)
memory usage: 75.6+ KB


In [271]:
from sklearn.impute import SimpleImputer

In [272]:
# verimzde olan nan değerleri mode değerleriyle dolduruyoruz
imputer = SimpleImputer(strategy = "most_frequent")

In [318]:
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)
y_train = imputer.fit_transform(np.array(y_train).reshape(-1,1))
y_test = imputer.transform(np.array(y_test).reshape(-1,1))

In [325]:
X_train = pd.DataFrame(X_train , columns = ["price", "maint", "doors", "capacity", "lugg_capacity", "safety"])
X_test = pd.DataFrame(X_test , columns = ["price", "maint", "doors", "capacity", "lugg_capacity", "safety"])
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)

In [326]:
X_train

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety
0,vhigh,vhigh,5,5,big,high
1,med,vhigh,3,4,small,med
2,low,low,5,4,small,low
3,high,med,4,2,med,med
4,high,med,3,2,med,low
...,...,...,...,...,...,...
1378,med,med,3,5,med,high
1379,med,low,5,5,big,med
1380,high,low,5,5,med,high
1381,low,high,4,2,small,med


In [327]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1383 entries, 0 to 1382
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   price          1383 non-null   object
 1   maint          1383 non-null   object
 2   doors          1383 non-null   object
 3   capacity       1383 non-null   object
 4   lugg_capacity  1383 non-null   object
 5   safety         1383 non-null   object
dtypes: object(6)
memory usage: 65.0+ KB


In [328]:
for col in X_train.columns:
    print("****************************")
    print(f"{col} sütunu")
    print("****************************")
    print(X_train[col].value_counts())

****************************
price sütunu
****************************
med      359
low      348
high     340
vhigh    336
Name: price, dtype: int64
****************************
maint sütunu
****************************
high     362
med      355
vhigh    335
low      331
Name: maint, dtype: int64
****************************
doors sütunu
****************************
2    352
4    346
5    343
3    342
Name: doors, dtype: int64
****************************
capacity sütunu
****************************
4    473
5    462
2    448
Name: capacity, dtype: int64
****************************
lugg_capacity sütunu
****************************
big      468
small    458
med      457
Name: lugg_capacity, dtype: int64
****************************
safety sütunu
****************************
low     469
med     461
high    453
Name: safety, dtype: int64


In [329]:
y_train.value_counts()

unacc    970
acc      304
good      57
vgood     52
dtype: int64

In [330]:
y_test.value_counts()

unacc    241
acc       80
vgood     13
good      12
dtype: int64

In [331]:
y_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 346 entries, 0 to 345
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       346 non-null    object
dtypes: object(1)
memory usage: 2.8+ KB


In [332]:
y_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1383 entries, 0 to 1382
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       1383 non-null   object
dtypes: object(1)
memory usage: 10.9+ KB


In [333]:
# Verimiz ağırlıklı olarak kategorik değişkenlerden oluştuğundan dolayı Catboost algoritmasını kullanmayı tercih ediyorum.
from catboost import CatBoostClassifier, Pool

In [334]:
train_pool = Pool(X_train, y_train, cat_features = list(X_train.columns))
test_pool = Pool(X_test, y_test, cat_features = list(X_test.columns))

In [335]:
# optimizasyon yaparken denenecek değerler
param_dist = {
    "learning_rate": [0.03, 0.1, 0.3],
    "depth": [3, 5, 7],
    "l2_leaf_reg": [1, 3, 5] # l2_leaf_regularization
}

In [336]:
model = CatBoostClassifier(n_estimators = 100)

In [337]:
# optimizasyonu randomized search ile kurmayı tercih ettim grid search ün fazla yavaş olacağını düşünmemden kaynaklı.
search_results = model.randomized_search(
                    param_dist, train_pool,
                    cv = 3, n_iter = 10,
                    partition_random_seed = 548574,
                    verbose = 1)

0:	learn: 1.3433502	test: 1.3450224	best: 1.3450224 (0)	total: 12.8ms	remaining: 1.27s
1:	learn: 1.3047236	test: 1.3079889	best: 1.3079889 (1)	total: 37ms	remaining: 1.81s
2:	learn: 1.2713619	test: 1.2780053	best: 1.2780053 (2)	total: 47.6ms	remaining: 1.54s
3:	learn: 1.2389596	test: 1.2469855	best: 1.2469855 (3)	total: 62.7ms	remaining: 1.5s
4:	learn: 1.2086503	test: 1.2172983	best: 1.2172983 (4)	total: 80.1ms	remaining: 1.52s
5:	learn: 1.1810778	test: 1.1906250	best: 1.1906250 (5)	total: 94.2ms	remaining: 1.48s
6:	learn: 1.1572698	test: 1.1682183	best: 1.1682183 (6)	total: 106ms	remaining: 1.41s
7:	learn: 1.1333283	test: 1.1451448	best: 1.1451448 (7)	total: 120ms	remaining: 1.38s
8:	learn: 1.1110307	test: 1.1235888	best: 1.1235888 (8)	total: 129ms	remaining: 1.3s
9:	learn: 1.0901436	test: 1.1035202	best: 1.1035202 (9)	total: 146ms	remaining: 1.31s
10:	learn: 1.0701731	test: 1.0843586	best: 1.0843586 (10)	total: 159ms	remaining: 1.29s
11:	learn: 1.0516084	test: 1.0668407	best: 1.06684

99:	learn: 0.4605050	test: 0.5090282	best: 0.5090282 (99)	total: 1.77s	remaining: 0us

bestTest = 0.5090282351
bestIteration = 99

0:	loss: 0.5090282	best: 0.5090282 (0)	total: 1.91s	remaining: 17.2s
0:	learn: 1.2484937	test: 1.2539111	best: 1.2539111 (0)	total: 16.6ms	remaining: 1.64s
1:	learn: 1.1507927	test: 1.1601460	best: 1.1601460 (1)	total: 42.7ms	remaining: 2.09s
2:	learn: 1.0788298	test: 1.0946591	best: 1.0946591 (2)	total: 61.8ms	remaining: 2s
3:	learn: 1.0184991	test: 1.0366348	best: 1.0366348 (3)	total: 92.9ms	remaining: 2.23s
4:	learn: 0.9679757	test: 0.9875683	best: 0.9875683 (4)	total: 123ms	remaining: 2.34s
5:	learn: 0.9274264	test: 0.9480675	best: 0.9480675 (5)	total: 153ms	remaining: 2.4s
6:	learn: 0.8958174	test: 0.9181999	best: 0.9181999 (6)	total: 165ms	remaining: 2.19s
7:	learn: 0.8369079	test: 0.8652352	best: 0.8652352 (7)	total: 189ms	remaining: 2.17s
8:	learn: 0.7913873	test: 0.8240125	best: 0.8240125 (8)	total: 206ms	remaining: 2.09s
9:	learn: 0.7553876	test: 

97:	learn: 0.3034989	test: 0.3417308	best: 0.3417308 (97)	total: 2.71s	remaining: 55.3ms
98:	learn: 0.3028585	test: 0.3406899	best: 0.3406899 (98)	total: 2.73s	remaining: 27.5ms
99:	learn: 0.3018810	test: 0.3394735	best: 0.3394735 (99)	total: 2.74s	remaining: 0us

bestTest = 0.3394734853
bestIteration = 99

1:	loss: 0.3394735	best: 0.3394735 (1)	total: 4.68s	remaining: 18.7s
0:	learn: 1.3448384	test: 1.3464182	best: 1.3464182 (0)	total: 14.3ms	remaining: 1.42s
1:	learn: 1.3073926	test: 1.3106286	best: 1.3106286 (1)	total: 27.8ms	remaining: 1.36s
2:	learn: 1.2746827	test: 1.2812523	best: 1.2812523 (2)	total: 36.6ms	remaining: 1.18s
3:	learn: 1.2434487	test: 1.2509677	best: 1.2509677 (3)	total: 55.4ms	remaining: 1.33s
4:	learn: 1.2145184	test: 1.2223654	best: 1.2223654 (4)	total: 73.1ms	remaining: 1.39s
5:	learn: 1.1874135	test: 1.1960843	best: 1.1960843 (5)	total: 95ms	remaining: 1.49s
6:	learn: 1.1621717	test: 1.1717066	best: 1.1717066 (6)	total: 113ms	remaining: 1.5s
7:	learn: 1.13883

90:	learn: 0.5127564	test: 0.5639614	best: 0.5639614 (90)	total: 1.73s	remaining: 171ms
91:	learn: 0.5112182	test: 0.5624588	best: 0.5624588 (91)	total: 1.75s	remaining: 152ms
92:	learn: 0.5073008	test: 0.5580109	best: 0.5580109 (92)	total: 1.79s	remaining: 135ms
93:	learn: 0.5036230	test: 0.5537930	best: 0.5537930 (93)	total: 1.81s	remaining: 116ms
94:	learn: 0.5019831	test: 0.5521245	best: 0.5521245 (94)	total: 1.85s	remaining: 97.3ms
95:	learn: 0.5002538	test: 0.5499728	best: 0.5499728 (95)	total: 1.87s	remaining: 77.8ms
96:	learn: 0.4983602	test: 0.5478092	best: 0.5478092 (96)	total: 1.88s	remaining: 58.2ms
97:	learn: 0.4954910	test: 0.5450319	best: 0.5450319 (97)	total: 1.9s	remaining: 38.7ms
98:	learn: 0.4921398	test: 0.5410605	best: 0.5410605 (98)	total: 1.91s	remaining: 19.3ms
99:	learn: 0.4910287	test: 0.5401611	best: 0.5401611 (99)	total: 1.94s	remaining: 0us

bestTest = 0.5401611441
bestIteration = 99

2:	loss: 0.5401611	best: 0.3394735 (1)	total: 6.63s	remaining: 15.5s
0:	l

88:	learn: 0.3374145	test: 0.3703556	best: 0.3703556 (88)	total: 1.73s	remaining: 214ms
89:	learn: 0.3364853	test: 0.3698199	best: 0.3698199 (89)	total: 1.75s	remaining: 195ms
90:	learn: 0.3355870	test: 0.3692851	best: 0.3692851 (90)	total: 1.77s	remaining: 175ms
91:	learn: 0.3347095	test: 0.3683102	best: 0.3683102 (91)	total: 1.79s	remaining: 156ms
92:	learn: 0.3324235	test: 0.3652477	best: 0.3652477 (92)	total: 1.81s	remaining: 136ms
93:	learn: 0.3324008	test: 0.3652541	best: 0.3652477 (92)	total: 1.82s	remaining: 116ms
94:	learn: 0.3323865	test: 0.3652501	best: 0.3652477 (92)	total: 1.84s	remaining: 96.9ms
95:	learn: 0.3322877	test: 0.3652608	best: 0.3652477 (92)	total: 1.85s	remaining: 77.3ms
96:	learn: 0.3308610	test: 0.3637033	best: 0.3637033 (96)	total: 1.88s	remaining: 58ms
97:	learn: 0.3296676	test: 0.3625808	best: 0.3625808 (97)	total: 1.89s	remaining: 38.6ms
98:	learn: 0.3295806	test: 0.3625949	best: 0.3625808 (97)	total: 1.91s	remaining: 19.3ms
99:	learn: 0.3286302	test: 0.

83:	learn: 0.2517967	test: 0.2848020	best: 0.2848020 (83)	total: 1.33s	remaining: 254ms
84:	learn: 0.2482206	test: 0.2816888	best: 0.2816888 (84)	total: 1.35s	remaining: 239ms
85:	learn: 0.2476367	test: 0.2814033	best: 0.2814033 (85)	total: 1.38s	remaining: 224ms
86:	learn: 0.2471654	test: 0.2813595	best: 0.2813595 (86)	total: 1.4s	remaining: 209ms
87:	learn: 0.2465629	test: 0.2810662	best: 0.2810662 (87)	total: 1.41s	remaining: 193ms
88:	learn: 0.2463978	test: 0.2811693	best: 0.2810662 (87)	total: 1.43s	remaining: 177ms
89:	learn: 0.2443650	test: 0.2809798	best: 0.2809798 (89)	total: 1.44s	remaining: 160ms
90:	learn: 0.2443155	test: 0.2809270	best: 0.2809270 (90)	total: 1.46s	remaining: 144ms
91:	learn: 0.2431334	test: 0.2797822	best: 0.2797822 (91)	total: 1.47s	remaining: 128ms
92:	learn: 0.2421491	test: 0.2791792	best: 0.2791792 (92)	total: 1.49s	remaining: 112ms
93:	learn: 0.2417871	test: 0.2793140	best: 0.2791792 (92)	total: 1.5s	remaining: 95.8ms
94:	learn: 0.2392094	test: 0.2733

81:	learn: 0.2534084	test: 0.2959821	best: 0.2959821 (81)	total: 2.2s	remaining: 483ms
82:	learn: 0.2481404	test: 0.2898011	best: 0.2898011 (82)	total: 2.23s	remaining: 457ms
83:	learn: 0.2452724	test: 0.2888410	best: 0.2888410 (83)	total: 2.27s	remaining: 432ms
84:	learn: 0.2438675	test: 0.2889282	best: 0.2888410 (83)	total: 2.29s	remaining: 405ms
85:	learn: 0.2433978	test: 0.2889587	best: 0.2888410 (83)	total: 2.32s	remaining: 378ms
86:	learn: 0.2425930	test: 0.2883581	best: 0.2883581 (86)	total: 2.35s	remaining: 351ms
87:	learn: 0.2402428	test: 0.2870316	best: 0.2870316 (87)	total: 2.37s	remaining: 323ms
88:	learn: 0.2380860	test: 0.2832941	best: 0.2832941 (88)	total: 2.4s	remaining: 296ms
89:	learn: 0.2368059	test: 0.2813899	best: 0.2813899 (89)	total: 2.43s	remaining: 270ms
90:	learn: 0.2365476	test: 0.2810381	best: 0.2810381 (90)	total: 2.46s	remaining: 243ms
91:	learn: 0.2360288	test: 0.2809063	best: 0.2809063 (91)	total: 2.48s	remaining: 216ms
92:	learn: 0.2354074	test: 0.28072

74:	learn: 0.1290253	test: 0.2152698	best: 0.2151170 (73)	total: 1.93s	remaining: 642ms
75:	learn: 0.1285700	test: 0.2149840	best: 0.2149840 (75)	total: 1.96s	remaining: 618ms
76:	learn: 0.1275811	test: 0.2150530	best: 0.2149840 (75)	total: 1.98s	remaining: 592ms
77:	learn: 0.1265090	test: 0.2134007	best: 0.2134007 (77)	total: 2.01s	remaining: 567ms
78:	learn: 0.1256646	test: 0.2135331	best: 0.2134007 (77)	total: 2.04s	remaining: 541ms
79:	learn: 0.1238489	test: 0.2133373	best: 0.2133373 (79)	total: 2.06s	remaining: 516ms
80:	learn: 0.1234101	test: 0.2129985	best: 0.2129985 (80)	total: 2.09s	remaining: 490ms
81:	learn: 0.1222974	test: 0.2133403	best: 0.2129985 (80)	total: 2.11s	remaining: 464ms
82:	learn: 0.1210522	test: 0.2136970	best: 0.2129985 (80)	total: 2.14s	remaining: 439ms
83:	learn: 0.1203356	test: 0.2118363	best: 0.2118363 (83)	total: 2.18s	remaining: 415ms
84:	learn: 0.1192401	test: 0.2113155	best: 0.2113155 (84)	total: 2.21s	remaining: 390ms
85:	learn: 0.1185274	test: 0.210

74:	learn: 0.1614543	test: 0.2193863	best: 0.2190356 (72)	total: 2.03s	remaining: 675ms
75:	learn: 0.1605645	test: 0.2201947	best: 0.2190356 (72)	total: 2.05s	remaining: 649ms
76:	learn: 0.1598595	test: 0.2198985	best: 0.2190356 (72)	total: 2.08s	remaining: 622ms
77:	learn: 0.1591665	test: 0.2201587	best: 0.2190356 (72)	total: 2.11s	remaining: 594ms
78:	learn: 0.1584109	test: 0.2188780	best: 0.2188780 (78)	total: 2.13s	remaining: 566ms
79:	learn: 0.1571006	test: 0.2179749	best: 0.2179749 (79)	total: 2.16s	remaining: 540ms
80:	learn: 0.1554125	test: 0.2169841	best: 0.2169841 (80)	total: 2.18s	remaining: 512ms
81:	learn: 0.1544233	test: 0.2163148	best: 0.2163148 (81)	total: 2.21s	remaining: 485ms
82:	learn: 0.1528380	test: 0.2156142	best: 0.2156142 (82)	total: 2.23s	remaining: 458ms
83:	learn: 0.1520768	test: 0.2156687	best: 0.2156142 (82)	total: 2.26s	remaining: 431ms
84:	learn: 0.1517790	test: 0.2156290	best: 0.2156142 (82)	total: 2.29s	remaining: 405ms
85:	learn: 0.1511356	test: 0.216

67:	learn: 0.5737479	test: 0.6272971	best: 0.6272971 (67)	total: 1.5s	remaining: 709ms
68:	learn: 0.5710963	test: 0.6248411	best: 0.6248411 (68)	total: 1.53s	remaining: 688ms
69:	learn: 0.5687517	test: 0.6227490	best: 0.6227490 (69)	total: 1.55s	remaining: 666ms
70:	learn: 0.5675785	test: 0.6214943	best: 0.6214943 (70)	total: 1.56s	remaining: 637ms
71:	learn: 0.5652435	test: 0.6196585	best: 0.6196585 (71)	total: 1.58s	remaining: 616ms
72:	learn: 0.5627828	test: 0.6173490	best: 0.6173490 (72)	total: 1.61s	remaining: 596ms
73:	learn: 0.5604939	test: 0.6151755	best: 0.6151755 (73)	total: 1.64s	remaining: 577ms
74:	learn: 0.5581590	test: 0.6130237	best: 0.6130237 (74)	total: 1.68s	remaining: 558ms
75:	learn: 0.5560157	test: 0.6110783	best: 0.6110783 (75)	total: 1.7s	remaining: 537ms
76:	learn: 0.5548059	test: 0.6099194	best: 0.6099194 (76)	total: 1.72s	remaining: 514ms
77:	learn: 0.5521272	test: 0.6071472	best: 0.6071472 (77)	total: 1.75s	remaining: 493ms
78:	learn: 0.5490519	test: 0.60351

61:	learn: 0.2811693	test: 0.3179191	best: 0.3179191 (61)	total: 2.27s	remaining: 1.39s
62:	learn: 0.2785067	test: 0.3170508	best: 0.3170508 (62)	total: 2.32s	remaining: 1.36s
63:	learn: 0.2739562	test: 0.3103018	best: 0.3103018 (63)	total: 2.37s	remaining: 1.33s
64:	learn: 0.2692738	test: 0.3063205	best: 0.3063205 (64)	total: 2.42s	remaining: 1.3s
65:	learn: 0.2688166	test: 0.3061391	best: 0.3061391 (65)	total: 2.46s	remaining: 1.27s
66:	learn: 0.2680282	test: 0.3056340	best: 0.3056340 (66)	total: 2.49s	remaining: 1.23s
67:	learn: 0.2673179	test: 0.3052332	best: 0.3052332 (67)	total: 2.53s	remaining: 1.19s
68:	learn: 0.2633906	test: 0.3010658	best: 0.3010658 (68)	total: 2.58s	remaining: 1.16s
69:	learn: 0.2605491	test: 0.2985228	best: 0.2985228 (69)	total: 2.64s	remaining: 1.13s
70:	learn: 0.2594237	test: 0.2976254	best: 0.2976254 (70)	total: 2.69s	remaining: 1.1s
71:	learn: 0.2569107	test: 0.2964098	best: 0.2964098 (71)	total: 2.73s	remaining: 1.06s
72:	learn: 0.2530874	test: 0.29139

55:	learn: 0.1432615	test: 0.1857685	best: 0.1857685 (55)	total: 2.29s	remaining: 1.8s
56:	learn: 0.1388152	test: 0.1850543	best: 0.1850543 (56)	total: 2.32s	remaining: 1.75s
57:	learn: 0.1365675	test: 0.1832338	best: 0.1832338 (57)	total: 2.35s	remaining: 1.7s
58:	learn: 0.1360816	test: 0.1831816	best: 0.1831816 (58)	total: 2.38s	remaining: 1.65s
59:	learn: 0.1343867	test: 0.1813625	best: 0.1813625 (59)	total: 2.4s	remaining: 1.6s
60:	learn: 0.1321663	test: 0.1778023	best: 0.1778023 (60)	total: 2.43s	remaining: 1.55s
61:	learn: 0.1316106	test: 0.1778252	best: 0.1778023 (60)	total: 2.45s	remaining: 1.5s
62:	learn: 0.1296928	test: 0.1757665	best: 0.1757665 (62)	total: 2.48s	remaining: 1.46s
63:	learn: 0.1274135	test: 0.1748918	best: 0.1748918 (63)	total: 2.5s	remaining: 1.41s
64:	learn: 0.1256435	test: 0.1738562	best: 0.1738562 (64)	total: 2.54s	remaining: 1.36s
65:	learn: 0.1235938	test: 0.1727259	best: 0.1727259 (65)	total: 2.56s	remaining: 1.32s
66:	learn: 0.1229132	test: 0.1717224	b

52:	learn: 0.1774458	test: 0.2452030	best: 0.2445965 (51)	total: 1.88s	remaining: 1.67s
53:	learn: 0.1758850	test: 0.2459411	best: 0.2445965 (51)	total: 1.91s	remaining: 1.63s
54:	learn: 0.1730310	test: 0.2494349	best: 0.2445965 (51)	total: 1.96s	remaining: 1.6s
55:	learn: 0.1686719	test: 0.2401255	best: 0.2401255 (55)	total: 1.99s	remaining: 1.57s
56:	learn: 0.1679374	test: 0.2412908	best: 0.2401255 (55)	total: 2.03s	remaining: 1.53s
57:	learn: 0.1661056	test: 0.2419510	best: 0.2401255 (55)	total: 2.06s	remaining: 1.49s
58:	learn: 0.1639476	test: 0.2404497	best: 0.2401255 (55)	total: 2.1s	remaining: 1.46s
59:	learn: 0.1614425	test: 0.2406197	best: 0.2401255 (55)	total: 2.13s	remaining: 1.42s
60:	learn: 0.1573258	test: 0.2350410	best: 0.2350410 (60)	total: 2.17s	remaining: 1.39s
61:	learn: 0.1551919	test: 0.2340794	best: 0.2340794 (61)	total: 2.2s	remaining: 1.35s
62:	learn: 0.1524342	test: 0.2328135	best: 0.2328135 (62)	total: 2.23s	remaining: 1.31s
63:	learn: 0.1506649	test: 0.230619

51:	learn: 0.1684705	test: 0.2656253	best: 0.2656253 (51)	total: 1.52s	remaining: 1.4s
52:	learn: 0.1658586	test: 0.2665094	best: 0.2656253 (51)	total: 1.55s	remaining: 1.37s
53:	learn: 0.1628496	test: 0.2643229	best: 0.2643229 (53)	total: 1.58s	remaining: 1.34s
54:	learn: 0.1602161	test: 0.2616364	best: 0.2616364 (54)	total: 1.61s	remaining: 1.32s
55:	learn: 0.1562537	test: 0.2576535	best: 0.2576535 (55)	total: 1.65s	remaining: 1.29s
56:	learn: 0.1537940	test: 0.2567386	best: 0.2567386 (56)	total: 1.68s	remaining: 1.27s
57:	learn: 0.1505678	test: 0.2552370	best: 0.2552370 (57)	total: 1.72s	remaining: 1.24s
58:	learn: 0.1489139	test: 0.2561141	best: 0.2552370 (57)	total: 1.76s	remaining: 1.22s
59:	learn: 0.1460670	test: 0.2546735	best: 0.2546735 (59)	total: 1.79s	remaining: 1.2s
60:	learn: 0.1423895	test: 0.2523352	best: 0.2523352 (60)	total: 1.83s	remaining: 1.17s
61:	learn: 0.1407986	test: 0.2524918	best: 0.2523352 (60)	total: 1.86s	remaining: 1.14s
62:	learn: 0.1389461	test: 0.25012

In [338]:
search_results["params"]

{'depth': 5, 'l2_leaf_reg': 1, 'learning_rate': 0.3}

In [339]:
tuned_model = CatBoostClassifier(iterations=100, depth=5, learning_rate=0.3, l2_leaf_reg=1)

In [340]:
tuned_model.fit(train_pool)

0:	learn: 1.0310259	total: 21.4ms	remaining: 2.12s
1:	learn: 0.8997423	total: 55.2ms	remaining: 2.71s
2:	learn: 0.7512825	total: 76.7ms	remaining: 2.48s
3:	learn: 0.6621268	total: 121ms	remaining: 2.91s
4:	learn: 0.6139880	total: 144ms	remaining: 2.73s
5:	learn: 0.5565919	total: 187ms	remaining: 2.92s
6:	learn: 0.5201113	total: 223ms	remaining: 2.96s
7:	learn: 0.4682840	total: 256ms	remaining: 2.95s
8:	learn: 0.4319451	total: 284ms	remaining: 2.87s
9:	learn: 0.4016450	total: 309ms	remaining: 2.78s
10:	learn: 0.3735912	total: 335ms	remaining: 2.71s
11:	learn: 0.3567009	total: 362ms	remaining: 2.65s
12:	learn: 0.3454607	total: 393ms	remaining: 2.63s
13:	learn: 0.3445984	total: 399ms	remaining: 2.45s
14:	learn: 0.3340275	total: 427ms	remaining: 2.42s
15:	learn: 0.3176881	total: 456ms	remaining: 2.39s
16:	learn: 0.3128029	total: 484ms	remaining: 2.37s
17:	learn: 0.3074937	total: 510ms	remaining: 2.32s
18:	learn: 0.2986117	total: 535ms	remaining: 2.28s
19:	learn: 0.2908306	total: 562ms	rema

<catboost.core.CatBoostClassifier at 0x1fba17f59c0>

In [341]:
y_predict = tuned_model.predict(X_test)

In [342]:
y_predict

array([['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['acc'],
       ['acc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['vgood'],
       ['acc'],
       ['vgood'],
       ['vgood'],
       ['good'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['acc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
     

In [343]:
y_train_predict = tuned_model.predict(X_train)

In [344]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [345]:
accuracy_score(y_test, y_predict)

0.930635838150289

In [354]:
print(classification_report(y_train, y_train_predict, digits = 2))

              precision    recall  f1-score   support

         acc       0.88      0.94      0.91       304
        good       0.89      0.86      0.88        57
       unacc       0.99      0.96      0.97       970
       vgood       0.94      0.96      0.95        52

    accuracy                           0.95      1383
   macro avg       0.92      0.93      0.93      1383
weighted avg       0.96      0.95      0.95      1383



In [349]:
print(classification_report(y_test, y_predict, digits = 2))

              precision    recall  f1-score   support

         acc       0.89      0.88      0.88        80
        good       0.86      0.50      0.63        12
       unacc       0.96      0.97      0.96       241
       vgood       0.72      1.00      0.84        13

    accuracy                           0.93       346
   macro avg       0.86      0.84      0.83       346
weighted avg       0.93      0.93      0.93       346



In [350]:
confusion_matrix(y_test, y_predict)

array([[ 70,   1,   9,   0],
       [  1,   6,   0,   5],
       [  8,   0, 233,   0],
       [  0,   0,   0,  13]], dtype=int64)

In [355]:
# Sonuçları karşılaştırdığımda her hangi bir overfit olmadığını görüyor ve 0.93 lik bir score un iyi olduğunu düşünüyorum.
# Ayrıyeten bir kaç model daha denemiştim ona da diğer dosyalardan bakabilirsinizç

In [356]:
import joblib

In [357]:
joblib.dump(tuned_model,'Catboost_model.joblib')

['Catboost_model.joblib']