In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
df_original = pd.read_csv("car_acceptability.txt", sep = ",", header = None)
df_original.head()

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,?,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,?,unacc


In [5]:
df = df_original.copy()

In [6]:
df.columns = ["price", "maint", "doors", "capacity", "lugg_capacity", "safety", "situation"] # sütunları yeniden adlandırdık

In [7]:
df.head()

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety,situation
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,?,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,?,unacc


In [8]:
df.info() # datamız genel olarak temiz gözüküyor lakin bütün sütunlar object değerinde sayısal sütunların içinde str değerleri olabilir.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1729 entries, 0 to 1728
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   price          1728 non-null   object
 1   maint          1729 non-null   object
 2   doors          1729 non-null   object
 3   capacity       1729 non-null   object
 4   lugg_capacity  1729 non-null   object
 5   safety         1728 non-null   object
 6   situation      1729 non-null   object
dtypes: object(7)
memory usage: 94.7+ KB


In [9]:
# sütünlardaki bütün kategoilere bakıyoruz
for col in df.columns:
    print("****************************")
    print(f"{col} sütunu")
    print("****************************")
    print(df[col].value_counts())

****************************
price sütunu
****************************
high     432
med      431
vhigh    430
low      430
?          3
-          1
düsük      1
Name: price, dtype: int64
****************************
maint sütunu
****************************
high     432
med      432
low      432
vhigh    429
?          4
Name: maint, dtype: int64
****************************
doors sütunu
****************************
3        432
2        430
4        430
5more    430
?          3
-          1
44         1
iki        1
5+         1
Name: doors, dtype: int64
****************************
capacity sütunu
****************************
4       576
more    576
2       574
?         3
Name: capacity, dtype: int64
****************************
lugg_capacity sütunu
****************************
small    576
big      576
med      575
?          2
Name: lugg_capacity, dtype: int64
****************************
safety sütunu
****************************
high    576
med     574
low     573
?         2


In [10]:
# Target sütunu dışında verimiz dengeli dağılmış gözüküyor, ayrıyeten verimizde bir kaç tane geçersiz 
# değerler var bu değerleri uygun bir şekilde ele alacağız.

In [11]:
from sklearn.impute import SimpleImputer

In [12]:
# geçersiz değerleri geçerli olabilecek değerlere dönüştürüyoruz 
df.replace({"?": np.nan, "*": np.nan, "-": np.nan, "düsük": "low", "5more": "5", "5+": "5", "iki": "2", "44":"4", "more": "5"}, inplace = True)

In [13]:
for col in df.columns:
    print("****************************")
    print(f"{col} sütunu")
    print("****************************")
    print(df[col].value_counts())

****************************
price sütunu
****************************
high     432
med      431
low      431
vhigh    430
Name: price, dtype: int64
****************************
maint sütunu
****************************
high     432
med      432
low      432
vhigh    429
Name: maint, dtype: int64
****************************
doors sütunu
****************************
3    432
2    431
4    431
5    431
Name: doors, dtype: int64
****************************
capacity sütunu
****************************
4    576
5    576
2    574
Name: capacity, dtype: int64
****************************
lugg_capacity sütunu
****************************
small    576
big      576
med      575
Name: lugg_capacity, dtype: int64
****************************
safety sütunu
****************************
high    576
med     574
low     573
Name: safety, dtype: int64
****************************
situation sütunu
****************************
unacc    1209
acc       384
good       69
vgood      65
Name: situation, dtyp

In [14]:
df.head()

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety,situation
0,vhigh,vhigh,2,2.0,small,low,unacc
1,vhigh,vhigh,2,2.0,small,med,unacc
2,vhigh,vhigh,2,,small,high,unacc
3,vhigh,vhigh,2,2.0,med,low,unacc
4,vhigh,vhigh,2,2.0,med,,unacc


In [15]:
# verimzde olan nan değerleri mode değerleriyle dolduruyoruz
imputer = SimpleImputer(strategy = "most_frequent")

In [16]:
df = imputer.fit_transform(df)

In [17]:
df = pd.DataFrame(df , columns = ["price", "maint", "doors", "capacity", "lugg_capacity", "safety", "situation"])

In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1729 entries, 0 to 1728
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   price          1729 non-null   object
 1   maint          1729 non-null   object
 2   doors          1729 non-null   object
 3   capacity       1729 non-null   object
 4   lugg_capacity  1729 non-null   object
 5   safety         1729 non-null   object
 6   situation      1729 non-null   object
dtypes: object(7)
memory usage: 94.7+ KB


In [19]:
from sklearn.model_selection import train_test_split

In [20]:
y = df["situation"]
X = df.drop("situation", axis= 1)

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42) 

In [22]:
X_train

Unnamed: 0,price,maint,doors,capacity,lugg_capacity,safety
107,vhigh,vhigh,5,5,big,high
901,med,vhigh,3,4,small,med
1710,low,low,5,4,small,low
706,high,med,4,2,med,med
678,high,med,3,2,med,low
...,...,...,...,...,...,...
1130,med,med,3,5,med,high
1294,med,low,5,5,big,med
860,high,low,5,5,med,high
1459,low,high,4,2,small,med


In [23]:
# Verimiz ağırlıklı olarak kategorik değişkenlerden oluştuğundan dolayı Catboost algoritmasını kullanmayı tercih ediyorum.
from catboost import CatBoostClassifier, Pool

In [24]:
train_pool = Pool(X_train, y_train, cat_features = list(X_train.columns))
test_pool = Pool(X_test, y_test, cat_features = list(X_test.columns))

In [25]:
# optimizasyon yaparken denenecek değerler
param_dist = {
    "learning_rate": [0.03, 0.1, 0.3],
    "depth": [3, 5, 7],
    "l2_leaf_reg": [1, 3, 5] # l2_leaf_regularization
}

In [26]:
model = CatBoostClassifier(n_estimators = 100)

In [27]:
# optimizasyonu randomized search ile kurmayı tercih ettim grid search ün fazla yavaş olacağını düşünmemden kaynaklı.
search_results = model.randomized_search(
                    param_dist, train_pool,
                    cv = 3, n_iter = 10,
                    partition_random_seed = 548574,
                    verbose = 1)

0:	learn: 1.3433867	test: 1.3448752	best: 1.3448752 (0)	total: 204ms	remaining: 20.2s
1:	learn: 1.3047945	test: 1.3077214	best: 1.3077214 (1)	total: 228ms	remaining: 11.2s
2:	learn: 1.2714960	test: 1.2776732	best: 1.2776732 (2)	total: 243ms	remaining: 7.85s
3:	learn: 1.2391184	test: 1.2465465	best: 1.2465465 (3)	total: 265ms	remaining: 6.36s
4:	learn: 1.2088615	test: 1.2167831	best: 1.2167831 (4)	total: 287ms	remaining: 5.45s
5:	learn: 1.1813000	test: 1.1900414	best: 1.1900414 (5)	total: 311ms	remaining: 4.87s
6:	learn: 1.1557667	test: 1.1653011	best: 1.1653011 (6)	total: 328ms	remaining: 4.35s
7:	learn: 1.1320098	test: 1.1423469	best: 1.1423469 (7)	total: 350ms	remaining: 4.03s
8:	learn: 1.1098739	test: 1.1209006	best: 1.1209006 (8)	total: 366ms	remaining: 3.7s
9:	learn: 1.0891324	test: 1.1009289	best: 1.1009289 (9)	total: 388ms	remaining: 3.49s
10:	learn: 1.0693113	test: 1.0818209	best: 1.0818209 (10)	total: 413ms	remaining: 3.34s
11:	learn: 1.0508466	test: 1.0643909	best: 1.0643909 

95:	learn: 0.4916116	test: 0.5454673	best: 0.5454673 (95)	total: 1.89s	remaining: 78.9ms
96:	learn: 0.4895328	test: 0.5433092	best: 0.5433092 (96)	total: 1.91s	remaining: 59.1ms
97:	learn: 0.4871495	test: 0.5410533	best: 0.5410533 (97)	total: 1.93s	remaining: 39.3ms
98:	learn: 0.4851363	test: 0.5389311	best: 0.5389311 (98)	total: 1.94s	remaining: 19.6ms
99:	learn: 0.4843229	test: 0.5383312	best: 0.5383312 (99)	total: 1.96s	remaining: 0us

bestTest = 0.5383312348
bestIteration = 99

0:	loss: 0.5383312	best: 0.5383312 (0)	total: 2.2s	remaining: 19.8s
0:	learn: 1.2486099	test: 1.2534612	best: 1.2534612 (0)	total: 18.2ms	remaining: 1.8s
1:	learn: 1.1509740	test: 1.1594675	best: 1.1594675 (1)	total: 40.1ms	remaining: 1.97s
2:	learn: 1.0791395	test: 1.0939024	best: 1.0939024 (2)	total: 55.1ms	remaining: 1.78s
3:	learn: 1.0188108	test: 1.0357453	best: 1.0357453 (3)	total: 78.6ms	remaining: 1.89s
4:	learn: 0.9683613	test: 0.9866194	best: 0.9866194 (4)	total: 105ms	remaining: 2s
5:	learn: 0.927

96:	learn: 0.3058949	test: 0.3372239	best: 0.3371846 (95)	total: 1.82s	remaining: 56.3ms
97:	learn: 0.3050049	test: 0.3356755	best: 0.3356755 (97)	total: 1.84s	remaining: 37.5ms
98:	learn: 0.3032255	test: 0.3343389	best: 0.3343389 (98)	total: 1.85s	remaining: 18.7ms
99:	learn: 0.3022012	test: 0.3331911	best: 0.3331911 (99)	total: 1.87s	remaining: 0us

bestTest = 0.3331911053
bestIteration = 99

1:	loss: 0.3331911	best: 0.3331911 (1)	total: 4.09s	remaining: 16.4s
0:	learn: 1.3448847	test: 1.3462908	best: 1.3462908 (0)	total: 12.7ms	remaining: 1.25s
1:	learn: 1.3076077	test: 1.3102054	best: 1.3102054 (1)	total: 28.2ms	remaining: 1.38s
2:	learn: 1.2749394	test: 1.2807573	best: 1.2807573 (2)	total: 38.5ms	remaining: 1.24s
3:	learn: 1.2437375	test: 1.2503842	best: 1.2503842 (3)	total: 53.4ms	remaining: 1.28s
4:	learn: 1.2148618	test: 1.2217213	best: 1.2217213 (4)	total: 68.3ms	remaining: 1.3s
5:	learn: 1.1877752	test: 1.1953870	best: 1.1953870 (5)	total: 83.1ms	remaining: 1.3s
6:	learn: 1.1

92:	learn: 0.5167101	test: 0.5684427	best: 0.5684427 (92)	total: 1.42s	remaining: 107ms
93:	learn: 0.5152272	test: 0.5670745	best: 0.5670745 (93)	total: 1.44s	remaining: 91.9ms
94:	learn: 0.5109531	test: 0.5620010	best: 0.5620010 (94)	total: 1.46s	remaining: 76.6ms
95:	learn: 0.5066508	test: 0.5569672	best: 0.5569672 (95)	total: 1.47s	remaining: 61.3ms
96:	learn: 0.5026091	test: 0.5527565	best: 0.5527565 (96)	total: 1.49s	remaining: 45.9ms
97:	learn: 0.4998851	test: 0.5496529	best: 0.5496529 (97)	total: 1.5s	remaining: 30.6ms
98:	learn: 0.4962055	test: 0.5457475	best: 0.5457475 (98)	total: 1.51s	remaining: 15.3ms
99:	learn: 0.4940818	test: 0.5434049	best: 0.5434049 (99)	total: 1.53s	remaining: 0us

bestTest = 0.5434048743
bestIteration = 99

2:	loss: 0.5434049	best: 0.3331911 (1)	total: 5.64s	remaining: 13.2s
0:	learn: 1.2532237	test: 1.2578127	best: 1.2578127 (0)	total: 12.6ms	remaining: 1.24s
1:	learn: 1.1577788	test: 1.1653681	best: 1.1653681 (1)	total: 27.9ms	remaining: 1.36s
2:	le

88:	learn: 0.3349628	test: 0.3584519	best: 0.3584519 (88)	total: 1.38s	remaining: 171ms
89:	learn: 0.3341853	test: 0.3579668	best: 0.3579668 (89)	total: 1.4s	remaining: 155ms
90:	learn: 0.3333397	test: 0.3574061	best: 0.3574061 (90)	total: 1.41s	remaining: 140ms
91:	learn: 0.3324821	test: 0.3565794	best: 0.3565794 (91)	total: 1.43s	remaining: 124ms
92:	learn: 0.3319710	test: 0.3559037	best: 0.3559037 (92)	total: 1.44s	remaining: 109ms
93:	learn: 0.3319507	test: 0.3559207	best: 0.3559037 (92)	total: 1.46s	remaining: 93.2ms
94:	learn: 0.3319388	test: 0.3559304	best: 0.3559037 (92)	total: 1.47s	remaining: 77.6ms
95:	learn: 0.3318454	test: 0.3559353	best: 0.3559037 (92)	total: 1.49s	remaining: 62ms
96:	learn: 0.3310875	test: 0.3551851	best: 0.3551851 (96)	total: 1.5s	remaining: 46.5ms
97:	learn: 0.3299823	test: 0.3542271	best: 0.3542271 (97)	total: 1.52s	remaining: 31ms
98:	learn: 0.3254031	test: 0.3472964	best: 0.3472964 (98)	total: 1.53s	remaining: 15.5ms
99:	learn: 0.3250096	test: 0.347

87:	learn: 0.2527969	test: 0.2877429	best: 0.2877429 (87)	total: 1.35s	remaining: 184ms
88:	learn: 0.2527210	test: 0.2876919	best: 0.2876919 (88)	total: 1.36s	remaining: 168ms
89:	learn: 0.2512721	test: 0.2875316	best: 0.2875316 (89)	total: 1.38s	remaining: 153ms
90:	learn: 0.2500814	test: 0.2866930	best: 0.2866930 (90)	total: 1.39s	remaining: 138ms
91:	learn: 0.2499203	test: 0.2866656	best: 0.2866656 (91)	total: 1.41s	remaining: 123ms
92:	learn: 0.2489204	test: 0.2859731	best: 0.2859731 (92)	total: 1.42s	remaining: 107ms
93:	learn: 0.2487106	test: 0.2860128	best: 0.2859731 (92)	total: 1.44s	remaining: 91.8ms
94:	learn: 0.2464995	test: 0.2807249	best: 0.2807249 (94)	total: 1.45s	remaining: 76.6ms
95:	learn: 0.2462222	test: 0.2802327	best: 0.2802327 (95)	total: 1.47s	remaining: 61.3ms
96:	learn: 0.2442481	test: 0.2790334	best: 0.2790334 (96)	total: 1.49s	remaining: 46ms
97:	learn: 0.2437344	test: 0.2786806	best: 0.2786806 (97)	total: 1.5s	remaining: 30.6ms
98:	learn: 0.2430422	test: 0.2

86:	learn: 0.2377550	test: 0.2718324	best: 0.2718324 (86)	total: 2.29s	remaining: 342ms
87:	learn: 0.2355378	test: 0.2704421	best: 0.2704421 (87)	total: 2.32s	remaining: 316ms
88:	learn: 0.2346669	test: 0.2690890	best: 0.2690890 (88)	total: 2.35s	remaining: 290ms
89:	learn: 0.2315285	test: 0.2660732	best: 0.2660732 (89)	total: 2.37s	remaining: 264ms
90:	learn: 0.2307518	test: 0.2654111	best: 0.2654111 (90)	total: 2.4s	remaining: 238ms
91:	learn: 0.2300401	test: 0.2650440	best: 0.2650440 (91)	total: 2.43s	remaining: 211ms
92:	learn: 0.2282871	test: 0.2640749	best: 0.2640749 (92)	total: 2.46s	remaining: 185ms
93:	learn: 0.2273393	test: 0.2630222	best: 0.2630222 (93)	total: 2.49s	remaining: 159ms
94:	learn: 0.2269767	test: 0.2628028	best: 0.2628028 (94)	total: 2.52s	remaining: 132ms
95:	learn: 0.2263556	test: 0.2627910	best: 0.2627910 (95)	total: 2.54s	remaining: 106ms
96:	learn: 0.2251353	test: 0.2626592	best: 0.2626592 (96)	total: 2.57s	remaining: 79.6ms
97:	learn: 0.2224875	test: 0.260

83:	learn: 0.1191002	test: 0.1942436	best: 0.1942436 (83)	total: 2.26s	remaining: 431ms
84:	learn: 0.1179766	test: 0.1933037	best: 0.1933037 (84)	total: 2.29s	remaining: 405ms
85:	learn: 0.1171092	test: 0.1926780	best: 0.1926780 (85)	total: 2.32s	remaining: 378ms
86:	learn: 0.1159007	test: 0.1916922	best: 0.1916922 (86)	total: 2.35s	remaining: 351ms
87:	learn: 0.1149353	test: 0.1909588	best: 0.1909588 (87)	total: 2.37s	remaining: 324ms
88:	learn: 0.1143828	test: 0.1904701	best: 0.1904701 (88)	total: 2.4s	remaining: 297ms
89:	learn: 0.1139092	test: 0.1902001	best: 0.1902001 (89)	total: 2.43s	remaining: 270ms
90:	learn: 0.1128475	test: 0.1912087	best: 0.1902001 (89)	total: 2.45s	remaining: 243ms
91:	learn: 0.1122537	test: 0.1915689	best: 0.1902001 (89)	total: 2.48s	remaining: 216ms
92:	learn: 0.1110023	test: 0.1916405	best: 0.1902001 (89)	total: 2.51s	remaining: 189ms
93:	learn: 0.1099187	test: 0.1925600	best: 0.1902001 (89)	total: 2.54s	remaining: 162ms
94:	learn: 0.1081717	test: 0.1927

80:	learn: 0.1696953	test: 0.2221413	best: 0.2212936 (78)	total: 2.21s	remaining: 517ms
81:	learn: 0.1689731	test: 0.2215461	best: 0.2212936 (78)	total: 2.23s	remaining: 491ms
82:	learn: 0.1669824	test: 0.2219572	best: 0.2212936 (78)	total: 2.26s	remaining: 463ms
83:	learn: 0.1657974	test: 0.2221339	best: 0.2212936 (78)	total: 2.29s	remaining: 436ms
84:	learn: 0.1654408	test: 0.2218567	best: 0.2212936 (78)	total: 2.31s	remaining: 409ms
85:	learn: 0.1643572	test: 0.2205135	best: 0.2205135 (85)	total: 2.34s	remaining: 381ms
86:	learn: 0.1639775	test: 0.2204778	best: 0.2204778 (86)	total: 2.37s	remaining: 354ms
87:	learn: 0.1635195	test: 0.2202937	best: 0.2202937 (87)	total: 2.4s	remaining: 327ms
88:	learn: 0.1615282	test: 0.2186571	best: 0.2186571 (88)	total: 2.43s	remaining: 300ms
89:	learn: 0.1606474	test: 0.2180116	best: 0.2180116 (89)	total: 2.45s	remaining: 273ms
90:	learn: 0.1586887	test: 0.2144119	best: 0.2144119 (90)	total: 2.48s	remaining: 245ms
91:	learn: 0.1586104	test: 0.2143

74:	learn: 0.5495628	test: 0.5993965	best: 0.5993965 (74)	total: 1.7s	remaining: 567ms
75:	learn: 0.5435839	test: 0.5925003	best: 0.5925003 (75)	total: 1.73s	remaining: 546ms
76:	learn: 0.5368624	test: 0.5849952	best: 0.5849952 (76)	total: 1.76s	remaining: 525ms
77:	learn: 0.5313569	test: 0.5786274	best: 0.5786274 (77)	total: 1.78s	remaining: 504ms
78:	learn: 0.5266862	test: 0.5736933	best: 0.5736933 (78)	total: 1.81s	remaining: 482ms
79:	learn: 0.5206585	test: 0.5670079	best: 0.5670079 (79)	total: 1.84s	remaining: 460ms
80:	learn: 0.5158694	test: 0.5611505	best: 0.5611505 (80)	total: 1.87s	remaining: 438ms
81:	learn: 0.5107369	test: 0.5552974	best: 0.5552974 (81)	total: 1.9s	remaining: 416ms
82:	learn: 0.5064033	test: 0.5505533	best: 0.5505533 (82)	total: 1.92s	remaining: 394ms
83:	learn: 0.5011617	test: 0.5447746	best: 0.5447746 (83)	total: 1.95s	remaining: 372ms
84:	learn: 0.4971636	test: 0.5409463	best: 0.5409463 (84)	total: 1.98s	remaining: 350ms
85:	learn: 0.4929458	test: 0.53656

70:	learn: 0.2507921	test: 0.2736751	best: 0.2736751 (70)	total: 3.41s	remaining: 1.39s
71:	learn: 0.2499372	test: 0.2728985	best: 0.2728985 (71)	total: 3.47s	remaining: 1.35s
72:	learn: 0.2488538	test: 0.2720449	best: 0.2720449 (72)	total: 3.54s	remaining: 1.31s
73:	learn: 0.2472289	test: 0.2711432	best: 0.2711432 (73)	total: 3.6s	remaining: 1.26s
74:	learn: 0.2465057	test: 0.2706138	best: 0.2706138 (74)	total: 3.67s	remaining: 1.22s
75:	learn: 0.2456312	test: 0.2700373	best: 0.2700373 (75)	total: 3.73s	remaining: 1.18s
76:	learn: 0.2441825	test: 0.2684334	best: 0.2684334 (76)	total: 3.8s	remaining: 1.13s
77:	learn: 0.2412391	test: 0.2665333	best: 0.2665333 (77)	total: 3.86s	remaining: 1.09s
78:	learn: 0.2402628	test: 0.2656466	best: 0.2656466 (78)	total: 3.93s	remaining: 1.04s
79:	learn: 0.2394571	test: 0.2649935	best: 0.2649935 (79)	total: 4s	remaining: 1s
80:	learn: 0.2368698	test: 0.2629538	best: 0.2629538 (80)	total: 4.07s	remaining: 955ms
81:	learn: 0.2349650	test: 0.2611767	bes

66:	learn: 0.1385219	test: 0.1973144	best: 0.1972073 (65)	total: 1.97s	remaining: 970ms
67:	learn: 0.1375994	test: 0.1975372	best: 0.1972073 (65)	total: 2s	remaining: 941ms
68:	learn: 0.1363939	test: 0.1991165	best: 0.1972073 (65)	total: 2.03s	remaining: 911ms
69:	learn: 0.1345729	test: 0.1987088	best: 0.1972073 (65)	total: 2.05s	remaining: 880ms
70:	learn: 0.1330891	test: 0.1982176	best: 0.1972073 (65)	total: 2.08s	remaining: 849ms
71:	learn: 0.1318063	test: 0.1999572	best: 0.1972073 (65)	total: 2.1s	remaining: 819ms
72:	learn: 0.1284642	test: 0.1997790	best: 0.1972073 (65)	total: 2.13s	remaining: 788ms
73:	learn: 0.1277218	test: 0.1994846	best: 0.1972073 (65)	total: 2.16s	remaining: 758ms
74:	learn: 0.1269766	test: 0.1986499	best: 0.1972073 (65)	total: 2.19s	remaining: 729ms
75:	learn: 0.1258406	test: 0.1988526	best: 0.1972073 (65)	total: 2.21s	remaining: 699ms
76:	learn: 0.1249764	test: 0.1981620	best: 0.1972073 (65)	total: 2.24s	remaining: 669ms
77:	learn: 0.1238576	test: 0.1969454

63:	learn: 0.1429084	test: 0.2071780	best: 0.2053830 (62)	total: 1.73s	remaining: 972ms
64:	learn: 0.1418810	test: 0.2071224	best: 0.2053830 (62)	total: 1.75s	remaining: 945ms
65:	learn: 0.1407856	test: 0.2067492	best: 0.2053830 (62)	total: 1.78s	remaining: 918ms
66:	learn: 0.1405279	test: 0.2066493	best: 0.2053830 (62)	total: 1.81s	remaining: 891ms
67:	learn: 0.1392711	test: 0.2074959	best: 0.2053830 (62)	total: 1.83s	remaining: 863ms
68:	learn: 0.1382855	test: 0.2077971	best: 0.2053830 (62)	total: 1.86s	remaining: 836ms
69:	learn: 0.1369991	test: 0.2070170	best: 0.2053830 (62)	total: 1.89s	remaining: 808ms
70:	learn: 0.1355464	test: 0.2070116	best: 0.2053830 (62)	total: 1.91s	remaining: 781ms
71:	learn: 0.1337157	test: 0.2072002	best: 0.2053830 (62)	total: 1.94s	remaining: 754ms
72:	learn: 0.1312187	test: 0.2068380	best: 0.2053830 (62)	total: 1.97s	remaining: 728ms
73:	learn: 0.1297442	test: 0.2070639	best: 0.2053830 (62)	total: 1.99s	remaining: 701ms
74:	learn: 0.1272244	test: 0.207

60:	learn: 0.1429544	test: 0.2732905	best: 0.2728880 (55)	total: 1.66s	remaining: 1.06s
61:	learn: 0.1405246	test: 0.2668361	best: 0.2668361 (61)	total: 1.69s	remaining: 1.03s
62:	learn: 0.1396940	test: 0.2647484	best: 0.2647484 (62)	total: 1.72s	remaining: 1.01s
63:	learn: 0.1386349	test: 0.2651357	best: 0.2647484 (62)	total: 1.74s	remaining: 980ms
64:	learn: 0.1372930	test: 0.2644763	best: 0.2644763 (64)	total: 1.77s	remaining: 953ms
65:	learn: 0.1351349	test: 0.2629440	best: 0.2629440 (65)	total: 1.8s	remaining: 925ms
66:	learn: 0.1332614	test: 0.2632809	best: 0.2629440 (65)	total: 1.82s	remaining: 898ms
67:	learn: 0.1299292	test: 0.2645131	best: 0.2629440 (65)	total: 1.85s	remaining: 871ms
68:	learn: 0.1251650	test: 0.2576539	best: 0.2576539 (68)	total: 1.88s	remaining: 844ms
69:	learn: 0.1241114	test: 0.2564537	best: 0.2564537 (69)	total: 1.91s	remaining: 818ms
70:	learn: 0.1227947	test: 0.2563097	best: 0.2563097 (70)	total: 1.94s	remaining: 790ms
71:	learn: 0.1217649	test: 0.2552

In [28]:
search_results["params"]

{'depth': 5, 'l2_leaf_reg': 1, 'learning_rate': 0.3}

In [29]:
tuned_model = CatBoostClassifier(iterations=100, depth=5, learning_rate=0.3, l2_leaf_reg=1)

In [30]:
tuned_model.fit(train_pool)

0:	learn: 1.0310259	total: 21.2ms	remaining: 2.1s
1:	learn: 0.8997423	total: 53.4ms	remaining: 2.61s
2:	learn: 0.7511689	total: 78.3ms	remaining: 2.53s
3:	learn: 0.6617947	total: 118ms	remaining: 2.84s
4:	learn: 0.6135078	total: 144ms	remaining: 2.73s
5:	learn: 0.5562504	total: 184ms	remaining: 2.89s
6:	learn: 0.5151695	total: 218ms	remaining: 2.89s
7:	learn: 0.4865744	total: 247ms	remaining: 2.84s
8:	learn: 0.4384613	total: 281ms	remaining: 2.84s
9:	learn: 0.4075302	total: 310ms	remaining: 2.79s
10:	learn: 0.3925027	total: 339ms	remaining: 2.74s
11:	learn: 0.3632998	total: 367ms	remaining: 2.69s
12:	learn: 0.3476781	total: 396ms	remaining: 2.65s
13:	learn: 0.3364723	total: 424ms	remaining: 2.6s
14:	learn: 0.3275757	total: 453ms	remaining: 2.57s
15:	learn: 0.3185223	total: 489ms	remaining: 2.57s
16:	learn: 0.3182146	total: 508ms	remaining: 2.48s
17:	learn: 0.3097006	total: 540ms	remaining: 2.46s
18:	learn: 0.3076015	total: 560ms	remaining: 2.39s
19:	learn: 0.3073364	total: 579ms	remain

<catboost.core.CatBoostClassifier at 0x1fb9a2eea40>

In [31]:
y_predict = tuned_model.predict(X_test)

In [32]:
y_predict

array([['unacc'],
       ['acc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['acc'],
       ['acc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['vgood'],
       ['acc'],
       ['vgood'],
       ['vgood'],
       ['good'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['acc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
       ['unacc'],
       ['acc'],
       ['unacc'],
   

In [33]:
y_train_predict = tuned_model.predict(X_train)

In [34]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [35]:
accuracy_score(y_test, y_predict)

0.9566473988439307

In [36]:
print(classification_report(y_train, y_train_predict, digits = 2))

              precision    recall  f1-score   support

         acc       0.92      0.92      0.92       304
        good       0.92      0.81      0.86        57
       unacc       0.98      0.98      0.98       970
       vgood       0.89      0.98      0.94        52

    accuracy                           0.96      1383
   macro avg       0.93      0.92      0.93      1383
weighted avg       0.96      0.96      0.96      1383



In [37]:
print(classification_report(y_test, y_predict, digits = 2))

              precision    recall  f1-score   support

         acc       0.99      0.89      0.93        80
        good       0.86      0.50      0.63        12
       unacc       0.97      1.00      0.98       241
       vgood       0.72      1.00      0.84        13

    accuracy                           0.96       346
   macro avg       0.88      0.85      0.85       346
weighted avg       0.96      0.96      0.95       346



In [38]:
confusion_matrix(y_test, y_predict)

array([[ 71,   1,   8,   0],
       [  1,   6,   0,   5],
       [  0,   0, 241,   0],
       [  0,   0,   0,  13]], dtype=int64)

In [39]:
# Sonuçları karşılaştırdığımda her hangi bir overfit olmadığını görüyor ve 0.95 lik bir score un iyi olduğunu düşünüyorum.
# Ayrıyeten bir kaç model daha denemiştim ona da diğer dosyalardan bakabilirsinizç

In [40]:
import joblib

In [41]:
joblib.dump(tuned_model , 'Catboost_model.joblib')

['Catboost_model.joblib']