In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import warnings
warnings.filterwarnings('ignore')

In [40]:
sonar = pd.read_csv(r"..\Cases\Sonar\Sonar.csv")
# y = sonar['Class']
le = LabelEncoder()
y = le.fit_transform(sonar['Class'])
X = sonar.drop('Class', axis=1)
print(le.classes_)

['M' 'R']


In [41]:
catb = CatBoostClassifier(random_state=24)

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=24,stratify=y)

In [43]:
catb.fit(X_train, y_train)

y_pred = catb.predict(X_test)
y_pred

Learning rate set to 0.004517
0:	learn: 0.6889197	total: 7.88ms	remaining: 7.87s
1:	learn: 0.6859692	total: 14ms	remaining: 7s
2:	learn: 0.6822077	total: 19.9ms	remaining: 6.6s
3:	learn: 0.6789655	total: 24.7ms	remaining: 6.16s
4:	learn: 0.6757520	total: 29.3ms	remaining: 5.84s
5:	learn: 0.6725330	total: 33.5ms	remaining: 5.55s
6:	learn: 0.6692060	total: 37.2ms	remaining: 5.27s
7:	learn: 0.6656518	total: 41.5ms	remaining: 5.15s
8:	learn: 0.6630716	total: 46.3ms	remaining: 5.1s
9:	learn: 0.6600306	total: 50.3ms	remaining: 4.98s
10:	learn: 0.6577681	total: 55.2ms	remaining: 4.96s
11:	learn: 0.6554910	total: 59.5ms	remaining: 4.9s
12:	learn: 0.6520369	total: 64.2ms	remaining: 4.87s
13:	learn: 0.6501118	total: 68.7ms	remaining: 4.84s
14:	learn: 0.6472559	total: 73.1ms	remaining: 4.8s
15:	learn: 0.6443719	total: 77.9ms	remaining: 4.79s
16:	learn: 0.6418872	total: 82.1ms	remaining: 4.74s
17:	learn: 0.6393420	total: 86.5ms	remaining: 4.72s
18:	learn: 0.6366601	total: 90.7ms	remaining: 4.68s
1

array([0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0],
      dtype=int64)

In [44]:
accuracy_score(y_test, y_pred)

0.8095238095238095

In [45]:
y_pred_proba = catb.predict_proba(X_test)
print(y_pred_proba)
print(log_loss(y_test, y_pred_proba))

[[0.79801801 0.20198199]
 [0.78356636 0.21643364]
 [0.55322084 0.44677916]
 [0.37508114 0.62491886]
 [0.84012999 0.15987001]
 [0.88837403 0.11162597]
 [0.10858186 0.89141814]
 [0.93415476 0.06584524]
 [0.44333226 0.55666774]
 [0.01768793 0.98231207]
 [0.88568964 0.11431036]
 [0.83226359 0.16773641]
 [0.89157196 0.10842804]
 [0.02864098 0.97135902]
 [0.48695864 0.51304136]
 [0.45553384 0.54446616]
 [0.98106109 0.01893891]
 [0.59756563 0.40243437]
 [0.8649329  0.1350671 ]
 [0.93084249 0.06915751]
 [0.01577259 0.98422741]
 [0.56133156 0.43866844]
 [0.04668468 0.95331532]
 [0.52146039 0.47853961]
 [0.62944941 0.37055059]
 [0.06000844 0.93999156]
 [0.44672149 0.55327851]
 [0.06190464 0.93809536]
 [0.982745   0.017255  ]
 [0.77189867 0.22810133]
 [0.96715998 0.03284002]
 [0.21095135 0.78904865]
 [0.30129754 0.69870246]
 [0.77370019 0.22629981]
 [0.83762017 0.16237983]
 [0.08426035 0.91573965]
 [0.17470506 0.82529494]
 [0.71501202 0.28498798]
 [0.62908663 0.37091337]
 [0.45768555 0.54231445]


In [46]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)
print(catb.get_params())

{'random_state': 24}


In [47]:
params = {'learning_rate': np.linspace(0.001, 0.9,10), 
          'max_depth': [None,3,2,4], 
          'n_estimators':[25, 50 ,100],  
          }
gcv = GridSearchCV(catb, param_grid=params, cv=kfold, 
                   scoring='neg_log_loss', n_jobs=-1)
gcv.fit(X, y)

print(gcv.best_score_)
print(gcv.best_params_)

0:	learn: 0.6524093	total: 6.47ms	remaining: 640ms
1:	learn: 0.5867968	total: 11.9ms	remaining: 582ms
2:	learn: 0.5407864	total: 16.6ms	remaining: 536ms
3:	learn: 0.4898752	total: 21.4ms	remaining: 514ms
4:	learn: 0.4571552	total: 26.3ms	remaining: 499ms
5:	learn: 0.4116336	total: 31.1ms	remaining: 487ms
6:	learn: 0.3784222	total: 35.8ms	remaining: 476ms
7:	learn: 0.3508300	total: 41.9ms	remaining: 482ms
8:	learn: 0.3362940	total: 47.3ms	remaining: 478ms
9:	learn: 0.3200833	total: 52.9ms	remaining: 476ms
10:	learn: 0.3015137	total: 58.6ms	remaining: 474ms
11:	learn: 0.2825507	total: 63.6ms	remaining: 466ms
12:	learn: 0.2660288	total: 68.7ms	remaining: 460ms
13:	learn: 0.2508966	total: 74.1ms	remaining: 455ms
14:	learn: 0.2374387	total: 79.5ms	remaining: 451ms
15:	learn: 0.2224103	total: 85ms	remaining: 446ms
16:	learn: 0.2112474	total: 90.3ms	remaining: 441ms
17:	learn: 0.2028015	total: 95.3ms	remaining: 434ms
18:	learn: 0.1903685	total: 100ms	remaining: 428ms
19:	learn: 0.1814866	tota

In [48]:
pd_cv = pd.DataFrame(gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.094852,0.307900,0.019612,0.006048,0.001,,25,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.683703,-0.684468,-0.684657,-0.682669,-0.683399,-0.683779,0.000725,116
1,2.369200,0.348681,0.017048,0.007464,0.001,,50,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.674490,-0.676129,-0.675545,-0.672288,-0.673938,-0.674478,0.001338,112
2,3.984737,0.199130,0.013183,0.005229,0.001,,100,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.656317,-0.658721,-0.657108,-0.651677,-0.656794,-0.656123,0.002365,107
3,0.745948,0.108504,0.020823,0.009244,0.001,3,25,"{'learning_rate': 0.001, 'max_depth': 3, 'n_es...",-0.686711,-0.686230,-0.684721,-0.683415,-0.685162,-0.685248,0.001162,118
4,1.017176,0.198573,0.015882,0.009039,0.001,3,50,"{'learning_rate': 0.001, 'max_depth': 3, 'n_es...",-0.679126,-0.679262,-0.676370,-0.674033,-0.677466,-0.677252,0.001936,114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,0.793918,0.159317,0.010649,0.006673,0.9,2,50,"{'learning_rate': 0.9, 'max_depth': 2, 'n_esti...",-0.462506,-0.544026,-0.490842,-0.335755,-0.577493,-0.482124,0.083441,76
116,1.062197,0.136519,0.017205,0.009746,0.9,2,100,"{'learning_rate': 0.9, 'max_depth': 2, 'n_esti...",-0.465284,-0.514653,-0.551185,-0.339248,-0.605814,-0.495237,0.090514,84
117,0.766284,0.109752,0.014627,0.009593,0.9,4,25,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.569342,-0.697883,-0.693553,-0.576580,-0.817724,-0.671017,0.091661,111
118,0.827030,0.173086,0.010538,0.009107,0.9,4,50,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.497398,-0.652460,-0.773658,-0.512449,-0.783474,-0.643888,0.122583,105


In [51]:
import sklearn
sklearn.__version__


'1.4.2'