In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import warnings
# Install a blanket "ignore" filter so no warning of any category is shown for
# the rest of the session (keeps the cell outputs below free of warning text).
# NOTE(review): this also hides deprecation and other library warnings;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [16]:
# Load the Sonar dataset. Forward slashes keep this relative path usable on
# Windows, Linux and macOS alike; a backslash-separated path only resolves
# on Windows.
sonar = pd.read_csv("../Cases/Sonar/Sonar.csv")

# Encode the string labels in 'Class' as integers so the estimators below
# receive a numeric target. The encoder is kept in `le` so predictions can be
# mapped back to the original labels.
le = LabelEncoder()
y = le.fit_transform(sonar['Class'])

# Every column other than the target is used as a feature.
X = sonar.drop('Class', axis=1)

# Position in this array == encoded integer label.
print(le.classes_)

['M' 'R']


In [17]:
# Baseline gradient-boosted classifier: only the seed is set explicitly,
# every other hyperparameter is left at the library default.
xgb = XGBClassifier(
    random_state=24,
)

In [18]:
# Hold out 30% of the rows for evaluation. Stratifying on y keeps the class
# balance the same in both partitions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [19]:
# fit() hands back the fitted estimator, so training on the train partition
# and predicting hard labels for the held-out rows can be chained.
y_pred = xgb.fit(X_train, y_train).predict(X_test)
y_pred

array([0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0])

In [20]:
# Share of held-out samples whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.746031746031746

In [21]:
# Class-membership probabilities for the held-out rows; the full matrix is
# printed, one row per test sample and one column per class.
y_pred_proba = xgb.predict_proba(X_test)
print(y_pred_proba)

# Log loss of those probabilities against the true held-out labels.
test_log_loss = log_loss(y_test, y_pred_proba)
print(test_log_loss)

[[8.83177519e-01 1.16822489e-01]
 [9.43017244e-01 5.69827780e-02]
 [7.33969212e-01 2.66030788e-01]
 [2.38176703e-01 7.61823297e-01]
 [9.94193316e-01 5.80667891e-03]
 [9.41436827e-01 5.85631542e-02]
 [1.15018487e-02 9.88498151e-01]
 [9.87586617e-01 1.24134105e-02]
 [4.14776027e-01 5.85223973e-01]
 [4.05013561e-04 9.99594986e-01]
 [7.05017805e-01 2.94982225e-01]
 [9.95264351e-01 4.73565422e-03]
 [9.91625726e-01 8.37427378e-03]
 [8.63850117e-04 9.99136150e-01]
 [1.61333680e-01 8.38666320e-01]
 [2.81706095e-01 7.18293905e-01]
 [9.99804139e-01 1.95870700e-04]
 [1.78228974e-01 8.21771026e-01]
 [9.10417318e-01 8.95827040e-02]
 [9.87586617e-01 1.24134105e-02]
 [7.50601292e-04 9.99249399e-01]
 [7.44031668e-02 9.25596833e-01]
 [2.41851807e-03 9.97581482e-01]
 [4.00908828e-01 5.99091172e-01]
 [7.65591681e-01 2.34408304e-01]
 [3.74054909e-03 9.96259451e-01]
 [9.71745849e-02 9.02825415e-01]
 [6.67804480e-03 9.93321955e-01]
 [9.99702632e-01 2.97396240e-04]
 [8.22158813e-01 1.77841157e-01]
 [9.982450

In [22]:
# Five shuffled, class-stratified folds; the fixed seed makes the fold
# assignment repeat from run to run.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=24,
)

# Show the estimator's current settings to see which names can be tuned.
print(xgb.get_params())

{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 24, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': None}


In [23]:
# Search space: 10 evenly spaced learning rates x 4 depth settings
# x 3 ensemble sizes = 120 candidate configurations.
params = dict(
    learning_rate=np.linspace(0.001, 0.9, 10),
    max_depth=[None, 3, 2, 4],
    n_estimators=[25, 50, 100],
)

# Exhaustive search scored by negated log loss (values closer to zero are
# better), cross-validated with the stratified folds defined above.
gcv = GridSearchCV(
    estimator=xgb,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

# Best mean cross-validated score and the configuration that achieved it.
print(gcv.best_score_)
print(gcv.best_params_)

-0.37394723225236914
{'learning_rate': 0.20077777777777778, 'max_depth': 2, 'n_estimators': 100}


In [24]:
# Tabulate the search results: one row per hyperparameter combination, with
# fit/score timings, per-fold test scores, their mean/std and the rank.
pd_cv = pd.DataFrame(data=gcv.cv_results_)
pd_cv

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.070425,0.006501,0.020271,0.004450,0.001,,25,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.686798,-0.684864,-0.681100,-0.684902,-0.680652,-0.683663,0.002385,117
1,0.114108,0.011142,0.016196,0.000542,0.001,,50,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.681311,-0.678040,-0.673568,-0.678539,-0.671551,-0.676602,0.003541,113
2,0.206853,0.008005,0.019400,0.006493,0.001,,100,"{'learning_rate': 0.001, 'max_depth': None, 'n...",-0.668856,-0.666323,-0.659779,-0.667785,-0.655983,-0.663745,0.005002,109
3,0.054063,0.005494,0.016918,0.000324,0.001,3,25,"{'learning_rate': 0.001, 'max_depth': 3, 'n_es...",-0.686084,-0.687169,-0.679917,-0.686087,-0.681896,-0.684231,0.002813,119
4,0.087334,0.019303,0.013967,0.004767,0.001,3,50,"{'learning_rate': 0.001, 'max_depth': 3, 'n_es...",-0.679953,-0.682316,-0.671095,-0.681907,-0.674201,-0.677894,0.004467,115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,0.058220,0.004190,0.018806,0.003696,0.9,2,50,"{'learning_rate': 0.9, 'max_depth': 2, 'n_esti...",-0.450043,-0.527938,-0.684276,-0.311333,-0.626283,-0.519975,0.131743,108
116,0.089085,0.009080,0.019055,0.003652,0.9,2,100,"{'learning_rate': 0.9, 'max_depth': 2, 'n_esti...",-0.437635,-0.527574,-0.684284,-0.312802,-0.626283,-0.517716,0.132689,107
117,0.043620,0.005577,0.014372,0.007894,0.9,4,25,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.450488,-0.488399,-0.642736,-0.380963,-0.585427,-0.509603,0.093772,105
118,0.066416,0.000506,0.016704,0.004008,0.9,4,50,"{'learning_rate': 0.9, 'max_depth': 4, 'n_esti...",-0.444218,-0.494482,-0.631530,-0.367866,-0.580130,-0.503645,0.094081,104
