In [222]:
import pandas as pd
import numpy as np
from phik.report import plot_correlation_matrix
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, \
    classification_report, roc_auc_score
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
plt.style.use('dark_background')

In [334]:
# Read the competition's training table and test table from the Kaggle input
# directory; the cell's displayed value is the pair of their shapes.
df_raw, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/leopard-challenge-classification/train.csv',
        '/kaggle/input/leopard-challenge-classification/test.csv',
    )
)
(df_raw.shape, test.shape)

((13863, 26), (5942, 25))

In [353]:
# Working copy of the test table without the 'oral' and 'ID' columns.
test_cv = test.drop(columns=['oral', 'ID']).copy()
# Encode 'tartar' as an integer flag: 1 where the value is 'Y', 0 otherwise.
test_cv['tartar'] = test_cv['tartar'].eq('Y').astype('int64')

In [354]:
# Working copy of the training table without the 'oral' and 'ID' columns.
merged_data = df_raw.drop(columns=['oral', 'ID']).copy()
# Encode 'tartar' as an integer flag: 1 where the value is 'Y', 0 otherwise.
merged_data['tartar'] = merged_data['tartar'].eq('Y').astype('int64')

In [355]:
def add_engineered_features(frame):
    """Return a new frame with the BMI flag and log-transformed features.

    Reads the columns 'weight(kg)', 'height(cm)', 'triglyceride', 'Gtp' and
    'ALT' from ``frame`` and appends, in this order:

    * ``BMI_status``  - 1 when BMI < 18.5 or BMI >= 24.9, else 0
      (BMI = weight in kg divided by the square of height in metres);
    * ``log_tr``      - natural log of 'triglyceride';
    * ``log_gtp``     - natural log of 'Gtp';
    * ``log_log_alt`` - natural log applied twice to 'ALT';
    * ``BMI_log_gtp`` - natural log of BMI.

    The raw 'triglyceride', 'Gtp' and 'ALT' columns are dropped and the
    intermediate BMI is not kept. ``frame`` itself is not modified.

    Raises:
        KeyError: if any of the required input columns is missing (for example
            when the cell is re-run on an already transformed frame).
    """
    bmi = frame['weight(kg)'] / ((frame['height(cm)'] / 100) ** 2)

    engineered = frame.assign(
        # Create a binary feature based on BMI.
        BMI_status=((bmi < 18.5) | (bmi >= 24.9)).astype(int),
        log_tr=np.log(frame['triglyceride']),
        log_gtp=np.log(frame['Gtp']),
        # NOTE(review): log(log(x)) is -inf for x == 1 and NaN for x < 1;
        # confirm 'ALT' never takes such values, otherwise scaling will fail.
        log_log_alt=np.log(np.log(frame['ALT'])),
        # NOTE(review): despite its name this column is just log(BMI); 'Gtp'
        # is not involved. The name is kept so existing columns are unchanged.
        BMI_log_gtp=np.log(bmi),
    )
    return engineered.drop(['triglyceride', 'Gtp', 'ALT'], axis=1)


# Apply the identical transformation to the training and the test features so
# the two tables cannot drift apart.
merged_data = add_engineered_features(merged_data)
test_cv = add_engineered_features(test_cv)

In [356]:
from sklearn.model_selection import train_test_split

# Pairwise-interaction feature expander; in this part of the notebook it is
# referenced only by the commented-out experiment below.
polynom = PolynomialFeatures(interaction_only=True, include_bias=False)

# Split the frame into the target ('smoking') and the predictor columns.
Y = merged_data['smoking']
X = merged_data.drop(columns=['smoking'])

# Disabled experiment: polynomial expansion of the predictors.
# X_poly = pd.DataFrame(polynom.fit_transform(X))
# test_cv_poly = pd.DataFrame(polynom.transform(test_cv))

# Stratified 70/30 split into a training part and a simulated test part.
X_new_train, X_test_sim, Y_new_train, Y_test_sim = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
    stratify=Y,
)
(X_new_train.shape, X_test_sim.shape, Y_new_train.shape, Y_test_sim.shape)

((9704, 25), (4159, 25), (9704,), (4159,))

In [357]:
# X_exper = X_poly.join(Y)

In [358]:
# table = X_exper\
#     .phik_matrix()['smoking'].to_frame()\
#     .sort_values('smoking', ascending=False).round(4)

In [359]:
# strong_cols = table.iloc[1:101, :].index.to_list()
# strong_cols

In [360]:
# X_poly = X_poly[strong_cols]
# test_cv_poly = test_cv_poly[strong_cols]
# X_poly.shape, test_cv_poly.shape

In [361]:
from sklearn.preprocessing import MinMaxScaler

# Feature scaling: the min/max ranges are learned from the training part only
# and then applied to both the training part and the simulated test part.
scaler = MinMaxScaler()
scaler.fit(X_new_train)
X_scale = scaler.transform(X_new_train)

# NOTE(review): from here on `test_cv` holds the scaled hold-out split
# (X_test_sim), not the competition test features prepared earlier.
test_cv = scaler.transform(X_test_sim)

# NOTE(review): `X` is rebound to the scaled training part with a fresh
# 0..n-1 index and integer column labels; the unscaled `X` is no longer
# reachable under this name.
X = pd.DataFrame(X_scale)
(X.shape, test_cv.shape)

((9704, 25), (4159, 25))

In [362]:
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder, FunctionTransformer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, GridSearchCV, RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve, RocCurveDisplay, cohen_kappa_score, log_loss, f1_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import RFE, RFECV
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import CalibrationDisplay
from sklearn.inspection import PartialDependenceDisplay, permutation_importance
from sklearn.linear_model import LogisticRegression
from collections import Counter
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from sklearn.manifold import TSNE
import optuna

In [366]:
%%time
# Repeated stratified CV of an LGBM + XGBoost + CatBoost averaging ensemble.
#
# Reads:  X (scaled training features), Y_new_train (training labels),
#         test_cv (scaled hold-out features).
# Writes: ens_cv_scores - ensemble ROC-AUC on each CV test fold;
#         ens_preds     - ensemble probabilities for `test_cv`, one per fold;
#         ens_mean_score, optimal_threshold, ens_f1_score.

ens_cv_scores, ens_preds = list(), list()
# Not populated in this cell; kept so any later cell that expects these names
# still finds them.
hill_ens_cv_scores, hill_ens_preds = list(), list()

sk = RepeatedStratifiedKFold(n_splits = 7, n_repeats = 2, random_state = 42)

# Negative/positive class ratio for XGBoost's scale_pos_weight. Computed from
# the training labels only, so hold-out labels do not influence training.
ratio = float(np.sum(Y_new_train == 0)) / np.sum(Y_new_train == 1)

for i, (train_idx, test_idx) in enumerate(sk.split(X, Y_new_train)):

    # Positional indexing: X has a fresh RangeIndex while Y_new_train keeps the
    # original row labels, so .iloc is required to keep them aligned.
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    Y_train, Y_test = Y_new_train.iloc[train_idx], Y_new_train.iloc[test_idx]
    # Inner split: the validation part is used only for early stopping.
    X_train_sub, X_val_sub, Y_train_sub, Y_val_sub = train_test_split(
        X_train, Y_train, test_size=0.2, random_state=42, stratify=Y_train)

    print('----------------------------------------------------------')

    ##########
    ## LGBM ##
    ##########

    # NOTE(review): passing early_stopping_rounds/verbose to fit() depends on
    # the installed LightGBM version - confirm before upgrading the library.
    LGBM_md = LGBMClassifier(objective = 'binary',
                             class_weight='balanced',
                             n_estimators = 3500,
                             max_depth = 9,
                             learning_rate = 0.03,
                             num_leaves = 20,
                             reg_alpha = 2,
                             reg_lambda = 5,
                             subsample = 0.7,
                             colsample_bytree = 0.7)
    LGBM_md.fit(X_train_sub, Y_train_sub,
                eval_set=[(X_val_sub, Y_val_sub)],
                early_stopping_rounds=100,
                verbose=100)

    lgb_pred = LGBM_md.predict_proba(X_test)[:, 1]
    lgb_score = roc_auc_score(Y_test, lgb_pred)

    print('Fold', i, '==> LGBM oof ROC-AUC score is ==>', lgb_score) 

    lgb_pred_test = LGBM_md.predict_proba(test_cv)[:, 1]

    #########
    ## XGB ##
    #########

    # NOTE(review): passing eval_metric/early_stopping_rounds to fit() depends
    # on the installed XGBoost version - confirm before upgrading the library.
    XGB_md = XGBClassifier(objective = 'binary:logistic',
                           tree_method = 'hist',
                           colsample_bytree = 0.7,
                           gamma = 2,
                           learning_rate = 0.01,
                           max_depth = 10,
                           min_child_weight = 10,
                           n_estimators = 2500,
                           reg_lambda=5,
                           scale_pos_weight=ratio,
                           subsample = 0.7)
    XGB_md.fit(X_train_sub, Y_train_sub,
               eval_metric="auc",
               eval_set=[(X_val_sub, Y_val_sub)],
               early_stopping_rounds=150,
               verbose=100)

    xgb_pred = XGB_md.predict_proba(X_test)[:, 1]
    xgb_score = roc_auc_score(Y_test, xgb_pred)

    print('Fold', i, '==> XGB oof ROC-AUC score is ==>', xgb_score)

    xgb_pred_test = XGB_md.predict_proba(test_cv)[:, 1]

    ##############
    ## CatBoost ##
    ##############

    Cat_md = CatBoostClassifier(auto_class_weights='Balanced',
                                eval_metric='AUC',
                                iterations = 2500,
                                learning_rate = 0.01,
                                depth = 8,
                                random_strength = 0.5,
                                bagging_temperature = 0.7,
                                border_count = 30,
                                l2_leaf_reg = 5,
                                verbose = False,
                                task_type = 'CPU')
    Cat_md.fit(X_train_sub, Y_train_sub,
               eval_set=(X_val_sub, Y_val_sub),
               early_stopping_rounds=150,
               verbose=100,
               use_best_model=True)

    cat_pred = Cat_md.predict_proba(X_test)[:, 1]
    cat_score = roc_auc_score(Y_test, cat_pred)

    print('Fold', i, '==> CatBoost oof ROC-AUC score is ==>', cat_score)

    cat_pred_test = Cat_md.predict_proba(test_cv)[:, 1]

    ##############
    ## Ensemble ##
    ##############

    # Model ensemble: plain average of the three probability vectors.
    ens_pred_1 = ( lgb_pred + xgb_pred + cat_pred ) / 3
    ens_pred_2 = ( lgb_pred_test + xgb_pred_test + cat_pred_test ) / 3
    ens_score_fold = roc_auc_score(Y_test, ens_pred_1)
    ens_cv_scores.append(ens_score_fold)
    ens_preds.append(ens_pred_2)

    # Report only after the score of the current fold has been computed.
    print('Fold', i, '==> Average Ensemble oof ROC-AUC score is ==>', ens_score_fold)

    # Search for the F1-optimal decision threshold on this fold.
    # NOTE(review): these values are overwritten on every iteration, so after
    # the loop they describe the last fold only.
    thresholds = np.linspace(0, 1, 300)
    f1_scores = [f1_score(Y_test, ens_pred_1 > thresh) for thresh in thresholds]
    optimal_threshold = thresholds[np.argmax(f1_scores)]
    ens_f1_score = max(f1_scores)

ens_mean_score = np.mean(ens_cv_scores)
optimal_threshold, ens_mean_score

----------------------------------------------------------




[100]	valid_0's binary_logloss: 0.613776
[200]	valid_0's binary_logloss: 0.588645
[300]	valid_0's binary_logloss: 0.572096
[400]	valid_0's binary_logloss: 0.561181
[500]	valid_0's binary_logloss: 0.552708
[600]	valid_0's binary_logloss: 0.545111
[700]	valid_0's binary_logloss: 0.538404
[800]	valid_0's binary_logloss: 0.531715
[900]	valid_0's binary_logloss: 0.52683
[1000]	valid_0's binary_logloss: 0.521617
[1100]	valid_0's binary_logloss: 0.51672
[1200]	valid_0's binary_logloss: 0.5128
[1300]	valid_0's binary_logloss: 0.508855
[1400]	valid_0's binary_logloss: 0.504863
[1500]	valid_0's binary_logloss: 0.50139
[1600]	valid_0's binary_logloss: 0.499353
[1700]	valid_0's binary_logloss: 0.496953
[1800]	valid_0's binary_logloss: 0.494454
[1900]	valid_0's binary_logloss: 0.493153
[2000]	valid_0's binary_logloss: 0.492527
[2100]	valid_0's binary_logloss: 0.491723
[2200]	valid_0's binary_logloss: 0.491123
[2300]	valid_0's binary_logloss: 0.490816
[2400]	valid_0's binary_logloss: 0.489855
[2500]



[100]	validation_0-auc:0.71606
[200]	validation_0-auc:0.72438
[300]	validation_0-auc:0.72645
[400]	validation_0-auc:0.72773
[500]	validation_0-auc:0.73015
[600]	validation_0-auc:0.73094
[700]	validation_0-auc:0.73202
[800]	validation_0-auc:0.73076
[855]	validation_0-auc:0.73132
Fold 0 ==> XGB oof ROC-AUC score is ==> 0.711843463672732
0:	test: 0.6300929	best: 0.6300929 (0)	total: 2.73ms	remaining: 6.83s
100:	test: 0.6940799	best: 0.6940799 (100)	total: 292ms	remaining: 6.93s
200:	test: 0.7094875	best: 0.7094875 (200)	total: 579ms	remaining: 6.63s
300:	test: 0.7162579	best: 0.7165425 (298)	total: 883ms	remaining: 6.45s
400:	test: 0.7202695	best: 0.7205182 (399)	total: 1.19s	remaining: 6.22s
500:	test: 0.7235101	best: 0.7235773 (497)	total: 1.49s	remaining: 5.96s
600:	test: 0.7254935	best: 0.7254935 (600)	total: 1.8s	remaining: 5.7s
700:	test: 0.7256347	best: 0.7260470 (689)	total: 2.11s	remaining: 5.42s
800:	test: 0.7262779	best: 0.7265154 (796)	total: 2.41s	remaining: 5.12s
900:	test: 



[100]	valid_0's binary_logloss: 0.614521
[200]	valid_0's binary_logloss: 0.594308
[300]	valid_0's binary_logloss: 0.583487
[400]	valid_0's binary_logloss: 0.575901
[500]	valid_0's binary_logloss: 0.567977
[600]	valid_0's binary_logloss: 0.561422
[700]	valid_0's binary_logloss: 0.5551
[800]	valid_0's binary_logloss: 0.550608
[900]	valid_0's binary_logloss: 0.546399
[1000]	valid_0's binary_logloss: 0.542019
[1100]	valid_0's binary_logloss: 0.538149
[1200]	valid_0's binary_logloss: 0.534497
[1300]	valid_0's binary_logloss: 0.53212
[1400]	valid_0's binary_logloss: 0.53017
[1500]	valid_0's binary_logloss: 0.527557
[1600]	valid_0's binary_logloss: 0.524952
[1700]	valid_0's binary_logloss: 0.523083
[1800]	valid_0's binary_logloss: 0.521565
[1900]	valid_0's binary_logloss: 0.520578
[2000]	valid_0's binary_logloss: 0.519887
Fold 1 ==> LGBM oof ROC-AUC score is ==> 0.6725835591689251
[0]	validation_0-auc:0.61555




[100]	validation_0-auc:0.70684
[200]	validation_0-auc:0.70636
[250]	validation_0-auc:0.70476
Fold 1 ==> XGB oof ROC-AUC score is ==> 0.6757484836753129
0:	test: 0.6361999	best: 0.6361999 (0)	total: 3.31ms	remaining: 8.27s
100:	test: 0.6790376	best: 0.6790376 (100)	total: 300ms	remaining: 7.13s
200:	test: 0.6917379	best: 0.6919553 (198)	total: 609ms	remaining: 6.96s
300:	test: 0.6999157	best: 0.6999494 (297)	total: 911ms	remaining: 6.66s
400:	test: 0.7064642	best: 0.7066928 (397)	total: 1.22s	remaining: 6.36s
500:	test: 0.7071052	best: 0.7071859 (499)	total: 1.52s	remaining: 6.06s
600:	test: 0.7089519	best: 0.7093486 (594)	total: 1.83s	remaining: 5.79s
700:	test: 0.7112625	best: 0.7115224 (675)	total: 2.13s	remaining: 5.48s
800:	test: 0.7119662	best: 0.7121880 (798)	total: 2.42s	remaining: 5.13s
900:	test: 0.7127192	best: 0.7127909 (883)	total: 2.72s	remaining: 4.83s
1000:	test: 0.7127394	best: 0.7131091 (984)	total: 3.02s	remaining: 4.52s
1100:	test: 0.7127416	best: 0.7131517 (1009)	to



[100]	valid_0's binary_logloss: 0.614907
[200]	valid_0's binary_logloss: 0.592169
[300]	valid_0's binary_logloss: 0.579064
[400]	valid_0's binary_logloss: 0.56893
[500]	valid_0's binary_logloss: 0.560971
[600]	valid_0's binary_logloss: 0.554065
[700]	valid_0's binary_logloss: 0.546649
[800]	valid_0's binary_logloss: 0.541452
[900]	valid_0's binary_logloss: 0.536159
[1000]	valid_0's binary_logloss: 0.532136
[1100]	valid_0's binary_logloss: 0.528254
[1200]	valid_0's binary_logloss: 0.525418
[1300]	valid_0's binary_logloss: 0.521983
[1400]	valid_0's binary_logloss: 0.519206
[1500]	valid_0's binary_logloss: 0.516439
[1600]	valid_0's binary_logloss: 0.514061
[1700]	valid_0's binary_logloss: 0.512389
[1800]	valid_0's binary_logloss: 0.511275
[1900]	valid_0's binary_logloss: 0.509714
[2000]	valid_0's binary_logloss: 0.508223
[2100]	valid_0's binary_logloss: 0.507564
[2200]	valid_0's binary_logloss: 0.505896
[2300]	valid_0's binary_logloss: 0.504685
[2400]	valid_0's binary_logloss: 0.504756
Fo



[100]	validation_0-auc:0.70470
[200]	validation_0-auc:0.71248
[300]	validation_0-auc:0.71719
[400]	validation_0-auc:0.71896
[500]	validation_0-auc:0.72103
[600]	validation_0-auc:0.72349
[700]	validation_0-auc:0.72449
[800]	validation_0-auc:0.72398
[882]	validation_0-auc:0.72443
Fold 2 ==> XGB oof ROC-AUC score is ==> 0.7170337122190649
0:	test: 0.6102389	best: 0.6102389 (0)	total: 3.29ms	remaining: 8.22s
100:	test: 0.6822446	best: 0.6822446 (100)	total: 279ms	remaining: 6.63s
200:	test: 0.6932798	best: 0.6933538 (199)	total: 573ms	remaining: 6.55s
300:	test: 0.7010744	best: 0.7010744 (300)	total: 876ms	remaining: 6.4s
400:	test: 0.7051644	best: 0.7051644 (400)	total: 1.17s	remaining: 6.12s
500:	test: 0.7074369	best: 0.7074526 (499)	total: 1.44s	remaining: 5.73s
600:	test: 0.7088555	best: 0.7091536 (566)	total: 1.72s	remaining: 5.45s
700:	test: 0.7105475	best: 0.7105767 (699)	total: 2.01s	remaining: 5.15s
800:	test: 0.7126519	best: 0.7128895 (789)	total: 2.28s	remaining: 4.83s
900:	test



[100]	valid_0's binary_logloss: 0.606725
[200]	valid_0's binary_logloss: 0.586381
[300]	valid_0's binary_logloss: 0.575562
[400]	valid_0's binary_logloss: 0.567019
[500]	valid_0's binary_logloss: 0.559477
[600]	valid_0's binary_logloss: 0.553487
[700]	valid_0's binary_logloss: 0.548387
[800]	valid_0's binary_logloss: 0.543237
[900]	valid_0's binary_logloss: 0.539018
[1000]	valid_0's binary_logloss: 0.535086
[1100]	valid_0's binary_logloss: 0.531407
[1200]	valid_0's binary_logloss: 0.527048
[1300]	valid_0's binary_logloss: 0.524854
[1400]	valid_0's binary_logloss: 0.522352
[1500]	valid_0's binary_logloss: 0.520228
[1600]	valid_0's binary_logloss: 0.518416
[1700]	valid_0's binary_logloss: 0.515947
[1800]	valid_0's binary_logloss: 0.513388
[1900]	valid_0's binary_logloss: 0.512853
[2000]	valid_0's binary_logloss: 0.511309
[2100]	valid_0's binary_logloss: 0.510005
[2200]	valid_0's binary_logloss: 0.509213
[2300]	valid_0's binary_logloss: 0.508078
[2400]	valid_0's binary_logloss: 0.507385
[



[100]	validation_0-auc:0.71471
[200]	validation_0-auc:0.71776
[300]	validation_0-auc:0.71962
[400]	validation_0-auc:0.71976
[500]	validation_0-auc:0.72058
[600]	validation_0-auc:0.72221
[700]	validation_0-auc:0.72226
[800]	validation_0-auc:0.72273
[900]	validation_0-auc:0.72189
[902]	validation_0-auc:0.72176
Fold 3 ==> XGB oof ROC-AUC score is ==> 0.7137787393438388
0:	test: 0.6234648	best: 0.6234648 (0)	total: 3.22ms	remaining: 8.06s
100:	test: 0.7075781	best: 0.7075781 (100)	total: 279ms	remaining: 6.62s
200:	test: 0.7112109	best: 0.7112400 (199)	total: 585ms	remaining: 6.7s
300:	test: 0.7146914	best: 0.7146914 (300)	total: 892ms	remaining: 6.52s
400:	test: 0.7173045	best: 0.7173336 (398)	total: 1.18s	remaining: 6.2s
500:	test: 0.7190615	best: 0.7195232 (489)	total: 1.45s	remaining: 5.8s
600:	test: 0.7191848	best: 0.7196352 (534)	total: 1.74s	remaining: 5.5s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 0.7196352374
bestIteration = 534

Shrink model to first 535 



[100]	valid_0's binary_logloss: 0.607388
[200]	valid_0's binary_logloss: 0.585177
[300]	valid_0's binary_logloss: 0.572592
[400]	valid_0's binary_logloss: 0.56413
[500]	valid_0's binary_logloss: 0.556871
[600]	valid_0's binary_logloss: 0.55114
[700]	valid_0's binary_logloss: 0.545421
[800]	valid_0's binary_logloss: 0.540913
[900]	valid_0's binary_logloss: 0.536002
[1000]	valid_0's binary_logloss: 0.531962
[1100]	valid_0's binary_logloss: 0.528147
[1200]	valid_0's binary_logloss: 0.524589
[1300]	valid_0's binary_logloss: 0.521727
[1400]	valid_0's binary_logloss: 0.519761
[1500]	valid_0's binary_logloss: 0.517158
[1600]	valid_0's binary_logloss: 0.515417
[1700]	valid_0's binary_logloss: 0.514397
[1800]	valid_0's binary_logloss: 0.512954
[1900]	valid_0's binary_logloss: 0.511098
[2000]	valid_0's binary_logloss: 0.510981
Fold 4 ==> LGBM oof ROC-AUC score is ==> 0.6788975716869027
[0]	validation_0-auc:0.61279




[100]	validation_0-auc:0.69472
[200]	validation_0-auc:0.70527
[300]	validation_0-auc:0.70855
[400]	validation_0-auc:0.70911
[500]	validation_0-auc:0.70861
[518]	validation_0-auc:0.70913
Fold 4 ==> XGB oof ROC-AUC score is ==> 0.6965674244381297
0:	test: 0.5874536	best: 0.5874536 (0)	total: 3.92ms	remaining: 9.8s
100:	test: 0.6775293	best: 0.6775293 (100)	total: 308ms	remaining: 7.32s
200:	test: 0.6909110	best: 0.6909244 (199)	total: 621ms	remaining: 7.1s
300:	test: 0.6985643	best: 0.6985643 (300)	total: 910ms	remaining: 6.65s
400:	test: 0.7032931	best: 0.7033581 (397)	total: 1.22s	remaining: 6.37s
500:	test: 0.7052384	best: 0.7052384 (500)	total: 1.52s	remaining: 6.05s
600:	test: 0.7068161	best: 0.7068385 (599)	total: 1.83s	remaining: 5.78s
700:	test: 0.7064911	best: 0.7074414 (672)	total: 2.11s	remaining: 5.42s
800:	test: 0.7065696	best: 0.7074414 (672)	total: 2.4s	remaining: 5.09s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 0.7074413726
bestIteration = 672

Shr



[100]	valid_0's binary_logloss: 0.610036
[200]	valid_0's binary_logloss: 0.58904
[300]	valid_0's binary_logloss: 0.576986
[400]	valid_0's binary_logloss: 0.569739
[500]	valid_0's binary_logloss: 0.562277
[600]	valid_0's binary_logloss: 0.555576
[700]	valid_0's binary_logloss: 0.549775
[800]	valid_0's binary_logloss: 0.543424
[900]	valid_0's binary_logloss: 0.537939
[1000]	valid_0's binary_logloss: 0.533167
[1100]	valid_0's binary_logloss: 0.529035
[1200]	valid_0's binary_logloss: 0.526284
[1300]	valid_0's binary_logloss: 0.523857
[1400]	valid_0's binary_logloss: 0.520844
[1500]	valid_0's binary_logloss: 0.519661
[1600]	valid_0's binary_logloss: 0.517633
[1700]	valid_0's binary_logloss: 0.516984
[1800]	valid_0's binary_logloss: 0.515632
[1900]	valid_0's binary_logloss: 0.514508
[2000]	valid_0's binary_logloss: 0.513338
[2100]	valid_0's binary_logloss: 0.51211
[2200]	valid_0's binary_logloss: 0.511372
[2300]	valid_0's binary_logloss: 0.510859
[2400]	valid_0's binary_logloss: 0.51003
[250



[100]	validation_0-auc:0.69071
[200]	validation_0-auc:0.69840
[300]	validation_0-auc:0.69956
[400]	validation_0-auc:0.70085
[500]	validation_0-auc:0.70257
[600]	validation_0-auc:0.70407
[700]	validation_0-auc:0.70498
[800]	validation_0-auc:0.70696
[900]	validation_0-auc:0.70792
[1000]	validation_0-auc:0.70722
[1027]	validation_0-auc:0.70726
Fold 5 ==> XGB oof ROC-AUC score is ==> 0.6935320330663911
0:	test: 0.5891905	best: 0.5891905 (0)	total: 3.29ms	remaining: 8.21s
100:	test: 0.6753241	best: 0.6756020 (95)	total: 324ms	remaining: 7.69s
200:	test: 0.6841361	best: 0.6841361 (200)	total: 628ms	remaining: 7.18s
300:	test: 0.6899226	best: 0.6899226 (300)	total: 944ms	remaining: 6.9s
400:	test: 0.6942054	best: 0.6942054 (400)	total: 1.25s	remaining: 6.57s
500:	test: 0.6964667	best: 0.6964667 (500)	total: 1.55s	remaining: 6.19s
600:	test: 0.6969776	best: 0.6969776 (600)	total: 1.86s	remaining: 5.88s
700:	test: 0.6978875	best: 0.6978875 (700)	total: 2.12s	remaining: 5.44s
800:	test: 0.698225



[100]	valid_0's binary_logloss: 0.61651
[200]	valid_0's binary_logloss: 0.59548
[300]	valid_0's binary_logloss: 0.584079
[400]	valid_0's binary_logloss: 0.57505
[500]	valid_0's binary_logloss: 0.566792
[600]	valid_0's binary_logloss: 0.558461
[700]	valid_0's binary_logloss: 0.552677
[800]	valid_0's binary_logloss: 0.54751
[900]	valid_0's binary_logloss: 0.542348
[1000]	valid_0's binary_logloss: 0.537905
[1100]	valid_0's binary_logloss: 0.533525
[1200]	valid_0's binary_logloss: 0.530337
[1300]	valid_0's binary_logloss: 0.52723
[1400]	valid_0's binary_logloss: 0.524445
[1500]	valid_0's binary_logloss: 0.522431
[1600]	valid_0's binary_logloss: 0.5202
[1700]	valid_0's binary_logloss: 0.518689
[1800]	valid_0's binary_logloss: 0.516972
[1900]	valid_0's binary_logloss: 0.516559
[2000]	valid_0's binary_logloss: 0.51564
[2100]	valid_0's binary_logloss: 0.515355
[2200]	valid_0's binary_logloss: 0.514493
[2300]	valid_0's binary_logloss: 0.514388
[2400]	valid_0's binary_logloss: 0.514685
Fold 6 ==



[100]	validation_0-auc:0.70730
[200]	validation_0-auc:0.70923
[300]	validation_0-auc:0.71051
[400]	validation_0-auc:0.71026
[500]	validation_0-auc:0.71042
[509]	validation_0-auc:0.71080
Fold 6 ==> XGB oof ROC-AUC score is ==> 0.7360598036683028
0:	test: 0.6123243	best: 0.6123243 (0)	total: 3.52ms	remaining: 8.79s
100:	test: 0.6925111	best: 0.6925111 (100)	total: 295ms	remaining: 7.01s
200:	test: 0.7022846	best: 0.7022846 (200)	total: 603ms	remaining: 6.89s
300:	test: 0.7076319	best: 0.7078829 (279)	total: 908ms	remaining: 6.63s
400:	test: 0.7097968	best: 0.7097968 (400)	total: 1.22s	remaining: 6.4s
500:	test: 0.7111840	best: 0.7114305 (470)	total: 1.54s	remaining: 6.13s
600:	test: 0.7113006	best: 0.7120289 (546)	total: 1.84s	remaining: 5.83s
700:	test: 0.7130688	best: 0.7130688 (700)	total: 2.15s	remaining: 5.53s
800:	test: 0.7130128	best: 0.7136941 (735)	total: 2.46s	remaining: 5.21s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 0.713694062
bestIteration = 735

Sh



[100]	valid_0's binary_logloss: 0.622867
[200]	valid_0's binary_logloss: 0.602381
[300]	valid_0's binary_logloss: 0.588443
[400]	valid_0's binary_logloss: 0.580193
[500]	valid_0's binary_logloss: 0.571402
[600]	valid_0's binary_logloss: 0.564148
[700]	valid_0's binary_logloss: 0.557182
[800]	valid_0's binary_logloss: 0.552158
[900]	valid_0's binary_logloss: 0.54725
[1000]	valid_0's binary_logloss: 0.542855
[1100]	valid_0's binary_logloss: 0.540322
[1200]	valid_0's binary_logloss: 0.53679
[1300]	valid_0's binary_logloss: 0.534282
[1400]	valid_0's binary_logloss: 0.531782
[1500]	valid_0's binary_logloss: 0.529704
[1600]	valid_0's binary_logloss: 0.527876
[1700]	valid_0's binary_logloss: 0.526314
[1800]	valid_0's binary_logloss: 0.526264
[1900]	valid_0's binary_logloss: 0.525567
[2000]	valid_0's binary_logloss: 0.524794
[2100]	valid_0's binary_logloss: 0.523928
[2200]	valid_0's binary_logloss: 0.52351
[2300]	valid_0's binary_logloss: 0.523393
[2400]	valid_0's binary_logloss: 0.523286
Fold



[100]	validation_0-auc:0.64847
[200]	validation_0-auc:0.66072
[300]	validation_0-auc:0.66638
[400]	validation_0-auc:0.67127
[500]	validation_0-auc:0.67193
[600]	validation_0-auc:0.67372
[700]	validation_0-auc:0.67500
[800]	validation_0-auc:0.67597
[900]	validation_0-auc:0.67665
[1000]	validation_0-auc:0.67688
[1100]	validation_0-auc:0.67707
[1200]	validation_0-auc:0.67790
[1300]	validation_0-auc:0.67823
[1400]	validation_0-auc:0.67800
[1500]	validation_0-auc:0.67885
[1600]	validation_0-auc:0.67867
[1700]	validation_0-auc:0.67935
[1800]	validation_0-auc:0.67914
[1874]	validation_0-auc:0.67923
Fold 7 ==> XGB oof ROC-AUC score is ==> 0.7278616595689766
0:	test: 0.5926395	best: 0.5926395 (0)	total: 2.89ms	remaining: 7.23s
100:	test: 0.6430476	best: 0.6430476 (100)	total: 282ms	remaining: 6.71s
200:	test: 0.6490471	best: 0.6492846 (199)	total: 574ms	remaining: 6.56s
300:	test: 0.6541590	best: 0.6542845 (298)	total: 883ms	remaining: 6.45s
400:	test: 0.6563015	best: 0.6563015 (400)	total: 1.1



[100]	valid_0's binary_logloss: 0.613516
[200]	valid_0's binary_logloss: 0.588654
[300]	valid_0's binary_logloss: 0.574827
[400]	valid_0's binary_logloss: 0.563642
[500]	valid_0's binary_logloss: 0.554484
[600]	valid_0's binary_logloss: 0.547225
[700]	valid_0's binary_logloss: 0.539879
[800]	valid_0's binary_logloss: 0.533847
[900]	valid_0's binary_logloss: 0.529591
[1000]	valid_0's binary_logloss: 0.524828
[1100]	valid_0's binary_logloss: 0.52096
[1200]	valid_0's binary_logloss: 0.517075
[1300]	valid_0's binary_logloss: 0.514009
[1400]	valid_0's binary_logloss: 0.510847
[1500]	valid_0's binary_logloss: 0.508429
[1600]	valid_0's binary_logloss: 0.506188
[1700]	valid_0's binary_logloss: 0.504655
[1800]	valid_0's binary_logloss: 0.503957
[1900]	valid_0's binary_logloss: 0.502199
[2000]	valid_0's binary_logloss: 0.501197
[2100]	valid_0's binary_logloss: 0.500176
[2200]	valid_0's binary_logloss: 0.499147
[2300]	valid_0's binary_logloss: 0.498907
[2400]	valid_0's binary_logloss: 0.498812
[2



[100]	validation_0-auc:0.70769
[200]	validation_0-auc:0.71625
[300]	validation_0-auc:0.72301
[400]	validation_0-auc:0.72524
[500]	validation_0-auc:0.72626
[600]	validation_0-auc:0.72683
[700]	validation_0-auc:0.72726
[800]	validation_0-auc:0.72706
[881]	validation_0-auc:0.72756
Fold 8 ==> XGB oof ROC-AUC score is ==> 0.7057684862562911
0:	test: 0.5909072	best: 0.5909072 (0)	total: 2.94ms	remaining: 7.35s
100:	test: 0.6845395	best: 0.6851132 (92)	total: 273ms	remaining: 6.49s
200:	test: 0.6999650	best: 0.6999650 (200)	total: 572ms	remaining: 6.54s
300:	test: 0.7070245	best: 0.7071007 (291)	total: 877ms	remaining: 6.41s
400:	test: 0.7100680	best: 0.7100680 (400)	total: 1.19s	remaining: 6.25s
500:	test: 0.7121365	best: 0.7121791 (495)	total: 1.51s	remaining: 6.04s
600:	test: 0.7130755	best: 0.7131898 (591)	total: 1.81s	remaining: 5.73s
700:	test: 0.7140123	best: 0.7143328 (687)	total: 2.12s	remaining: 5.45s
800:	test: 0.7141759	best: 0.7145995 (781)	total: 2.43s	remaining: 5.16s
900:	test



[100]	valid_0's binary_logloss: 0.602818
[200]	valid_0's binary_logloss: 0.577726
[300]	valid_0's binary_logloss: 0.564678
[400]	valid_0's binary_logloss: 0.552945
[500]	valid_0's binary_logloss: 0.543925
[600]	valid_0's binary_logloss: 0.535937
[700]	valid_0's binary_logloss: 0.527996
[800]	valid_0's binary_logloss: 0.521027
[900]	valid_0's binary_logloss: 0.515282
[1000]	valid_0's binary_logloss: 0.509232
[1100]	valid_0's binary_logloss: 0.505221
[1200]	valid_0's binary_logloss: 0.500755
[1300]	valid_0's binary_logloss: 0.497119
[1400]	valid_0's binary_logloss: 0.49374
[1500]	valid_0's binary_logloss: 0.491368
[1600]	valid_0's binary_logloss: 0.488787
[1700]	valid_0's binary_logloss: 0.487273
[1800]	valid_0's binary_logloss: 0.484416
[1900]	valid_0's binary_logloss: 0.481945
[2000]	valid_0's binary_logloss: 0.480489
[2100]	valid_0's binary_logloss: 0.479055
[2200]	valid_0's binary_logloss: 0.478141
[2300]	valid_0's binary_logloss: 0.476954
[2400]	valid_0's binary_logloss: 0.476261
[2



[100]	validation_0-auc:0.73332
[200]	validation_0-auc:0.73712
[300]	validation_0-auc:0.73851
[400]	validation_0-auc:0.73851
[500]	validation_0-auc:0.73860
[529]	validation_0-auc:0.73781
Fold 9 ==> XGB oof ROC-AUC score is ==> 0.7014046757943684
0:	test: 0.6200662	best: 0.6200662 (0)	total: 3.47ms	remaining: 8.67s
100:	test: 0.7134632	best: 0.7141109 (99)	total: 297ms	remaining: 7.04s
200:	test: 0.7271721	best: 0.7273065 (198)	total: 605ms	remaining: 6.92s
300:	test: 0.7348680	best: 0.7348680 (300)	total: 921ms	remaining: 6.73s
400:	test: 0.7392158	best: 0.7392158 (400)	total: 1.24s	remaining: 6.46s
500:	test: 0.7416474	best: 0.7417886 (496)	total: 1.55s	remaining: 6.17s
600:	test: 0.7421539	best: 0.7427657 (530)	total: 1.83s	remaining: 5.8s
700:	test: 0.7423332	best: 0.7430212 (661)	total: 2.14s	remaining: 5.5s
800:	test: 0.7445720	best: 0.7445720 (800)	total: 2.45s	remaining: 5.21s
900:	test: 0.7444264	best: 0.7446438 (805)	total: 2.75s	remaining: 4.87s
Stopped by overfitting detector



[100]	valid_0's binary_logloss: 0.608381
[200]	valid_0's binary_logloss: 0.585665
[300]	valid_0's binary_logloss: 0.57246
[400]	valid_0's binary_logloss: 0.561857
[500]	valid_0's binary_logloss: 0.554744
[600]	valid_0's binary_logloss: 0.547565
[700]	valid_0's binary_logloss: 0.541698
[800]	valid_0's binary_logloss: 0.535484
[900]	valid_0's binary_logloss: 0.530806
[1000]	valid_0's binary_logloss: 0.525719
[1100]	valid_0's binary_logloss: 0.521557
[1200]	valid_0's binary_logloss: 0.518209
[1300]	valid_0's binary_logloss: 0.514837
[1400]	valid_0's binary_logloss: 0.512856
[1500]	valid_0's binary_logloss: 0.510712
[1600]	valid_0's binary_logloss: 0.508405
[1700]	valid_0's binary_logloss: 0.506278
[1800]	valid_0's binary_logloss: 0.505015
[1900]	valid_0's binary_logloss: 0.504303
[2000]	valid_0's binary_logloss: 0.503284
[2100]	valid_0's binary_logloss: 0.502596
[2200]	valid_0's binary_logloss: 0.501613
[2300]	valid_0's binary_logloss: 0.501048
[2400]	valid_0's binary_logloss: 0.500686
[2



[100]	validation_0-auc:0.70344
[200]	validation_0-auc:0.71093
[300]	validation_0-auc:0.71302
[400]	validation_0-auc:0.71505
[500]	validation_0-auc:0.71549
[600]	validation_0-auc:0.71675
[700]	validation_0-auc:0.71674
[800]	validation_0-auc:0.71721
[900]	validation_0-auc:0.71702
[956]	validation_0-auc:0.71718
Fold 10 ==> XGB oof ROC-AUC score is ==> 0.715848617928184
0:	test: 0.6134897	best: 0.6134897 (0)	total: 3.34ms	remaining: 8.35s
100:	test: 0.6915855	best: 0.6915855 (100)	total: 297ms	remaining: 7.05s
200:	test: 0.7022039	best: 0.7022039 (200)	total: 588ms	remaining: 6.73s
300:	test: 0.7080016	best: 0.7081563 (299)	total: 881ms	remaining: 6.44s
400:	test: 0.7126138	best: 0.7126138 (400)	total: 1.16s	remaining: 6.06s
500:	test: 0.7130867	best: 0.7132212 (458)	total: 1.46s	remaining: 5.83s
600:	test: 0.7136761	best: 0.7142499 (591)	total: 1.76s	remaining: 5.55s
700:	test: 0.7135775	best: 0.7142499 (591)	total: 2.07s	remaining: 5.31s
Stopped by overfitting detector  (150 iterations w



[100]	valid_0's binary_logloss: 0.61786
[200]	valid_0's binary_logloss: 0.595386
[300]	valid_0's binary_logloss: 0.583456
[400]	valid_0's binary_logloss: 0.571897
[500]	valid_0's binary_logloss: 0.563274
[600]	valid_0's binary_logloss: 0.556134
[700]	valid_0's binary_logloss: 0.548595
[800]	valid_0's binary_logloss: 0.542579
[900]	valid_0's binary_logloss: 0.53731
[1000]	valid_0's binary_logloss: 0.532831
[1100]	valid_0's binary_logloss: 0.52947
[1200]	valid_0's binary_logloss: 0.525692
[1300]	valid_0's binary_logloss: 0.522694
[1400]	valid_0's binary_logloss: 0.520067
[1500]	valid_0's binary_logloss: 0.518197
[1600]	valid_0's binary_logloss: 0.516429
[1700]	valid_0's binary_logloss: 0.514626
[1800]	valid_0's binary_logloss: 0.513218
[1900]	valid_0's binary_logloss: 0.512159
[2000]	valid_0's binary_logloss: 0.511016
[2100]	valid_0's binary_logloss: 0.509783
[2200]	valid_0's binary_logloss: 0.509739
[2300]	valid_0's binary_logloss: 0.508547
[2400]	valid_0's binary_logloss: 0.508328
Fold



[100]	validation_0-auc:0.69314
[200]	validation_0-auc:0.70252
[300]	validation_0-auc:0.70588
[400]	validation_0-auc:0.70818
[500]	validation_0-auc:0.70878
[600]	validation_0-auc:0.71079
[700]	validation_0-auc:0.71128
[800]	validation_0-auc:0.71247
[900]	validation_0-auc:0.71274
[1000]	validation_0-auc:0.71214
[1037]	validation_0-auc:0.71196
Fold 11 ==> XGB oof ROC-AUC score is ==> 0.7149573753552053
0:	test: 0.6102826	best: 0.6102826 (0)	total: 3.1ms	remaining: 7.75s
100:	test: 0.6770273	best: 0.6770273 (100)	total: 297ms	remaining: 7.05s
200:	test: 0.6862988	best: 0.6864019 (199)	total: 595ms	remaining: 6.8s
300:	test: 0.6934905	best: 0.6934905 (300)	total: 895ms	remaining: 6.54s
400:	test: 0.6966908	best: 0.6967042 (399)	total: 1.21s	remaining: 6.32s
500:	test: 0.6998395	best: 0.6998395 (500)	total: 1.52s	remaining: 6.07s
600:	test: 0.7020269	best: 0.7020269 (600)	total: 1.81s	remaining: 5.72s
700:	test: 0.7032393	best: 0.7033782 (699)	total: 2.11s	remaining: 5.41s
800:	test: 0.70417



[100]	valid_0's binary_logloss: 0.605717
[200]	valid_0's binary_logloss: 0.582763
[300]	valid_0's binary_logloss: 0.569823
[400]	valid_0's binary_logloss: 0.559249
[500]	valid_0's binary_logloss: 0.550365
[600]	valid_0's binary_logloss: 0.54279
[700]	valid_0's binary_logloss: 0.535554
[800]	valid_0's binary_logloss: 0.529571
[900]	valid_0's binary_logloss: 0.525099
[1000]	valid_0's binary_logloss: 0.520238
[1100]	valid_0's binary_logloss: 0.517153
[1200]	valid_0's binary_logloss: 0.515044
[1300]	valid_0's binary_logloss: 0.513224
[1400]	valid_0's binary_logloss: 0.510826
[1500]	valid_0's binary_logloss: 0.509117
[1600]	valid_0's binary_logloss: 0.508158
[1700]	valid_0's binary_logloss: 0.507279
[1800]	valid_0's binary_logloss: 0.506821
[1900]	valid_0's binary_logloss: 0.506108
[2000]	valid_0's binary_logloss: 0.505535
[2100]	valid_0's binary_logloss: 0.505294
[2200]	valid_0's binary_logloss: 0.505121
Fold 12 ==> LGBM oof ROC-AUC score is ==> 0.6853203306639112
[0]	validation_0-auc:0.61



[100]	validation_0-auc:0.69468
[200]	validation_0-auc:0.70352
[300]	validation_0-auc:0.70674
[400]	validation_0-auc:0.70942
[500]	validation_0-auc:0.70880
[547]	validation_0-auc:0.70921
Fold 12 ==> XGB oof ROC-AUC score is ==> 0.6967966933608888
0:	test: 0.6546611	best: 0.6546611 (0)	total: 3.37ms	remaining: 8.41s
100:	test: 0.7038713	best: 0.7041985 (98)	total: 282ms	remaining: 6.69s
200:	test: 0.7109061	best: 0.7110630 (195)	total: 590ms	remaining: 6.75s
300:	test: 0.7106977	best: 0.7117152 (220)	total: 902ms	remaining: 6.59s
400:	test: 0.7142252	best: 0.7142252 (400)	total: 1.19s	remaining: 6.24s
500:	test: 0.7157738	best: 0.7158209 (499)	total: 1.48s	remaining: 5.92s
600:	test: 0.7171790	best: 0.7173740 (584)	total: 1.79s	remaining: 5.66s
700:	test: 0.7179499	best: 0.7182099 (696)	total: 2.09s	remaining: 5.37s
800:	test: 0.7177796	best: 0.7184362 (718)	total: 2.38s	remaining: 5.05s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 0.718436245
bestIteration = 718

S



[100]	valid_0's binary_logloss: 0.616945
[200]	valid_0's binary_logloss: 0.598139
[300]	valid_0's binary_logloss: 0.58828
[400]	valid_0's binary_logloss: 0.580938
[500]	valid_0's binary_logloss: 0.573791
[600]	valid_0's binary_logloss: 0.567426
[700]	valid_0's binary_logloss: 0.561764
[800]	valid_0's binary_logloss: 0.556275
[900]	valid_0's binary_logloss: 0.55165
[1000]	valid_0's binary_logloss: 0.547858
[1100]	valid_0's binary_logloss: 0.54432
[1200]	valid_0's binary_logloss: 0.541828
[1300]	valid_0's binary_logloss: 0.539463
[1400]	valid_0's binary_logloss: 0.537359
[1500]	valid_0's binary_logloss: 0.536198
[1600]	valid_0's binary_logloss: 0.534727
[1700]	valid_0's binary_logloss: 0.533854
[1800]	valid_0's binary_logloss: 0.532378
[1900]	valid_0's binary_logloss: 0.532552
Fold 13 ==> LGBM oof ROC-AUC score is ==> 0.6768696719194007
[0]	validation_0-auc:0.61622




[100]	validation_0-auc:0.67041
[200]	validation_0-auc:0.67590
[300]	validation_0-auc:0.67758
[400]	validation_0-auc:0.68022
[500]	validation_0-auc:0.68043
[600]	validation_0-auc:0.67997
[638]	validation_0-auc:0.68051
Fold 13 ==> XGB oof ROC-AUC score is ==> 0.708996383363472
0:	test: 0.6000486	best: 0.6000486 (0)	total: 3.09ms	remaining: 7.71s
100:	test: 0.6546162	best: 0.6553894 (95)	total: 279ms	remaining: 6.63s
200:	test: 0.6697370	best: 0.6697370 (200)	total: 568ms	remaining: 6.49s
300:	test: 0.6783742	best: 0.6784011 (298)	total: 868ms	remaining: 6.34s
400:	test: 0.6805929	best: 0.6811554 (390)	total: 1.15s	remaining: 6.04s
500:	test: 0.6819622	best: 0.6819622 (500)	total: 1.46s	remaining: 5.83s
600:	test: 0.6808349	best: 0.6819622 (500)	total: 1.77s	remaining: 5.6s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 0.6819622239
bestIteration = 500

Shrink model to first 501 iterations.
Fold 13 ==> CatBoost oof ROC-AUC score is ==> 0.7033550762076982
Fold 13 ==> Av

(0.42474916387959866, 0.7093892116111749)

In [367]:
# Collapse `ens_preds` along axis 0 into a single averaged prediction array.
# NOTE(review): presumably one row of predicted probabilities per model/fold;
# confirm where `ens_preds` is filled.
avg_ens_preds = np.mean(ens_preds, axis=0)

# Binarise: strictly above `optimal_threshold` -> 1, otherwise 0.
is_positive = avg_ens_preds > optimal_threshold
final_predictions = is_positive.astype(int)

# Number of samples labelled 1.
final_predictions.sum()

1265

In [368]:
# Peek at the thresholded 0/1 labels (numpy abbreviates the repr of long arrays).
final_predictions

array([1, 0, 0, ..., 1, 1, 1])

In [369]:
# Compare the thresholded ensemble predictions with the reference labels in
# `Y_test_sim`. `confusion_matrix` is already imported in the notebook's first
# (imports) cell, so it is not re-imported here.
# Layout: rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(Y_test_sim, final_predictions)

cm

array([[2530,  789],
       [ 364,  476]])

In [241]:
# Load the competition's sample submission file; the 'smoking' column of this
# frame is overwritten with the model's predictions before it is saved.
submission = pd.read_csv(r'/kaggle/input/leopard-challenge-classification/sample_submission.csv')

In [243]:
# Write the predicted labels into the submission template and save it.
#
# NOTE(review): the `final_predictions` computed in the preceding cells is
# scored against `Y_test_sim`, so it may have been produced for that evaluation
# split rather than for the competition test set. Guard against writing a
# prediction vector whose length does not match the template, and fail with an
# explicit message instead of a generic pandas length error.
n_predictions = len(final_predictions)
n_rows = len(submission)
if n_predictions != n_rows:
    raise ValueError(
        f"final_predictions has {n_predictions} entries but the submission "
        f"template has {n_rows} rows; generate predictions for the competition "
        "test set before building the submission."
    )

submission['smoking'] = final_predictions
submission.to_csv('Ensemble_submission_05.csv', index=False)

In [245]:
# Sanity check: total of the 'smoking' column, i.e. how many rows are labelled 1
# when the column holds the 0/1 predictions assigned above.
submission['smoking'].sum()

1862