<a href="https://colab.research.google.com/github/Rimich666/sberautopodpiska/blob/master/%D0%9A%D0%BE%D0%BF%D0%B8%D1%8F_%D0%B1%D0%BB%D0%BE%D0%BA%D0%BD%D0%BE%D1%82%D0%B0_%22Task3_step3_AutoML_ipynb%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 0. Mount the Google Drive that holds the dataset
from google.colab import drive

mount_point = '/content/drive'
# force_remount=True: forced re-mount
drive.mount(mount_point, force_remount=True)

Mounted at /content/drive


In [None]:
# 1. Установка библиотек
!pip install -U lightautoml pandas scikit-learn imbalanced-learn

Collecting scikit-learn
  Using cached scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)


In [None]:
# 2. Импорт модулей
import warnings
warnings.simplefilter('ignore', FutureWarning)

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    precision_score, recall_score, f1_score,
    roc_auc_score, precision_recall_curve, auc
)

from imblearn.over_sampling import RandomOverSampler

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task



In [None]:
# 3. Load the dataset
csv_path = '/content/drive/MyDrive/Проектный практикум (хакатон)/cleaned_sessions_new.csv'
df = pd.read_csv(csv_path, low_memory=False)

In [None]:
# 4) Label: 1 for rows whose event_action is a target event, 0 otherwise
target_events = [
    'sub_car_claim_click', 'sub_car_claim_submit_click',
    'sub_open_dialog_click', 'sub_custom_question_submit_click',
    'sub_call_number_click', 'sub_callback_submit_click',
    'sub_submit_success', 'sub_car_request_submit_click',
]
is_target_event = df['event_action'].isin(target_events)
df['target_column'] = is_target_event.astype(int)

In [None]:
# 5) Behavioural features (computed before event_action and session_id are dropped)
events_by_session = df.groupby('session_id')['event_action']
# Number of non-null events in the row's session
df['events_per_session'] = events_by_session.transform('count')
# Number of distinct event types in the row's session
df['unique_events_per_session'] = events_by_session.transform('nunique')

In [None]:
# 6) Time-based features
# Values that fail to parse become NaT because of errors='coerce'
df['visit_date'] = pd.to_datetime(df['visit_date'], format='%Y-%m-%d', errors='coerce')
df['visit_time'] = pd.to_timedelta(df['visit_time'], errors='coerce')
# Full timestamp = date + time-of-day offset
df['visit_dt'] = df['visit_date'] + df['visit_time']

visit_dt_parts = df['visit_dt'].dt
df['visit_hour'] = visit_dt_parts.hour
df['visit_dayofweek'] = visit_dt_parts.dayofweek

# Session duration: span between the earliest and latest timestamp of the session
visit_dt_by_session = df.groupby('session_id')['visit_dt']
sess_min = visit_dt_by_session.transform('min')
sess_max = visit_dt_by_session.transform('max')
df['session_duration_s'] = (sess_max - sess_min).dt.total_seconds()

In [None]:
# 7) Drop the columns treated as leakage sources: the raw date/time columns,
#    the event name the label was derived from, and the session key
leak_columns = ['visit_date', 'visit_time', 'visit_dt', 'event_action', 'session_id']
df.drop(columns=leak_columns, inplace=True)

In [None]:
# 8) Build a down-sampled subset: all positive rows plus (at most) twice as many
#    randomly chosen negative rows, i.e. a 1:2 positive-to-negative ratio.
# NOTE(review): the subset is built before the train/test split below, so the
# test set has this same 1:2 class ratio rather than the ratio of the full data;
# precision and PR-AUC measured on it should be read with that in mind.
is_positive = df['target_column'] == 1
is_negative = df['target_column'] == 0
n_event = int(is_positive.sum())
# sample(n=...) without replacement raises ValueError when n exceeds the number
# of available rows, so cap the request at the number of negatives present.
n_negative = min(2 * n_event, int(is_negative.sum()))
df_short = pd.concat([
    df[is_positive],
    df[is_negative].sample(n=n_negative, random_state=12)
]).reset_index(drop=True)

In [None]:
# 9) Stratified 80/20 train/test split of the down-sampled subset
split_labels = df_short['target_column']
train_df, test_df = train_test_split(
    df_short, test_size=0.2, stratify=split_labels, random_state=42
)

In [None]:
# 10) AutoML configuration
binary_task = Task('binary')
automl = TabularAutoML(
    task=binary_task,
    timeout=600,                   # 10 minutes
    cpu_limit=4,                   # up to 4 cores
    reader_params=dict(n_jobs=4),
)
# Column roles: only the target column is declared
roles = dict(target='target_column')

In [None]:
# 11) Fit the model and collect out-of-fold (OOF) predictions on the training set
oof_pred = automl.fit_predict(
    train_data=train_df,
    valid_data=None,
    roles=roles,
    verbose=1,
)
# Flatten the prediction array to 1-D so it lines up with the label vector
train_proba = oof_pred.data.ravel()
y_train_oof = train_df['target_column'].to_numpy()

[16:28:11] Stdout logging level is INFO.


INFO:lightautoml.automl.presets.base:Stdout logging level is INFO.


[16:28:11] Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer




[16:28:11] Task: binary



INFO:lightautoml.automl.presets.base:Task: binary



[16:28:11] Start automl preset with listed constraints:


INFO:lightautoml.automl.presets.base:Start automl preset with listed constraints:


[16:28:11] - time: 600.00 seconds


INFO:lightautoml.automl.presets.base:- time: 600.00 seconds


[16:28:11] - CPU: 4 cores


INFO:lightautoml.automl.presets.base:- CPU: 4 cores


[16:28:11] - memory: 16 GB



INFO:lightautoml.automl.presets.base:- memory: 16 GB



[16:28:11] [1mTrain data shape: (12736, 22)[0m



INFO:lightautoml.reader.base:[1mTrain data shape: (12736, 22)[0m

INFO3:lightautoml.reader.base:Feats was rejected during automatic roles guess: []


[16:28:24] Layer [1m1[0m train process start. Time left 587.39 secs


INFO:lightautoml.automl.base:Layer [1m1[0m train process start. Time left 587.39 secs


[16:28:25] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'tol': 1e-06, 'max_iter': 100, 'cs': [1e-05, 5e-05, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000], 'early_stopping': 2, 'categorical_idx': [0, 1, 2, 3, 4, 12], 'embed_sizes': array([12,  5,  3,  8, 23, 11], dtype=int32), 'data_size': 39}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m =====
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 1e-05 score = 0.6398345593715142
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 5e-05 score = 0.6525084734247472
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 0.0001 score = 0.65864594360571
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 0.0005 score = 0.6675973741915671
INFO3:lightautoml.ml_algo.torch_based.li

[16:28:33] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.6621754288716148[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.6621754288716148[0m


[16:28:33] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed


[16:28:33] Time left 578.49 secs



INFO:lightautoml.automl.base:Time left 578.49 secs

INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 200 rounds
INFO3:lightautoml.ml_algo.boost_lgbm:[100]	valid's auc: 0.602806
INFO3:lightautoml.ml_algo.boost_lgbm:[200]	valid's auc: 0.599106
INFO3:lightautoml.ml_algo.boost_lgbm:Early stopping, best iteration is:
[8]	valid's auc: 0.607217


[16:28:34] [1mSelector_LightGBM[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mSelector_LightGBM[0m fitting and predicting completed


[16:28:34] Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task': 'train', 'learning_rate': 0.02, 'num_leaves': 64, 'feature_fraction': 0.7, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'max_depth': -1, 'verbosity': -1, 'reg_alpha': 0.2, 'reg_lambda': 0.0, 'min_split_gain': 0.0, 'zero_as_missing': False, 'num_threads': 2, 'max_bin': 255, 'min_data_in_bin': 3, 'num_trees': 3000, 'early_stopping_rounds': 200, 'random_state': 42, 'verbose_eval': 100}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m =====
INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 200 rounds
INFO3:lightautoml.ml_algo.boost_lgbm:[100]	valid's auc: 0.611495
INFO3:lightautoml.ml_algo.boost_lgbm:[200]	valid's auc: 0.609982
INFO3:lightautoml.ml_algo.boost_lgbm:Early stopping, best iteration is:
[6]	valid's auc: 0.617503
INFO2:lightautoml.ml_algo.base

[16:28:36] Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m0.6206742981101447[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m0.6206742981101447[0m


[16:28:36] [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_0_LightGBM[0m fitting and predicting completed


[16:28:36] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ... Time budget is 117.28 secs


INFO:lightautoml.ml_algo.tuning.optuna:Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ... Time budget is 117.28 secs
Optimization Progress:   0%|          | 0/101 [00:00<?, ?it/s]INFO:optuna.storages._in_memory:A new study created in memory with name: no-name-8f6ccebd-9b69-4f9f-a201-a4d37a9404f0
INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 200 rounds
INFO3:lightautoml.ml_algo.boost_lgbm:[100]	valid's auc: 0.607429
INFO3:lightautoml.ml_algo.boost_lgbm:[200]	valid's auc: 0.596894
INFO3:lightautoml.ml_algo.boost_lgbm:Early stopping, best iteration is:
[22]	valid's auc: 0.615422
INFO:optuna.study.study:Trial 0 finished with value: 0.6154226382733279 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 244, 'bagging_fraction': 0.8659969709057025, 'min_sum_hessian_in_leaf': 0.24810409748678125, 'reg_alpha': 2.5361081166471375e-07, 'reg_lambda': 2.5348407664333426e-07}. Best is trial 0 with value: 0

[16:29:53] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed



INFO:lightautoml.ml_algo.tuning.optuna:Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed
INFO2:lightautoml.ml_algo.tuning.optuna:The set of hyperparameters [1m{'feature_fraction': 0.7679475115073882, 'num_leaves': 194, 'bagging_fraction': 0.6473614706935786, 'min_sum_hessian_in_leaf': 0.0038944331874191805, 'reg_alpha': 4.261030636449062, 'reg_lambda': 1.807937829853905e-07}[0m
 achieve 0.6335 auc


[16:29:53] Start fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task': 'train', 'learning_rate': 0.05, 'num_leaves': 194, 'feature_fraction': 0.7679475115073882, 'bagging_fraction': 0.6473614706935786, 'bagging_freq': 1, 'max_depth': -1, 'verbosity': -1, 'reg_alpha': 4.261030636449062, 'reg_lambda': 1.807937829853905e-07, 'min_split_gain': 0.0, 'zero_as_missing': False, 'num_threads': 2, 'max_bin': 255, 'min_data_in_bin': 3, 'num_trees': 3000, 'early_stopping_rounds': 100, 'random_state': 42, 'verbose_eval': 100, 'min_sum_hessian_in_leaf': 0.0038944331874191805}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m =====
INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 100 rounds
INFO3:lightautoml.ml_algo.boost_lgbm:[100]	valid's auc: 0.617638
INFO3:lightautoml.ml_algo.boost_lgbm:Early stopping, best iteration 

[16:29:55] Fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m finished. score = [1m0.6256266074839305[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m finished. score = [1m0.6256266074839305[0m


[16:29:55] [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m fitting and predicting completed


[16:29:55] Start fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task_type': 'CPU', 'thread_count': 2, 'random_seed': 42, 'num_trees': 5000, 'learning_rate': 0.035, 'l2_leaf_reg': 0.01, 'bootstrap_type': 'Bernoulli', 'grow_policy': 'SymmetricTree', 'max_depth': 5, 'min_data_in_leaf': 1, 'one_hot_max_size': 10, 'fold_permutation_block': 1, 'boosting_type': 'Plain', 'boost_from_average': True, 'od_type': 'Iter', 'od_wait': 100, 'max_bin': 32, 'feature_border_type': 'GreedyLogSum', 'nan_mode': 'Min', 'verbose': 100, 'allow_writing_files': False, 'verbose_eval': 100}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m =====
INFO3:lightautoml.ml_algo.boost_cb:0:	test: 0.5870352	best: 0.5870352 (0)	total: 49.2ms	remaining: 4m 6s
INFO3:lightautoml.ml_algo.boost_cb:100:	test: 0.6217889	best: 0.6219428 (5)	total: 327ms	remaining: 15.9s
INFO3:lightautoml.ml_algo.boost_c

[16:29:58] Fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m finished. score = [1m0.6195994123341849[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m finished. score = [1m0.6195994123341849[0m


[16:29:58] [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed


[16:29:58] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 300.00 secs


INFO:lightautoml.ml_algo.tuning.optuna:Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 300.00 secs
Optimization Progress:   0%|          | 0/101 [00:00<?, ?it/s]INFO:optuna.storages._in_memory:A new study created in memory with name: no-name-642c7483-97e2-4afa-81dc-e6ee477268ed
INFO3:lightautoml.ml_algo.boost_cb:0:	test: 0.6062476	best: 0.6062476 (0)	total: 2.58ms	remaining: 12.9s
INFO3:lightautoml.ml_algo.boost_cb:100:	test: 0.6233640	best: 0.6248455 (46)	total: 274ms	remaining: 13.3s
INFO3:lightautoml.ml_algo.boost_cb:Stopped by overfitting detector  (100 iterations wait)
INFO3:lightautoml.ml_algo.boost_cb:bestTest = 0.6248454887
INFO3:lightautoml.ml_algo.boost_cb:bestIteration = 46
INFO3:lightautoml.ml_algo.boost_cb:Shrink model to first 47 iterations.
INFO:optuna.study.study:Trial 0 finished with value: 0.6248454886855774 and parameters: {'max_depth': 4, 'nan_mode': 'Max', 'l2_leaf_reg': 0.0024430162614261413, 'min_data_in_leaf': 

[16:31:44] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed



INFO:lightautoml.ml_algo.tuning.optuna:Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed
INFO2:lightautoml.ml_algo.tuning.optuna:The set of hyperparameters [1m{'max_depth': 4, 'nan_mode': 'Min', 'l2_leaf_reg': 8.254820198435678e-06, 'min_data_in_leaf': 8}[0m
 achieve 0.6324 auc


[16:31:44] Start fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task_type': 'CPU', 'thread_count': 2, 'random_seed': 42, 'num_trees': 3000, 'learning_rate': 0.03, 'l2_leaf_reg': 8.254820198435678e-06, 'bootstrap_type': 'Bernoulli', 'grow_policy': 'SymmetricTree', 'max_depth': 4, 'min_data_in_leaf': 8, 'one_hot_max_size': 10, 'fold_permutation_block': 1, 'boosting_type': 'Plain', 'boost_from_average': True, 'od_type': 'Iter', 'od_wait': 100, 'max_bin': 32, 'feature_border_type': 'GreedyLogSum', 'nan_mode': 'Min', 'verbose': 100, 'allow_writing_files': False, 'verbose_eval': 100}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m =====
INFO3:lightautoml.ml_algo.boost_cb:0:	test: 0.5870352	best: 0.5870352 (0)	total: 3.33ms	remaining: 9.98s
INFO3:lightautoml.ml_algo.boost_cb:100:	test: 0.6213241	best: 0.6270632 (12)	total: 332ms	remaining: 9.52s
INFO

[16:31:46] Fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m finished. score = [1m0.6033946842350502[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m finished. score = [1m0.6033946842350502[0m


[16:31:46] [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m fitting and predicting completed


[16:31:46] Time left 385.20 secs



INFO:lightautoml.automl.base:Time left 385.20 secs



[16:31:46] [1mLayer 1 training completed.[0m



INFO:lightautoml.automl.base:[1mLayer 1 training completed.[0m



[16:31:46] Blending: optimization starts with equal weights. Score = [1m0.6780130[0m


INFO:lightautoml.automl.blend:Blending: optimization starts with equal weights. Score = [1m0.6780130[0m


[16:31:46] Blending: iteration [1m0[0m: score = [1m0.6855706[0m, weights = [1m[0.3503859  0.15537402 0.24492982 0.17225221 0.077058  ][0m


INFO:lightautoml.automl.blend:Blending: iteration [1m0[0m: score = [1m0.6855706[0m, weights = [1m[0.3503859  0.15537402 0.24492982 0.17225221 0.077058  ][0m


[16:31:47] Blending: iteration [1m1[0m: score = [1m0.6856607[0m, weights = [1m[0.37032697 0.15044034 0.24521966 0.16168319 0.07232989][0m


INFO:lightautoml.automl.blend:Blending: iteration [1m1[0m: score = [1m0.6856607[0m, weights = [1m[0.37032697 0.15044034 0.24521966 0.16168319 0.07232989][0m


[16:31:47] Blending: no improvements for score. Terminated.



INFO:lightautoml.automl.blend:Blending: no improvements for score. Terminated.



[16:31:47] Blending: best score = [1m0.6856607[0m, best weights = [1m[0.37032697 0.15044034 0.24521966 0.16168319 0.07232989][0m


INFO:lightautoml.automl.blend:Blending: best score = [1m0.6856607[0m, best weights = [1m[0.37032697 0.15044034 0.24521966 0.16168319 0.07232989][0m


[16:31:47] [1mAutoml preset training completed in 215.45 seconds[0m



INFO:lightautoml.automl.presets.base:[1mAutoml preset training completed in 215.45 seconds[0m



[16:31:47] Model description:
Final prediction for new objects (level 0) = 
	 0.37033 * (5 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) +
	 0.15044 * (5 averaged models Lvl_0_Pipe_1_Mod_0_LightGBM) +
	 0.24522 * (5 averaged models Lvl_0_Pipe_1_Mod_1_Tuned_LightGBM) +
	 0.16168 * (5 averaged models Lvl_0_Pipe_1_Mod_2_CatBoost) +
	 0.07233 * (5 averaged models Lvl_0_Pipe_1_Mod_3_Tuned_CatBoost) 



INFO:lightautoml.automl.presets.base:Model description:
Final prediction for new objects (level 0) = 
	 0.37033 * (5 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) +
	 0.15044 * (5 averaged models Lvl_0_Pipe_1_Mod_0_LightGBM) +
	 0.24522 * (5 averaged models Lvl_0_Pipe_1_Mod_1_Tuned_LightGBM) +
	 0.16168 * (5 averaged models Lvl_0_Pipe_1_Mod_2_CatBoost) +
	 0.07233 * (5 averaged models Lvl_0_Pipe_1_Mod_3_Tuned_CatBoost) 



In [None]:
from sklearn.metrics import roc_auc_score
import numpy as np

In [None]:
# 12) Predictions on the held-out test set
test_pred = automl.predict(test_df)
# Flatten the prediction array to 1-D so it lines up with the label vector
test_proba = test_pred.data.ravel()
y_test = test_df['target_column'].to_numpy()

In [None]:
# 13) Metric helper
def calc_metrics(y_true, proba, thr=0.5):
    """Compute binary-classification metrics from labels and predicted scores.

    Entries whose score is NaN are removed from both inputs before any metric
    is computed.

    Args:
        y_true: Array-like of true labels. Flattened to 1-D.
        proba: Array-like of predicted scores. Flattened to 1-D, so both
            ``(n,)`` and ``(n, 1)`` inputs are accepted.
        thr: Decision threshold; scores ``>= thr`` are predicted as class 1.

    Returns:
        Tuple ``(precision, recall, f1, roc_auc, pr_auc)``. Precision, recall
        and F1 use the thresholded predictions (0 when undefined, via
        ``zero_division=0``). ROC-AUC and PR-AUC use the raw scores; PR-AUC is
        the trapezoidal area under the precision-recall curve.

    Raises:
        ValueError: If ``y_true`` and ``proba`` differ in length after
            flattening.
    """
    # Coerce to flat ndarrays so boolean-mask indexing also works for lists
    # and for column-shaped (n, 1) score arrays.
    y_true = np.asarray(y_true).ravel()
    proba = np.asarray(proba).ravel()
    if y_true.shape != proba.shape:
        raise ValueError(
            f"y_true and proba must have the same length, "
            f"got {y_true.shape[0]} and {proba.shape[0]}"
        )

    # Keep only the rows that actually received a prediction
    mask = ~np.isnan(proba)
    y, p = y_true[mask], proba[mask]
    preds = (p >= thr).astype(int)
    prec = precision_score(y, preds, zero_division=0)
    rec  = recall_score(y, preds, zero_division=0)
    f1   = f1_score(y, preds, zero_division=0)
    roc  = roc_auc_score(y, p)
    prc, rec_curve, _ = precision_recall_curve(y, p)
    pr_auc = auc(rec_curve, prc)
    return prec, rec, f1, roc, pr_auc

In [None]:
# 14) Compute the metrics and print them
names    = ['Precision','Recall','F1-score','ROC-AUC','PR-AUC']
mt_train = calc_metrics(y_train_oof, train_proba)
mt_test  = calc_metrics(y_test,       test_proba)

# One (header, values) pair per report; the second header carries the blank
# line that separates the two reports.
reports = (
    ("=== Train metrics (OOF) ===", mt_train),
    ("\n=== Test metrics ===", mt_test),
)
for header, values in reports:
    print(header)
    for n, v in zip(names, values):
        print(f"{n:10s}: {v:.4f}")

=== Train metrics (OOF) ===
Precision : 0.8054
Recall    : 0.0839
F1-score  : 0.1519
ROC-AUC   : 0.6857
PR-AUC    : 0.5412

=== Test metrics ===
Precision : 0.7969
Recall    : 0.0480
F1-score  : 0.0906
ROC-AUC   : 0.6912
PR-AUC    : 0.5340
