In [35]:

import sys
from pathlib import Path
# Current working directory of the kernel; used further down to locate the
# directory that contains the local `utils` package.
root = Path.cwd()
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
from optuna import trial

# Locate the project root (the directory that contains the local `utils`
# package) so that the `utils.*` imports below resolve. The working directory
# is checked first, then its parent.
if (root / "utils").exists():
    project_root = root
elif (root.parent / "utils").exists():
    project_root = root.parent
else:
    # Fail with an actionable message instead of a NameError on `project_root`.
    raise FileNotFoundError(
        f"Could not find a 'utils' directory in {root} or {root.parent}; "
        "start the notebook from the project root or one level below it."
    )

# Put the project root first on the import path so `utils` is importable.
sys.path.insert(0, str(project_root))

from utils.data_preprocessing import (
    map_pistol_rounds,
    encode_categorical_columns
)
from utils.model_helpers import (
    train_lightgbm_model,
    model_traininng_pipeline

)

# Columns removed from the raw frame before feature preparation.
DROP_COLUMNS = [
    "demo_file",
    "tick",
    "user_name",
    "attacker_name",
]

# Columns handed to encode_categorical_columns for encoding.
CATEGORICAL_COLS = [
    "weapon",
    "attacker_team_name",
    "user_team_name",
    "map",
]

# Label column that is separated from the features when splitting.
TARGET_COL = "t_won_round"

In [2]:
# Load the processed demo table; the path is relative to the kernel's working directory.
df = pd.read_csv("../../data/data/processed_demo.csv")

In [3]:
# Preview the first rows of the freshly loaded frame.
display(df.head())

Unnamed: 0,demo_file,tick,round,attacker_X,attacker_Y,attacker_Z,attacker_name,user_X,user_Y,user_Z,...,weapon,bomb_site,is_bomb_planted,round_time_left,attacker_alive_count,user_alive_count,attacker_team_name,user_team_name,t_won_round,map
0,../../data/demos/hotu-vs-betboom-m1-anubis.dem,2764,1,113.98847,1605.707,-31.96875,dukefissura,299.37762,712.1215,-28.96875,...,usp_silencer,0,0,100.0,4,4,CT,T,0,de_anubis
1,../../data/demos/hotu-vs-betboom-m1-anubis.dem,3399,1,-130.4972,1820.2451,27.03125,S1ren,-487.00037,2119.135,18.683817,...,glock,0,0,90.0,3,4,T,CT,0,de_anubis
2,../../data/demos/hotu-vs-betboom-m1-anubis.dem,4109,1,128.15846,1279.7332,-31.952576,Boombl4,306.31857,1133.8534,-28.96875,...,glock,0,0,79.0,3,3,T,CT,0,de_anubis
3,../../data/demos/hotu-vs-betboom-m1-anubis.dem,4132,1,-999.7292,1536.6414,-30.96875,n0rb3r7,-861.1686,1614.4656,-30.96875,...,elite,0,0,78.0,2,3,CT,T,0,de_anubis
4,../../data/demos/hotu-vs-betboom-m1-anubis.dem,4195,1,-896.426,1557.873,-30.96875,n0rb3r7,-821.1482,1761.1349,43.203125,...,elite,0,0,77.0,2,2,CT,T,0,de_anubis


In [4]:
# Remove the columns listed in DROP_COLUMNS. Rebinding (instead of inplace=True)
# together with errors="ignore" keeps this cell re-runnable: a second execution
# no longer raises KeyError because the columns are already gone.
df = df.drop(columns=DROP_COLUMNS, errors="ignore")

In [5]:
# Apply the project helper; judging by the frame displayed next, it adds an
# `is_pistol_round` 0/1 column (implementation lives in utils.data_preprocessing).
df = map_pistol_rounds(df)

In [6]:
# Inspect the frame after the pistol-round flag has been added.
display(df)


Unnamed: 0,round,attacker_X,attacker_Y,attacker_Z,user_X,user_Y,user_Z,weapon,bomb_site,is_bomb_planted,round_time_left,attacker_alive_count,user_alive_count,attacker_team_name,user_team_name,t_won_round,map,is_pistol_round
0,1,113.98847,1605.7070,-31.968750,299.37762,712.1215,-28.968750,usp_silencer,0,0,100.0,4,4,CT,T,0,de_anubis,1
1,1,-130.49720,1820.2451,27.031250,-487.00037,2119.1350,18.683817,glock,0,0,90.0,3,4,T,CT,0,de_anubis,1
2,1,128.15846,1279.7332,-31.952576,306.31857,1133.8534,-28.968750,glock,0,0,79.0,3,3,T,CT,0,de_anubis,1
3,1,-999.72920,1536.6414,-30.968750,-861.16860,1614.4656,-30.968750,elite,0,0,78.0,2,3,CT,T,0,de_anubis,1
4,1,-896.42600,1557.8730,-30.968750,-821.14820,1761.1349,43.203125,elite,0,0,77.0,2,2,CT,T,0,de_anubis,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
628,24,-995.39136,-1337.7007,-161.841550,179.97043,-2343.2722,-39.968750,awp,0,0,34.0,2,2,CT,T,1,de_mirage,0
629,24,-767.96454,-1678.9241,-173.461000,174.63446,-1667.9617,-167.968750,m4a1_silencer,0,0,33.0,2,1,CT,T,1,de_mirage,0
630,24,-827.38500,-2293.9255,-174.426570,-523.15753,-1362.5345,-138.732360,ak47,0,0,31.0,0,2,T,CT,1,de_mirage,0
631,24,-799.72780,-2297.8400,-176.711000,-721.74540,-1531.3291,-167.968750,ak47,0,0,30.0,0,1,T,CT,1,de_mirage,0


In [7]:
# Encode the CATEGORICAL_COLS columns (the helper's first return element is the
# transformed frame) and drop the `round` column.
# NOTE(review): the remaining return element(s) are discarded here; presumably
# they hold the fitted encoders needed to decode or score new data. Confirm.
# NOTE(review): this cell is not re-runnable, since `round` no longer exists
# after the first execution.
df = encode_categorical_columns(df, CATEGORICAL_COLS)[0].drop(columns=["round"])

In [8]:
# Inspect the encoded frame that goes into the train/validation split.
display(df)

Unnamed: 0,attacker_X,attacker_Y,attacker_Z,user_X,user_Y,user_Z,weapon,bomb_site,is_bomb_planted,round_time_left,attacker_alive_count,user_alive_count,attacker_team_name,user_team_name,t_won_round,map,is_pistol_round
0,113.98847,1605.7070,-31.968750,299.37762,712.1215,-28.968750,19,0,0,100.0,4,4,0,1,0,0,1
1,-130.49720,1820.2451,27.031250,-487.00037,2119.1350,18.683817,7,0,0,90.0,3,4,1,0,0,0,1
2,128.15846,1279.7332,-31.952576,306.31857,1133.8534,-28.968750,7,0,0,79.0,3,3,1,0,0,0,1
3,-999.72920,1536.6414,-30.968750,-861.16860,1614.4656,-30.968750,3,0,0,78.0,2,3,0,1,0,0,1
4,-896.42600,1557.8730,-30.968750,-821.14820,1761.1349,43.203125,3,0,0,77.0,2,2,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
628,-995.39136,-1337.7007,-161.841550,179.97043,-2343.2722,-39.968750,1,0,0,34.0,2,2,0,1,1,2,0
629,-767.96454,-1678.9241,-173.461000,174.63446,-1667.9617,-167.968750,11,0,0,33.0,2,1,0,1,1,2,0
630,-827.38500,-2293.9255,-174.426570,-523.15753,-1362.5345,-138.732360,0,0,0,31.0,0,2,1,0,1,2,0
631,-799.72780,-2297.8400,-176.711000,-721.74540,-1531.3291,-167.968750,0,0,0,30.0,0,1,1,0,1,2,0


In [9]:
# Hold out 20% of the rows for validation. A fixed random_state makes the split,
# and therefore every metric reported below, reproducible across kernel restarts.
# NOTE(review): rows from the same round appear to share the same label, so a
# purely random row split may place closely related rows in both sets; consider
# a group-aware split if round/match identifiers are kept. Confirm.
X_train, X_val, y_train, y_val = train_test_split(
    df.drop(columns=TARGET_COL),
    df[TARGET_COL],
    test_size=0.2,
    random_state=42,
)

In [10]:
# Inspect the training features.
display(X_train)

Unnamed: 0,attacker_X,attacker_Y,attacker_Z,user_X,user_Y,user_Z,weapon,bomb_site,is_bomb_planted,round_time_left,attacker_alive_count,user_alive_count,attacker_team_name,user_team_name,map,is_pistol_round
352,-706.33136,-2323.26950,-178.003780,-776.76850,-1311.30970,-167.968750,0,1,0,50.0,2,4,0,1,2,0
98,-714.35190,-682.51514,106.031265,-1879.13110,175.21478,58.031250,14,0,0,32.0,3,0,0,1,0,0
509,-797.30554,-1376.39710,-167.968750,-786.53650,-2242.55200,-179.968750,11,1,1,35.0,0,0,0,1,2,0
529,-2041.41110,654.40186,-43.968750,-1248.04330,486.49146,-163.678710,0,2,0,88.0,4,3,1,0,2,0
284,1314.65110,2503.56050,70.239090,489.78006,2099.27100,104.078125,11,1,0,78.0,3,4,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,416.30356,2584.96530,95.498764,604.43164,2376.71000,67.031250,0,0,0,27.0,0,4,1,0,1,0
474,-1419.11630,162.18160,-167.968750,-2250.16330,-129.57672,-167.827880,0,2,0,44.0,4,3,1,0,2,0
562,-1879.52090,742.44500,-47.968750,-2257.16600,789.77136,-125.556580,7,2,0,64.0,3,2,1,0,2,1
94,-788.34900,229.31868,-68.416626,-583.32730,63.29404,-93.968750,4,0,0,84.0,4,3,0,1,0,0


In [11]:
# Inspect the training labels.
display(y_train)

352    1
98     0
509    0
529    1
284    1
      ..
281    0
474    1
562    1
94     0
578    0
Name: t_won_round, Length: 506, dtype: int64

In [48]:
def create_parameter_space(trial):
    """Build the LightGBM hyperparameter search space for one Optuna trial.

    Args:
        trial: Object exposing Optuna's ``suggest_int`` and ``suggest_float``
            methods (normally an ``optuna.trial.Trial``).

    Returns:
        dict: LightGBM parameter names mapped to the values suggested for
        this trial.
    """
    param_grid = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 1000, step=5),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 15, 3000, step=5),
        "max_depth": trial.suggest_int("max_depth", 2, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 100, step=5),
        "max_bin": trial.suggest_int("max_bin", 20, 300),
        "lambda_l1": trial.suggest_float("lambda_l1", 0, 0.8, step=0.02),
        "lambda_l2": trial.suggest_float("lambda_l2", 0, 0.8, step=0.02),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        # Upper bound is 0.9 rather than 0.95: starting at 0.2 with step=0.1,
        # 0.95 is unreachable, so Optuna emits a warning and falls back to
        # [0.2, 0.9]. Stating 0.9 keeps the same effective range, minus the warning.
        # NOTE(review): LightGBM only applies bagging_fraction when bagging_freq
        # is > 0. Confirm the training pipeline sets it; otherwise this value
        # is tuned without effect.
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.9, step=0.1
        ),
        # Same divisibility fix as bagging_fraction.
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.9, step=0.1
        ),
    }
    return param_grid


In [49]:
# Run the project's tuning/training helper: 10 trials over the search space from
# create_parameter_space, scored with "balanced_accuracy". It returns the model
# and a metrics dict used by the cells below.
# NOTE(review): in the captured log the trial reported as "best" is always the
# one with the LOWEST value (e.g. trial 4 at 0.753 is preferred over trial 6 at
# 0.786), so the study appears to minimise balanced accuracy. Check the study
# direction inside model_traininng_pipeline.
model, metrics = model_traininng_pipeline(
    X_train, 
    y_train, 
    X_val, 
    y_val, 
    parameter_space=create_parameter_space,
    n_trials=10,
    metric="balanced_accuracy"
)


[32m[I 2026-01-31 15:56:47,469][0m A new study created in memory with name: no-name-3fe4b4ec-b7e3-49e9-a701-73733980c09d[0m
  optuna_warn(
[32m[I 2026-01-31 15:56:47,604][0m Trial 0 finished with value: 0.7776511180992314 and parameters: {'n_estimators': 405, 'learning_rate': 0.28570714885887566, 'num_leaves': 2200, 'max_depth': 8, 'min_data_in_leaf': 30, 'max_bin': 63, 'lambda_l1': 0.04, 'lambda_l2': 0.7000000000000001, 'min_gain_to_split': 9.016725176148132, 'bagging_fraction': 0.7, 'feature_fraction': 0.2}. Best is trial 0 with value: 0.7776511180992314.[0m
  optuna_warn(


[LightGBM] [Info] Number of positive: 212, number of negative: 192
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 481
[LightGBM] [Info] Number of data points in the train set: 404, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.524752 -> initscore=0.099091
[LightGBM] [Info] Start training from score 0.099091
[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 482
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 16
[LightGBM] [Info] [binary:BoostFro

[32m[I 2026-01-31 15:56:47,798][0m Trial 1 finished with value: 0.7730913696715584 and parameters: {'n_estimators': 975, 'learning_rate': 0.2514083658321223, 'num_leaves': 645, 'max_depth': 4, 'min_data_in_leaf': 35, 'max_bin': 105, 'lambda_l1': 0.42, 'lambda_l2': 0.34, 'min_gain_to_split': 4.368437102970629, 'bagging_fraction': 0.6000000000000001, 'feature_fraction': 0.30000000000000004}. Best is trial 1 with value: 0.7730913696715584.[0m
  optuna_warn(


[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 746
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 744
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of po

[32m[I 2026-01-31 15:56:47,891][0m Trial 2 finished with value: 0.7776511180992314 and parameters: {'n_estimators': 325, 'learning_rate': 0.11624493455517058, 'num_leaves': 1375, 'max_depth': 10, 'min_data_in_leaf': 35, 'max_bin': 164, 'lambda_l1': 0.48, 'lambda_l2': 0.02, 'min_gain_to_split': 9.113172778521575, 'bagging_fraction': 0.30000000000000004, 'feature_fraction': 0.2}. Best is trial 1 with value: 0.7730913696715584.[0m
  optuna_warn(


[LightGBM] [Info] Number of positive: 212, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 920
[LightGBM] [Info] Number of data points in the train set: 404, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.524752 -> initscore=0.099091
[LightGBM] [Info] Start training from score 0.099091
[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 920
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of po

[32m[I 2026-01-31 15:56:48,234][0m Trial 3 finished with value: 0.7736809923130679 and parameters: {'n_estimators': 955, 'learning_rate': 0.2900332895916222, 'num_leaves': 2430, 'max_depth': 5, 'min_data_in_leaf': 25, 'max_bin': 212, 'lambda_l1': 0.36, 'lambda_l2': 0.1, 'min_gain_to_split': 7.427653651669052, 'bagging_fraction': 0.2, 'feature_fraction': 0.9}. Best is trial 1 with value: 0.7730913696715584.[0m


[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 921
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 915
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of po

  optuna_warn(
[32m[I 2026-01-31 15:56:48,321][0m Trial 4 finished with value: 0.7532538434661076 and parameters: {'n_estimators': 295, 'learning_rate': 0.20213146246265476, 'num_leaves': 945, 'max_depth': 7, 'min_data_in_leaf': 65, 'max_bin': 71, 'lambda_l1': 0.78, 'lambda_l2': 0.62, 'min_gain_to_split': 14.092484123462837, 'bagging_fraction': 0.9, 'feature_fraction': 0.6000000000000001}. Best is trial 4 with value: 0.7532538434661076.[0m
  optuna_warn(


[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 537
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of positive: 212, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 889
[LightGBM] [Info] Number of data points in the train set: 404, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.524752 -> initscore=0.099091
[LightGBM] [Info] Start training from score 0.099091
[LightGBM] [Info] Number of po

[32m[I 2026-01-31 15:56:48,506][0m Trial 5 finished with value: 0.7711696366177498 and parameters: {'n_estimators': 930, 'learning_rate': 0.03566282559505665, 'num_leaves': 600, 'max_depth': 2, 'min_data_in_leaf': 45, 'max_bin': 129, 'lambda_l1': 0.22, 'lambda_l2': 0.66, 'min_gain_to_split': 5.3512999004038395, 'bagging_fraction': 0.4, 'feature_fraction': 0.6000000000000001}. Best is trial 4 with value: 0.7532538434661076.[0m
  optuna_warn(
[32m[I 2026-01-31 15:56:48,607][0m Trial 6 finished with value: 0.7861809923130678 and parameters: {'n_estimators': 180, 'learning_rate': 0.2426371244186715, 'num_leaves': 235, 'max_depth': 12, 'min_data_in_leaf': 85, 'max_bin': 75, 'lambda_l1': 0.0, 'lambda_l2': 0.66, 'min_gain_to_split': 10.602860157714257, 'bagging_fraction': 0.7, 'feature_fraction': 0.8}. Best is trial 4 with value: 0.7532538434661076.[0m
  optuna_warn(
[32m[I 2026-01-31 15:56:48,667][0m Trial 7 finished with value: 0.7714011180992314 and parameters: {'n_estimators': 120

[LightGBM] [Info] Number of positive: 212, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 560
[LightGBM] [Info] Number of data points in the train set: 404, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.524752 -> initscore=0.099091
[LightGBM] [Info] Start training from score 0.099091
[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000059 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 563
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of po

  optuna_warn(
[32m[I 2026-01-31 15:56:48,832][0m Trial 8 finished with value: 0.7776511180992314 and parameters: {'n_estimators': 895, 'learning_rate': 0.1469423282969653, 'num_leaves': 370, 'max_depth': 9, 'min_data_in_leaf': 80, 'max_bin': 177, 'lambda_l1': 0.62, 'lambda_l2': 0.4, 'min_gain_to_split': 7.840992440729911, 'bagging_fraction': 0.5, 'feature_fraction': 0.2}. Best is trial 4 with value: 0.7532538434661076.[0m
  optuna_warn(


[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 925
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of positive: 213, number of negative: 192
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 925
[LightGBM] [Info] Number of data points in the train set: 405, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525926 -> initscore=0.103797
[LightGBM] [Info] Start training from score 0.103797
[LightGBM] [Info] Number of po

[32m[I 2026-01-31 15:56:48,923][0m Trial 9 finished with value: 0.7658586652690426 and parameters: {'n_estimators': 150, 'learning_rate': 0.019114463849152934, 'num_leaves': 1915, 'max_depth': 5, 'min_data_in_leaf': 60, 'max_bin': 275, 'lambda_l1': 0.2, 'lambda_l2': 0.32, 'min_gain_to_split': 11.333267078145731, 'bagging_fraction': 0.30000000000000004, 'feature_fraction': 0.2}. Best is trial 4 with value: 0.7532538434661076.[0m


[LightGBM] [Info] Number of positive: 266, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 537
[LightGBM] [Info] Number of data points in the train set: 506, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525692 -> initscore=0.102857
[LightGBM] [Info] Start training from score 0.102857


In [50]:
def metrics_to_df(metrics: dict) -> pd.DataFrame:
    """Tabulate a metrics dict as a DataFrame.

    Args:
        metrics: Mapping with a "classification_report" entry (a dict that
            becomes the table's columns) and an "auc" entry.

    Returns:
        DataFrame built from the classification report, with the AUC value
        repeated on every row in an extra "auc" column.
    """
    report_frame = pd.DataFrame.from_dict(metrics["classification_report"])
    return report_frame.assign(auc=metrics["auc"])


In [51]:
# Tabulate the tuned model's validation metrics.
df_metrics = metrics_to_df(metrics)

In [52]:
# Show the tuned model's metrics; the scalar accuracy and auc values are
# repeated on every row, including the `support` row.
display(df_metrics)

Unnamed: 0,0,1,accuracy,macro avg,weighted avg,auc
precision,0.657143,0.824561,0.732283,0.740852,0.750739,0.848089
recall,0.821429,0.661972,0.732283,0.7417,0.732283,0.848089
f1-score,0.730159,0.734375,0.732283,0.732267,0.732516,0.848089
support,56.0,71.0,0.732283,127.0,127.0,0.848089


In [53]:
# Class predictions of the tuned model on the validation features.
model.predict(X_val)



array([0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0])

In [54]:
# Baseline for comparison: LightGBM classifier with library-default
# hyperparameters, trained on the same split (fit returns the estimator itself).
test_model = lgb.LGBMClassifier().fit(X_train, y_train)
test_model.predict(X_val)

[LightGBM] [Info] Number of positive: 266, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1122
[LightGBM] [Info] Number of data points in the train set: 506, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525692 -> initscore=0.102857
[LightGBM] [Info] Start training from score 0.102857


array([1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0])

In [55]:
# Keep the baseline model's validation predictions for the report below.
test_preds = test_model.predict(X_val)

In [56]:
# Per-class precision/recall/F1 of the default-parameter baseline on the validation set.
print(classification_report(y_val, test_preds))

              precision    recall  f1-score   support

           0       0.74      0.75      0.74        56
           1       0.80      0.79      0.79        71

    accuracy                           0.77       127
   macro avg       0.77      0.77      0.77       127
weighted avg       0.77      0.77      0.77       127



In [57]:
# Re-display the tuned model's metrics next to the baseline report for comparison.
display(df_metrics)

Unnamed: 0,0,1,accuracy,macro avg,weighted avg,auc
precision,0.657143,0.824561,0.732283,0.740852,0.750739,0.848089
recall,0.821429,0.661972,0.732283,0.7417,0.732283,0.848089
f1-score,0.730159,0.734375,0.732283,0.732267,0.732516,0.848089
support,56.0,71.0,0.732283,127.0,127.0,0.848089


In [33]:
# Class-probability estimates of the baseline model on the validation set.
pred_proba = test_model.predict_proba(X_val)

In [36]:
# ROC AUC of the baseline, scored on the second probability column
# (presumably the positive class, label 1; confirm against test_model.classes_).
test_auc = roc_auc_score(y_val, pred_proba[:, 1])

In [37]:
# Baseline AUC, to compare with the `auc` column of df_metrics.
print(test_auc)

0.8606639839034205
