Baseline

In [1]:
import xgboost as xgb
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

auc_scores = []
f1_scores = []

params = {
    'max_depth': 6,
    'eta': 0.3,
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    "gamma": 0,
    "subsample": 1,
    "colsample_bytree": 1,
    "colsample_bylevel": 1,
    "scale_pos_weight": 1,
    "reg_alpha": 0,
    "reg_lambda": 1,
}

data_folder = './data_baseline_2/'

for event in range(2):

    # load data
    dtrain = xgb.DMatrix(data_folder + '{}_train.data'.format(event))
    dtest = xgb.DMatrix(data_folder + '{}_val.data'.format(event))
    deval = xgb.DMatrix(data_folder + '{}_val.data'.format(event))
    
    # train
    evallist = [(dtrain, 'train'), (deval, 'eval')]
    bst = xgb.train(params, dtrain, num_boost_round=1000, evals=[(dtest, 'eval')], early_stopping_rounds=200)
    # predict probabilities
    y_pred_proba = bst.predict(dtest)
    # Convert predicted probabilities to class labels
    y_pred_labels = (y_pred_proba > 0.5).astype(int)
    
    y_real = dtest.get_label()
    
    # Calculate AUC
    auc = roc_auc_score(y_real, y_pred_proba)
    auc_scores.append(auc)
    
    # Calculate F1 score
    f1 = f1_score(y_real, y_pred_labels, average='macro')
    f1_scores.append(f1)
    
# Average AUC and F1 scores across all groups
baseline_avg_auc = np.mean(auc_scores)
baseline_avg_f1 = np.mean(f1_scores)

print(f"Avg AUC: {baseline_avg_auc}")
print(f"Avg F1 Score: {baseline_avg_f1}")

[0]	eval-auc:0.55698
[1]	eval-auc:0.60839
[2]	eval-auc:0.62462
[3]	eval-auc:0.62201
[4]	eval-auc:0.61981
[5]	eval-auc:0.60393
[6]	eval-auc:0.62012
[7]	eval-auc:0.62281
[8]	eval-auc:0.62107
[9]	eval-auc:0.63865
[10]	eval-auc:0.62992
[11]	eval-auc:0.61375
[12]	eval-auc:0.62084
[13]	eval-auc:0.62008
[14]	eval-auc:0.62439
[15]	eval-auc:0.63259
[16]	eval-auc:0.63017
[17]	eval-auc:0.64123
[18]	eval-auc:0.63711
[19]	eval-auc:0.62937
[20]	eval-auc:0.63467
[21]	eval-auc:0.62216
[22]	eval-auc:0.62059
[23]	eval-auc:0.61472
[24]	eval-auc:0.61297
[25]	eval-auc:0.61167
[26]	eval-auc:0.60526
[27]	eval-auc:0.61320
[28]	eval-auc:0.61194
[29]	eval-auc:0.61076
[30]	eval-auc:0.61922
[31]	eval-auc:0.61316
[32]	eval-auc:0.61251
[33]	eval-auc:0.61308
[34]	eval-auc:0.60473
[35]	eval-auc:0.60172
[36]	eval-auc:0.60111
[37]	eval-auc:0.60564
[38]	eval-auc:0.61022
[39]	eval-auc:0.61117
[40]	eval-auc:0.60847
[41]	eval-auc:0.61106
[42]	eval-auc:0.61274
[43]	eval-auc:0.61491
[44]	eval-auc:0.60980
[45]	eval-auc:0.6161

[148]	eval-auc:0.63117
[149]	eval-auc:0.63223
[150]	eval-auc:0.63096
[151]	eval-auc:0.63202
[152]	eval-auc:0.63185
[153]	eval-auc:0.63176
[154]	eval-auc:0.63147
[155]	eval-auc:0.63239
[156]	eval-auc:0.63303
[157]	eval-auc:0.63290
[158]	eval-auc:0.63366
[159]	eval-auc:0.63231
[160]	eval-auc:0.63231
[161]	eval-auc:0.63239
[162]	eval-auc:0.63180
[163]	eval-auc:0.63265
[164]	eval-auc:0.63294
[165]	eval-auc:0.63425
[166]	eval-auc:0.63391
[167]	eval-auc:0.63471
[168]	eval-auc:0.63568
[169]	eval-auc:0.63559
[170]	eval-auc:0.63585
[171]	eval-auc:0.63614
[172]	eval-auc:0.63517
[173]	eval-auc:0.63614
[174]	eval-auc:0.63719
[175]	eval-auc:0.63597
[176]	eval-auc:0.63391
[177]	eval-auc:0.63294
[178]	eval-auc:0.63349
[179]	eval-auc:0.63404
[180]	eval-auc:0.63366
[181]	eval-auc:0.63269
[182]	eval-auc:0.63332
[183]	eval-auc:0.63315
[184]	eval-auc:0.63231
[185]	eval-auc:0.63151
[186]	eval-auc:0.63185
[187]	eval-auc:0.63130
[188]	eval-auc:0.63079
[189]	eval-auc:0.63092
[190]	eval-auc:0.63244
[191]	eval-

In [2]:
print(f"Avg AUC: {baseline_avg_auc}")
print(f"Avg F1 Score: {baseline_avg_f1}")

Avg AUC: 0.624527166723174
Avg F1 Score: 0.4974266486271678
