Baseline

In [1]:
import xgboost as xgb
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

auc_scores = []
f1_scores = []

params = {
    'max_depth': 6,
    'eta': 0.3,
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    "gamma": 0,
    "subsample": 1,
    "colsample_bytree": 1,
    "colsample_bylevel": 1,
    "scale_pos_weight": 1,
    "reg_alpha": 0,
    "reg_lambda": 1,
}

data_folder = './data_baseline_2/'

for event in range(13):

    # load data
    dtrain = xgb.DMatrix(data_folder + '{}_train.data'.format(event))
    dtest = xgb.DMatrix(data_folder + '{}_val.data'.format(event))
    deval = xgb.DMatrix(data_folder + '{}_val.data'.format(event))
    
    # train
    evallist = [(dtrain, 'train'), (deval, 'eval')]
    bst = xgb.train(params, dtrain, num_boost_round=1000, evals=[(dtest, 'eval')], early_stopping_rounds=200)
    # predict probabilities
    y_pred_proba = bst.predict(dtest)
    # Convert predicted probabilities to class labels
    y_pred_labels = (y_pred_proba > 0.5).astype(int)
    
    y_real = dtest.get_label()
    
    # Calculate AUC
    auc = roc_auc_score(y_real, y_pred_proba)
    auc_scores.append(auc)
    
    # Calculate F1 score
    f1 = f1_score(y_real, y_pred_labels, average='macro')
    f1_scores.append(f1)
    
# Average AUC and F1 scores across all groups
avg_auc_baseline = np.mean(auc_scores)
avg_f1_baseline = np.mean(f1_scores)

print(f"Avg AUC: {avg_auc_baseline}")
print(f"Avg F1 Score: {avg_f1_baseline}")

[0]	eval-auc:0.51441
[1]	eval-auc:0.52123
[2]	eval-auc:0.48723
[3]	eval-auc:0.50725
[4]	eval-auc:0.53624
[5]	eval-auc:0.49862
[6]	eval-auc:0.51570
[7]	eval-auc:0.52545
[8]	eval-auc:0.53909
[9]	eval-auc:0.52476
[10]	eval-auc:0.53788
[11]	eval-auc:0.54012
[12]	eval-auc:0.54720
[13]	eval-auc:0.54763
[14]	eval-auc:0.54374
[15]	eval-auc:0.53667
[16]	eval-auc:0.54582
[17]	eval-auc:0.53891
[18]	eval-auc:0.53701
[19]	eval-auc:0.54651
[20]	eval-auc:0.54513
[21]	eval-auc:0.54616
[22]	eval-auc:0.54789
[23]	eval-auc:0.53857
[24]	eval-auc:0.55755
[25]	eval-auc:0.56152
[26]	eval-auc:0.57049
[27]	eval-auc:0.57688
[28]	eval-auc:0.58257
[29]	eval-auc:0.56083
[30]	eval-auc:0.55876
[31]	eval-auc:0.57808
[32]	eval-auc:0.57843
[33]	eval-auc:0.57964
[34]	eval-auc:0.57653
[35]	eval-auc:0.57274
[36]	eval-auc:0.57343
[37]	eval-auc:0.57567
[38]	eval-auc:0.57394
[39]	eval-auc:0.57739
[40]	eval-auc:0.58085
[41]	eval-auc:0.58119
[42]	eval-auc:0.58499
[43]	eval-auc:0.58430
[44]	eval-auc:0.58619
[45]	eval-auc:0.5886

[361]	eval-auc:0.63417
[362]	eval-auc:0.63417
[363]	eval-auc:0.63451
[364]	eval-auc:0.63417
[365]	eval-auc:0.63451
[366]	eval-auc:0.63503
[367]	eval-auc:0.63469
[368]	eval-auc:0.63538
[369]	eval-auc:0.63503
[370]	eval-auc:0.63572
[371]	eval-auc:0.63693
[372]	eval-auc:0.63658
[373]	eval-auc:0.63555
[374]	eval-auc:0.63555
[375]	eval-auc:0.63451
[376]	eval-auc:0.63641
[377]	eval-auc:0.63520
[378]	eval-auc:0.63469
[379]	eval-auc:0.63399
[380]	eval-auc:0.63399
[381]	eval-auc:0.63434
[382]	eval-auc:0.63399
[383]	eval-auc:0.63417
[384]	eval-auc:0.63279
[385]	eval-auc:0.63227
[386]	eval-auc:0.63296
[387]	eval-auc:0.63244
[388]	eval-auc:0.63227
[389]	eval-auc:0.63192
[390]	eval-auc:0.63244
[391]	eval-auc:0.63244
[392]	eval-auc:0.63382
[393]	eval-auc:0.63348
[394]	eval-auc:0.63382
[395]	eval-auc:0.63330
[396]	eval-auc:0.63296
[397]	eval-auc:0.63348
[398]	eval-auc:0.63399
[399]	eval-auc:0.63417
[400]	eval-auc:0.63382
[401]	eval-auc:0.63330
[402]	eval-auc:0.63382
[403]	eval-auc:0.63348
[404]	eval-

[189]	eval-auc:0.59878
[190]	eval-auc:0.59711
[191]	eval-auc:0.59489
[192]	eval-auc:0.59841
[193]	eval-auc:0.59841
[194]	eval-auc:0.59767
[195]	eval-auc:0.59656
[196]	eval-auc:0.59526
[197]	eval-auc:0.59619
[198]	eval-auc:0.59656
[199]	eval-auc:0.59545
[200]	eval-auc:0.59545
[201]	eval-auc:0.59323
[202]	eval-auc:0.59193
[203]	eval-auc:0.59378
[204]	eval-auc:0.59452
[205]	eval-auc:0.59193
[206]	eval-auc:0.59212
[207]	eval-auc:0.59304
[208]	eval-auc:0.59175
[209]	eval-auc:0.59267
[210]	eval-auc:0.59304
[211]	eval-auc:0.59083
[212]	eval-auc:0.59249
[213]	eval-auc:0.59156
[214]	eval-auc:0.59212
[215]	eval-auc:0.59156
[216]	eval-auc:0.59471
[217]	eval-auc:0.59083
[218]	eval-auc:0.59341
[219]	eval-auc:0.59212
[220]	eval-auc:0.59452
[221]	eval-auc:0.59378
[222]	eval-auc:0.59563
[223]	eval-auc:0.59767
[224]	eval-auc:0.59711
[225]	eval-auc:0.59471
[226]	eval-auc:0.59582
[227]	eval-auc:0.59415
[228]	eval-auc:0.59046
[229]	eval-auc:0.59304
[0]	eval-auc:0.41597
[1]	eval-auc:0.45466
[2]	eval-auc:0.

[58]	eval-auc:0.52750
[59]	eval-auc:0.53570
[60]	eval-auc:0.53461
[61]	eval-auc:0.53406
[62]	eval-auc:0.54637
[63]	eval-auc:0.54747
[64]	eval-auc:0.54528
[65]	eval-auc:0.54419
[66]	eval-auc:0.54282
[67]	eval-auc:0.53516
[68]	eval-auc:0.52996
[69]	eval-auc:0.53488
[70]	eval-auc:0.53653
[71]	eval-auc:0.53434
[72]	eval-auc:0.53653
[73]	eval-auc:0.54118
[74]	eval-auc:0.53871
[75]	eval-auc:0.52668
[76]	eval-auc:0.53160
[77]	eval-auc:0.53215
[78]	eval-auc:0.53269
[79]	eval-auc:0.53488
[80]	eval-auc:0.54337
[81]	eval-auc:0.54555
[82]	eval-auc:0.54829
[83]	eval-auc:0.55130
[84]	eval-auc:0.55212
[85]	eval-auc:0.55239
[86]	eval-auc:0.55458
[87]	eval-auc:0.55294
[88]	eval-auc:0.55130
[89]	eval-auc:0.55458
[90]	eval-auc:0.54637
[91]	eval-auc:0.54501
[92]	eval-auc:0.54856
[93]	eval-auc:0.54802
[94]	eval-auc:0.54473
[95]	eval-auc:0.54829
[96]	eval-auc:0.54993
[97]	eval-auc:0.55540
[98]	eval-auc:0.55157
[99]	eval-auc:0.55732
[100]	eval-auc:0.55540
[101]	eval-auc:0.54993
[102]	eval-auc:0.55349
[103]	e

[33]	eval-auc:0.56008
[34]	eval-auc:0.56069
[35]	eval-auc:0.56214
[36]	eval-auc:0.55573
[37]	eval-auc:0.55694
[38]	eval-auc:0.55779
[39]	eval-auc:0.55464
[40]	eval-auc:0.56250
[41]	eval-auc:0.55887
[42]	eval-auc:0.55452
[43]	eval-auc:0.55513
[44]	eval-auc:0.55392
[45]	eval-auc:0.55126
[46]	eval-auc:0.55307
[47]	eval-auc:0.55114
[48]	eval-auc:0.55089
[49]	eval-auc:0.54751
[50]	eval-auc:0.55343
[51]	eval-auc:0.54884
[52]	eval-auc:0.54981
[53]	eval-auc:0.54340
[54]	eval-auc:0.54521
[55]	eval-auc:0.53989
[56]	eval-auc:0.54908
[57]	eval-auc:0.54739
[58]	eval-auc:0.54836
[59]	eval-auc:0.54944
[60]	eval-auc:0.54981
[61]	eval-auc:0.55488
[62]	eval-auc:0.55899
[63]	eval-auc:0.55404
[64]	eval-auc:0.55210
[65]	eval-auc:0.54981
[66]	eval-auc:0.54703
[67]	eval-auc:0.54884
[68]	eval-auc:0.54860
[69]	eval-auc:0.55573
[70]	eval-auc:0.55621
[71]	eval-auc:0.55513
[72]	eval-auc:0.55561
[73]	eval-auc:0.55392
[74]	eval-auc:0.55114
[75]	eval-auc:0.54993
[76]	eval-auc:0.55331
[77]	eval-auc:0.55005
[78]	eval-

[175]	eval-auc:0.65631
[176]	eval-auc:0.65799
[177]	eval-auc:0.65696
[178]	eval-auc:0.65722
[179]	eval-auc:0.65786
[180]	eval-auc:0.65864
[181]	eval-auc:0.65670
[182]	eval-auc:0.65683
[183]	eval-auc:0.65592
[184]	eval-auc:0.65773
[185]	eval-auc:0.65773
[186]	eval-auc:0.65864
[187]	eval-auc:0.65799
[188]	eval-auc:0.65929
[189]	eval-auc:0.65864
[190]	eval-auc:0.65942
[191]	eval-auc:0.65968
[192]	eval-auc:0.65825
[193]	eval-auc:0.65748
[194]	eval-auc:0.65851
[195]	eval-auc:0.65955
[196]	eval-auc:0.65838
[197]	eval-auc:0.65709
[198]	eval-auc:0.65761
[199]	eval-auc:0.65812
[200]	eval-auc:0.65748
[201]	eval-auc:0.65786
[202]	eval-auc:0.65812
[203]	eval-auc:0.65825
[204]	eval-auc:0.65929
[205]	eval-auc:0.66058
[206]	eval-auc:0.66071
[207]	eval-auc:0.66045
[208]	eval-auc:0.66097
[209]	eval-auc:0.66278
[210]	eval-auc:0.66265
[211]	eval-auc:0.66019
[212]	eval-auc:0.65968
[213]	eval-auc:0.65955
[214]	eval-auc:0.65890
[215]	eval-auc:0.65864
[216]	eval-auc:0.65942
[217]	eval-auc:0.65864
[218]	eval-

[38]	eval-auc:0.50306
[39]	eval-auc:0.49303
[40]	eval-auc:0.49031
[41]	eval-auc:0.48469
[42]	eval-auc:0.48537
[43]	eval-auc:0.48265
[44]	eval-auc:0.48095
[45]	eval-auc:0.48827
[46]	eval-auc:0.49371
[47]	eval-auc:0.48963
[48]	eval-auc:0.49082
[49]	eval-auc:0.47789
[50]	eval-auc:0.47738
[51]	eval-auc:0.48759
[52]	eval-auc:0.50068
[53]	eval-auc:0.49915
[54]	eval-auc:0.50459
[55]	eval-auc:0.49932
[56]	eval-auc:0.50187
[57]	eval-auc:0.50816
[58]	eval-auc:0.50561
[59]	eval-auc:0.50918
[60]	eval-auc:0.50476
[61]	eval-auc:0.50170
[62]	eval-auc:0.49592
[63]	eval-auc:0.49796
[64]	eval-auc:0.49439
[65]	eval-auc:0.49286
[66]	eval-auc:0.48810
[67]	eval-auc:0.49201
[68]	eval-auc:0.49507
[69]	eval-auc:0.49337
[70]	eval-auc:0.49949
[71]	eval-auc:0.49864
[72]	eval-auc:0.49728
[73]	eval-auc:0.49796
[74]	eval-auc:0.50680
[75]	eval-auc:0.50680
[76]	eval-auc:0.50204
[77]	eval-auc:0.50085
[78]	eval-auc:0.50306
[79]	eval-auc:0.49966
[80]	eval-auc:0.49983
[81]	eval-auc:0.50221
[82]	eval-auc:0.50459
[83]	eval-

[172]	eval-auc:0.58790
[173]	eval-auc:0.58997
[174]	eval-auc:0.59089
[175]	eval-auc:0.58974
[176]	eval-auc:0.58997
[177]	eval-auc:0.58951
[178]	eval-auc:0.59043
[179]	eval-auc:0.58997
[180]	eval-auc:0.59089
[181]	eval-auc:0.58882
[182]	eval-auc:0.58905
[183]	eval-auc:0.59043
[184]	eval-auc:0.59158
[185]	eval-auc:0.59112
[186]	eval-auc:0.59112
[187]	eval-auc:0.59434
[188]	eval-auc:0.59319
[189]	eval-auc:0.59112
[190]	eval-auc:0.59250
[191]	eval-auc:0.58974
[192]	eval-auc:0.59204
[193]	eval-auc:0.59020
[194]	eval-auc:0.59089
[195]	eval-auc:0.59043
[196]	eval-auc:0.59250
[197]	eval-auc:0.59204
[198]	eval-auc:0.58928
[199]	eval-auc:0.59135
[200]	eval-auc:0.59043
[201]	eval-auc:0.59020
[202]	eval-auc:0.59020
[203]	eval-auc:0.59066
[0]	eval-auc:0.45315
[1]	eval-auc:0.51099
[2]	eval-auc:0.52339
[3]	eval-auc:0.52473
[4]	eval-auc:0.56237
[5]	eval-auc:0.56623
[6]	eval-auc:0.57707
[7]	eval-auc:0.56913
[8]	eval-auc:0.54292
[9]	eval-auc:0.53616
[10]	eval-auc:0.52532
[11]	eval-auc:0.53096
[12]	eval-

[51]	eval-auc:0.66774
[52]	eval-auc:0.66870
[53]	eval-auc:0.66795
[54]	eval-auc:0.67084
[55]	eval-auc:0.67052
[56]	eval-auc:0.66581
[57]	eval-auc:0.66977
[58]	eval-auc:0.66506
[59]	eval-auc:0.66677
[60]	eval-auc:0.66709
[61]	eval-auc:0.66474
[62]	eval-auc:0.66474
[63]	eval-auc:0.66207
[64]	eval-auc:0.66367
[65]	eval-auc:0.66389
[66]	eval-auc:0.66463
[67]	eval-auc:0.66356
[68]	eval-auc:0.66324
[69]	eval-auc:0.66378
[70]	eval-auc:0.66046
[71]	eval-auc:0.65704
[72]	eval-auc:0.65682
[73]	eval-auc:0.65586
[74]	eval-auc:0.65597
[75]	eval-auc:0.65779
[76]	eval-auc:0.65586
[77]	eval-auc:0.65479
[78]	eval-auc:0.65383
[79]	eval-auc:0.65062
[80]	eval-auc:0.65287
[81]	eval-auc:0.65554
[82]	eval-auc:0.65811
[83]	eval-auc:0.65822
[84]	eval-auc:0.66046
[85]	eval-auc:0.66185
[86]	eval-auc:0.65672
[87]	eval-auc:0.65586
[88]	eval-auc:0.65757
[89]	eval-auc:0.65351
[90]	eval-auc:0.65725
[91]	eval-auc:0.65854
[92]	eval-auc:0.65736
[93]	eval-auc:0.65747
[94]	eval-auc:0.65618
[95]	eval-auc:0.65789
[96]	eval-

[180]	eval-auc:0.55814
[181]	eval-auc:0.55763
[182]	eval-auc:0.55226
[183]	eval-auc:0.55840
[184]	eval-auc:0.55942
[185]	eval-auc:0.55865
[186]	eval-auc:0.56121
[187]	eval-auc:0.55737
[188]	eval-auc:0.55763
[189]	eval-auc:0.55482
[190]	eval-auc:0.55635
[191]	eval-auc:0.55661
[192]	eval-auc:0.55712
[193]	eval-auc:0.55610
[194]	eval-auc:0.55431
[195]	eval-auc:0.55661
[196]	eval-auc:0.55533
[197]	eval-auc:0.55635
[198]	eval-auc:0.55482
[199]	eval-auc:0.55814
[200]	eval-auc:0.55712
[201]	eval-auc:0.55763
[202]	eval-auc:0.55737
[203]	eval-auc:0.55788
[204]	eval-auc:0.55456
[0]	eval-auc:0.43760
[1]	eval-auc:0.45894
[2]	eval-auc:0.49563
[3]	eval-auc:0.54081
[4]	eval-auc:0.50582
[5]	eval-auc:0.55258
[6]	eval-auc:0.56325
[7]	eval-auc:0.58072
[8]	eval-auc:0.58411
[9]	eval-auc:0.58120
[10]	eval-auc:0.55525
[11]	eval-auc:0.56034
[12]	eval-auc:0.56434
[13]	eval-auc:0.55864
[14]	eval-auc:0.56058
[15]	eval-auc:0.55549
[16]	eval-auc:0.55658
[17]	eval-auc:0.55209
[18]	eval-auc:0.55003
[19]	eval-auc:0.5

In [2]:
print(f"Baseline Avg AUC: {avg_auc_baseline}")
print(f"Baseline Avg F1 Score: {avg_f1_baseline}")

Baseline Avg AUC: 0.5739002718452387
Baseline Avg F1 Score: 0.5056693175619199
