Baseline

In [1]:
import xgboost as xgb
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

auc_scores = []
f1_scores = []

params = {
    'max_depth': 6,
    'eta': 0.3,
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    "gamma": 0,
    "subsample": 1,
    "colsample_bytree": 1,
    "colsample_bylevel": 1,
    "scale_pos_weight": 1,
    "reg_alpha": 0,
    "reg_lambda": 1,
}

data_folder = './data_baseline_2/'

for event in range(17):

    # load data
    dtrain = xgb.DMatrix(data_folder + '{}_train.data'.format(event))
    dtest = xgb.DMatrix(data_folder + '{}_val.data'.format(event))
    deval = xgb.DMatrix(data_folder + '{}_val.data'.format(event))
    
    # train
    evallist = [(dtrain, 'train'), (deval, 'eval')]
    bst = xgb.train(params, dtrain, num_boost_round=1000, evals=[(dtest, 'eval')], early_stopping_rounds=200)
    # predict probabilities
    y_pred_proba = bst.predict(dtest)
    # Convert predicted probabilities to class labels
    y_pred_labels = (y_pred_proba > 0.5).astype(int)
    
    y_real = dtest.get_label()
    
    # Calculate AUC
    auc = roc_auc_score(y_real, y_pred_proba)
    auc_scores.append(auc)
    
    # Calculate F1 score
    f1 = f1_score(y_real, y_pred_labels, average='macro')
    f1_scores.append(f1)
    
# Average AUC and F1 scores across all groups
avg_auc_baseline = np.mean(auc_scores)
avg_f1_baseline = np.mean(f1_scores)

print(f"Avg AUC: {avg_auc_baseline}")
print(f"Avg F1 Score: {avg_f1_baseline}")

[0]	eval-auc:0.55841
[1]	eval-auc:0.62520
[2]	eval-auc:0.65152
[3]	eval-auc:0.62998
[4]	eval-auc:0.63816
[5]	eval-auc:0.61065
[6]	eval-auc:0.59350
[7]	eval-auc:0.53289
[8]	eval-auc:0.50439
[9]	eval-auc:0.51555
[10]	eval-auc:0.52871
[11]	eval-auc:0.53110
[12]	eval-auc:0.53828
[13]	eval-auc:0.52671
[14]	eval-auc:0.52313
[15]	eval-auc:0.52472
[16]	eval-auc:0.51994
[17]	eval-auc:0.53987
[18]	eval-auc:0.55343
[19]	eval-auc:0.54665
[20]	eval-auc:0.55542
[21]	eval-auc:0.59370
[22]	eval-auc:0.63278
[23]	eval-auc:0.65032
[24]	eval-auc:0.63915
[25]	eval-auc:0.62520
[26]	eval-auc:0.62440
[27]	eval-auc:0.62640
[28]	eval-auc:0.61962
[29]	eval-auc:0.60805
[30]	eval-auc:0.61324
[31]	eval-auc:0.60965
[32]	eval-auc:0.61603
[33]	eval-auc:0.62480
[34]	eval-auc:0.62321
[35]	eval-auc:0.62281
[36]	eval-auc:0.61962
[37]	eval-auc:0.62480
[38]	eval-auc:0.62081
[39]	eval-auc:0.61603
[40]	eval-auc:0.62520
[41]	eval-auc:0.63158
[42]	eval-auc:0.63477
[43]	eval-auc:0.62520
[44]	eval-auc:0.63517
[45]	eval-auc:0.6359

[163]	eval-auc:0.62101
[164]	eval-auc:0.61848
[165]	eval-auc:0.62174
[166]	eval-auc:0.62572
[167]	eval-auc:0.62391
[168]	eval-auc:0.62681
[169]	eval-auc:0.62826
[170]	eval-auc:0.62790
[171]	eval-auc:0.62935
[172]	eval-auc:0.63043
[173]	eval-auc:0.63261
[174]	eval-auc:0.62899
[175]	eval-auc:0.63043
[176]	eval-auc:0.63188
[177]	eval-auc:0.63442
[178]	eval-auc:0.63732
[179]	eval-auc:0.63478
[180]	eval-auc:0.63406
[181]	eval-auc:0.63659
[182]	eval-auc:0.63913
[183]	eval-auc:0.63732
[184]	eval-auc:0.63370
[185]	eval-auc:0.63225
[186]	eval-auc:0.63261
[187]	eval-auc:0.63007
[188]	eval-auc:0.62935
[189]	eval-auc:0.63007
[190]	eval-auc:0.62826
[191]	eval-auc:0.62681
[192]	eval-auc:0.62500
[193]	eval-auc:0.62536
[194]	eval-auc:0.62826
[195]	eval-auc:0.62572
[196]	eval-auc:0.62790
[197]	eval-auc:0.63080
[198]	eval-auc:0.63478
[199]	eval-auc:0.63188
[200]	eval-auc:0.63225
[201]	eval-auc:0.63333
[202]	eval-auc:0.63514
[203]	eval-auc:0.63696
[204]	eval-auc:0.63333
[205]	eval-auc:0.63297
[206]	eval-

[142]	eval-auc:0.54167
[143]	eval-auc:0.53883
[144]	eval-auc:0.54248
[145]	eval-auc:0.54288
[146]	eval-auc:0.54288
[147]	eval-auc:0.54571
[148]	eval-auc:0.54450
[149]	eval-auc:0.54490
[150]	eval-auc:0.54288
[151]	eval-auc:0.54207
[152]	eval-auc:0.54045
[153]	eval-auc:0.53883
[154]	eval-auc:0.54086
[155]	eval-auc:0.54086
[156]	eval-auc:0.53843
[157]	eval-auc:0.53924
[158]	eval-auc:0.53924
[159]	eval-auc:0.53964
[160]	eval-auc:0.54248
[161]	eval-auc:0.54086
[162]	eval-auc:0.54207
[163]	eval-auc:0.54126
[164]	eval-auc:0.54126
[165]	eval-auc:0.54126
[166]	eval-auc:0.53924
[167]	eval-auc:0.53964
[168]	eval-auc:0.54126
[169]	eval-auc:0.54045
[170]	eval-auc:0.53883
[171]	eval-auc:0.54086
[172]	eval-auc:0.54086
[173]	eval-auc:0.54328
[174]	eval-auc:0.54005
[175]	eval-auc:0.54328
[176]	eval-auc:0.54005
[177]	eval-auc:0.54167
[178]	eval-auc:0.54248
[179]	eval-auc:0.54369
[180]	eval-auc:0.54126
[181]	eval-auc:0.54369
[182]	eval-auc:0.54409
[183]	eval-auc:0.54450
[184]	eval-auc:0.54612
[185]	eval-

[263]	eval-auc:0.54696
[264]	eval-auc:0.54644
[265]	eval-auc:0.54592
[266]	eval-auc:0.54747
[267]	eval-auc:0.54747
[268]	eval-auc:0.54592
[269]	eval-auc:0.54592
[270]	eval-auc:0.54747
[271]	eval-auc:0.54644
[272]	eval-auc:0.54850
[273]	eval-auc:0.54592
[274]	eval-auc:0.54438
[275]	eval-auc:0.54283
[276]	eval-auc:0.54180
[277]	eval-auc:0.54489
[278]	eval-auc:0.54489
[279]	eval-auc:0.54592
[280]	eval-auc:0.54696
[281]	eval-auc:0.54902
[282]	eval-auc:0.54954
[283]	eval-auc:0.55108
[284]	eval-auc:0.55160
[285]	eval-auc:0.55212
[286]	eval-auc:0.55160
[287]	eval-auc:0.55263
[288]	eval-auc:0.55366
[289]	eval-auc:0.55418
[290]	eval-auc:0.55212
[291]	eval-auc:0.55263
[292]	eval-auc:0.55418
[293]	eval-auc:0.55418
[294]	eval-auc:0.55624
[295]	eval-auc:0.55418
[296]	eval-auc:0.55470
[297]	eval-auc:0.55470
[298]	eval-auc:0.55418
[299]	eval-auc:0.55315
[300]	eval-auc:0.55418
[301]	eval-auc:0.55366
[302]	eval-auc:0.55521
[303]	eval-auc:0.55470
[304]	eval-auc:0.55470
[305]	eval-auc:0.55263
[306]	eval-

[93]	eval-auc:0.46893
[94]	eval-auc:0.46855
[95]	eval-auc:0.47232
[96]	eval-auc:0.47533
[97]	eval-auc:0.48023
[98]	eval-auc:0.47872
[99]	eval-auc:0.47797
[100]	eval-auc:0.49040
[101]	eval-auc:0.48738
[102]	eval-auc:0.48588
[103]	eval-auc:0.48136
[104]	eval-auc:0.47797
[105]	eval-auc:0.48249
[106]	eval-auc:0.48211
[107]	eval-auc:0.48399
[108]	eval-auc:0.48776
[109]	eval-auc:0.49680
[110]	eval-auc:0.49529
[111]	eval-auc:0.49755
[112]	eval-auc:0.50169
[113]	eval-auc:0.50132
[114]	eval-auc:0.49718
[115]	eval-auc:0.49492
[116]	eval-auc:0.49303
[117]	eval-auc:0.49153
[118]	eval-auc:0.48814
[119]	eval-auc:0.49190
[120]	eval-auc:0.48927
[121]	eval-auc:0.48437
[122]	eval-auc:0.48663
[123]	eval-auc:0.48437
[124]	eval-auc:0.48927
[125]	eval-auc:0.48738
[126]	eval-auc:0.48399
[127]	eval-auc:0.48512
[128]	eval-auc:0.47947
[129]	eval-auc:0.47834
[130]	eval-auc:0.48475
[131]	eval-auc:0.48738
[132]	eval-auc:0.47797
[133]	eval-auc:0.47721
[134]	eval-auc:0.47797
[135]	eval-auc:0.47646
[136]	eval-auc:0.4

[35]	eval-auc:0.43202
[36]	eval-auc:0.42623
[37]	eval-auc:0.42912
[38]	eval-auc:0.41273
[39]	eval-auc:0.41369
[40]	eval-auc:0.41176
[41]	eval-auc:0.39392
[42]	eval-auc:0.38814
[43]	eval-auc:0.38959
[44]	eval-auc:0.39007
[45]	eval-auc:0.38621
[46]	eval-auc:0.37464
[47]	eval-auc:0.39537
[48]	eval-auc:0.40694
[49]	eval-auc:0.41273
[50]	eval-auc:0.42430
[51]	eval-auc:0.42334
[52]	eval-auc:0.42960
[53]	eval-auc:0.42816
[54]	eval-auc:0.43009
[55]	eval-auc:0.42768
[56]	eval-auc:0.43828
[57]	eval-auc:0.43105
[58]	eval-auc:0.44069
[59]	eval-auc:0.43828
[60]	eval-auc:0.44648
[61]	eval-auc:0.43057
[62]	eval-auc:0.43250
[63]	eval-auc:0.43587
[64]	eval-auc:0.43635
[65]	eval-auc:0.42912
[66]	eval-auc:0.42623
[67]	eval-auc:0.42575
[68]	eval-auc:0.42237
[69]	eval-auc:0.42671
[70]	eval-auc:0.41900
[71]	eval-auc:0.41900
[72]	eval-auc:0.41610
[73]	eval-auc:0.41466
[74]	eval-auc:0.40791
[75]	eval-auc:0.41321
[76]	eval-auc:0.41707
[77]	eval-auc:0.41225
[78]	eval-auc:0.40887
[79]	eval-auc:0.40694
[80]	eval-

[197]	eval-auc:0.46239
[198]	eval-auc:0.46061
[199]	eval-auc:0.46203
[200]	eval-auc:0.45989
[201]	eval-auc:0.45918
[202]	eval-auc:0.46061
[203]	eval-auc:0.46239
[204]	eval-auc:0.46203
[205]	eval-auc:0.46310
[206]	eval-auc:0.46381
[207]	eval-auc:0.46631
[208]	eval-auc:0.46595
[209]	eval-auc:0.46631
[210]	eval-auc:0.46631
[211]	eval-auc:0.46417
[212]	eval-auc:0.46203
[213]	eval-auc:0.46203
[214]	eval-auc:0.46453
[215]	eval-auc:0.46310
[216]	eval-auc:0.46346
[217]	eval-auc:0.46239
[218]	eval-auc:0.46275
[219]	eval-auc:0.46239
[220]	eval-auc:0.46310
[221]	eval-auc:0.46168
[222]	eval-auc:0.46025
[223]	eval-auc:0.46061
[224]	eval-auc:0.46381
[225]	eval-auc:0.46560
[226]	eval-auc:0.46560
[227]	eval-auc:0.46595
[228]	eval-auc:0.46488
[229]	eval-auc:0.46453
[230]	eval-auc:0.46096
[231]	eval-auc:0.46381
[232]	eval-auc:0.46239
[233]	eval-auc:0.46453
[234]	eval-auc:0.46453
[235]	eval-auc:0.46488
[236]	eval-auc:0.46417
[237]	eval-auc:0.46346
[238]	eval-auc:0.46168
[239]	eval-auc:0.46061
[240]	eval-

[247]	eval-auc:0.56667
[248]	eval-auc:0.56869
[249]	eval-auc:0.56616
[250]	eval-auc:0.56768
[251]	eval-auc:0.56515
[252]	eval-auc:0.56566
[253]	eval-auc:0.56566
[254]	eval-auc:0.56667
[255]	eval-auc:0.56667
[256]	eval-auc:0.56667
[257]	eval-auc:0.56566
[258]	eval-auc:0.56465
[259]	eval-auc:0.56566
[260]	eval-auc:0.56616
[261]	eval-auc:0.56768
[262]	eval-auc:0.56768
[263]	eval-auc:0.56616
[264]	eval-auc:0.56566
[265]	eval-auc:0.56616
[266]	eval-auc:0.56515
[267]	eval-auc:0.56667
[268]	eval-auc:0.56616
[269]	eval-auc:0.56515
[270]	eval-auc:0.56869
[271]	eval-auc:0.56919
[272]	eval-auc:0.56919
[273]	eval-auc:0.56970
[274]	eval-auc:0.56667
[275]	eval-auc:0.56919
[276]	eval-auc:0.56919
[277]	eval-auc:0.56919
[278]	eval-auc:0.56768
[279]	eval-auc:0.56566
[280]	eval-auc:0.56717
[281]	eval-auc:0.56667
[282]	eval-auc:0.56566
[283]	eval-auc:0.56465
[284]	eval-auc:0.56566
[285]	eval-auc:0.56566
[286]	eval-auc:0.56465
[287]	eval-auc:0.56566
[288]	eval-auc:0.56818
[289]	eval-auc:0.56970
[290]	eval-

[43]	eval-auc:0.59491
[44]	eval-auc:0.60576
[45]	eval-auc:0.61535
[46]	eval-auc:0.62537
[47]	eval-auc:0.61786
[48]	eval-auc:0.61911
[49]	eval-auc:0.62119
[50]	eval-auc:0.62578
[51]	eval-auc:0.63705
[52]	eval-auc:0.64330
[53]	eval-auc:0.64497
[54]	eval-auc:0.63955
[55]	eval-auc:0.63997
[56]	eval-auc:0.63204
[57]	eval-auc:0.62078
[58]	eval-auc:0.62119
[59]	eval-auc:0.62954
[60]	eval-auc:0.62620
[61]	eval-auc:0.63579
[62]	eval-auc:0.64122
[63]	eval-auc:0.63746
[64]	eval-auc:0.63872
[65]	eval-auc:0.63329
[66]	eval-auc:0.64664
[67]	eval-auc:0.64456
[68]	eval-auc:0.63496
[69]	eval-auc:0.62995
[70]	eval-auc:0.63371
[71]	eval-auc:0.63913
[72]	eval-auc:0.63913
[73]	eval-auc:0.64038
[74]	eval-auc:0.63872
[75]	eval-auc:0.63663
[76]	eval-auc:0.63872
[77]	eval-auc:0.63579
[78]	eval-auc:0.63621
[79]	eval-auc:0.63538
[80]	eval-auc:0.63413
[81]	eval-auc:0.63538
[82]	eval-auc:0.63329
[83]	eval-auc:0.63663
[84]	eval-auc:0.63496
[85]	eval-auc:0.63872
[86]	eval-auc:0.64289
[87]	eval-auc:0.64664
[88]	eval-

[117]	eval-auc:0.44726
[118]	eval-auc:0.44226
[119]	eval-auc:0.44192
[120]	eval-auc:0.44326
[121]	eval-auc:0.44426
[122]	eval-auc:0.44126
[123]	eval-auc:0.43591
[124]	eval-auc:0.43625
[125]	eval-auc:0.43358
[126]	eval-auc:0.43324
[127]	eval-auc:0.43558
[128]	eval-auc:0.43825
[129]	eval-auc:0.43758
[130]	eval-auc:0.44192
[131]	eval-auc:0.43992
[132]	eval-auc:0.43892
[133]	eval-auc:0.43758
[134]	eval-auc:0.44159
[135]	eval-auc:0.43591
[136]	eval-auc:0.44092
[137]	eval-auc:0.43458
[138]	eval-auc:0.43358
[139]	eval-auc:0.43391
[140]	eval-auc:0.43591
[141]	eval-auc:0.43992
[142]	eval-auc:0.43825
[143]	eval-auc:0.43458
[144]	eval-auc:0.43558
[145]	eval-auc:0.43558
[146]	eval-auc:0.43858
[147]	eval-auc:0.43858
[148]	eval-auc:0.43792
[149]	eval-auc:0.43291
[150]	eval-auc:0.43358
[151]	eval-auc:0.42957
[152]	eval-auc:0.42256
[153]	eval-auc:0.42356
[154]	eval-auc:0.42056
[155]	eval-auc:0.42023
[156]	eval-auc:0.41923
[157]	eval-auc:0.41789
[158]	eval-auc:0.41789
[159]	eval-auc:0.42156
[160]	eval-

[269]	eval-auc:0.68504
[270]	eval-auc:0.68504
[271]	eval-auc:0.68908
[272]	eval-auc:0.68706
[273]	eval-auc:0.68605
[274]	eval-auc:0.68605
[275]	eval-auc:0.68504
[276]	eval-auc:0.68605
[277]	eval-auc:0.68706
[278]	eval-auc:0.68605
[279]	eval-auc:0.68605
[280]	eval-auc:0.68504
[281]	eval-auc:0.68453
[282]	eval-auc:0.68251
[283]	eval-auc:0.68150
[284]	eval-auc:0.68251
[285]	eval-auc:0.68301
[286]	eval-auc:0.68200
[287]	eval-auc:0.68251
[288]	eval-auc:0.68402
[289]	eval-auc:0.68402
[0]	eval-auc:0.51712
[1]	eval-auc:0.44325
[2]	eval-auc:0.44843
[3]	eval-auc:0.47431
[4]	eval-auc:0.51155
[5]	eval-auc:0.50199
[6]	eval-auc:0.52310
[7]	eval-auc:0.50976
[8]	eval-auc:0.50856
[9]	eval-auc:0.51513
[10]	eval-auc:0.51752
[11]	eval-auc:0.50179
[12]	eval-auc:0.51135
[13]	eval-auc:0.52409
[14]	eval-auc:0.50498
[15]	eval-auc:0.49542
[16]	eval-auc:0.50538
[17]	eval-auc:0.50060
[18]	eval-auc:0.49582
[19]	eval-auc:0.48626
[20]	eval-auc:0.49303
[21]	eval-auc:0.50299
[22]	eval-auc:0.49821
[23]	eval-auc:0.50179

[50]	eval-auc:0.58161
[51]	eval-auc:0.58161
[52]	eval-auc:0.60000
[53]	eval-auc:0.60345
[54]	eval-auc:0.60575
[55]	eval-auc:0.61264
[56]	eval-auc:0.61724
[57]	eval-auc:0.62069
[58]	eval-auc:0.61149
[59]	eval-auc:0.60460
[60]	eval-auc:0.59770
[61]	eval-auc:0.60230
[62]	eval-auc:0.59195
[63]	eval-auc:0.58678
[64]	eval-auc:0.58161
[65]	eval-auc:0.57701
[66]	eval-auc:0.57126
[67]	eval-auc:0.57011
[68]	eval-auc:0.55920
[69]	eval-auc:0.56379
[70]	eval-auc:0.56437
[71]	eval-auc:0.56609
[72]	eval-auc:0.56897
[73]	eval-auc:0.55747
[74]	eval-auc:0.54943
[75]	eval-auc:0.55230
[76]	eval-auc:0.55690
[77]	eval-auc:0.55632
[78]	eval-auc:0.56667
[79]	eval-auc:0.56207
[80]	eval-auc:0.55287
[81]	eval-auc:0.55690
[82]	eval-auc:0.55287
[83]	eval-auc:0.55057
[84]	eval-auc:0.54943
[85]	eval-auc:0.54483
[86]	eval-auc:0.54943
[87]	eval-auc:0.55805
[88]	eval-auc:0.56437
[89]	eval-auc:0.55172
[90]	eval-auc:0.55805
[91]	eval-auc:0.56207
[92]	eval-auc:0.55517
[93]	eval-auc:0.55460
[94]	eval-auc:0.56437
[95]	eval-

In [2]:
print(f"Avg AUC: {avg_auc_baseline}")
print(f"Avg F1 Score: {avg_f1_baseline}")

Avg AUC: 0.5477033780878443
Avg F1 Score: 0.47416307745630465
