# Higgs Boson - Cross Validation

(example from xgboost docs)

## [Kaggle competition: Higgs Boson Machine Learning Challenge](https://www.kaggle.com/c/higgs-boson)

In [1]:
import numpy as np
import xgboost as xgb

Load Data

In [4]:
def _is_signal(raw):
    """Map the raw label field to 1 for signal (``s``) and 0 for anything else.

    ``np.loadtxt`` passes converters either ``bytes`` or ``str`` depending on
    the NumPy version and the ``encoding`` argument. Comparing only against
    ``b's'`` would silently turn every label into 0 whenever ``str`` is passed,
    so bytes are decoded first and a single ``str`` comparison is used.
    """
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    return int(raw == 's')


train = np.loadtxt(
    'data/higgs/training.csv',
    delimiter=',',
    skiprows=1,  # skip the first line (presumably the CSV header -- confirm)
    converters={32: _is_signal},  # column 32 is text; convert it to 0/1
)

In [5]:
# Split the loaded table by column:
#   columns 1..30 -> feature matrix (column 0 is skipped; presumably a row id -- confirm)
#   column 31     -> per-row instance weight
#   column 32     -> 0/1 label (the column that was given a converter on load)
data = train[:, 1:31]
weight = train[:, 31]
label = train[:, 32]

In [6]:
# Wrap the features for XGBoost together with each row's label and instance
# weight; feature entries equal to -999.0 are declared as missing values.
dtrain = xgb.DMatrix(data, label=label, weight=weight, missing=-999.0)

# Booster parameters passed to xgb.cv below.
param = {
    'max_depth': 6,
    'eta': 0.1,
    'silent': 1,
    'objective': 'binary:logitraw',
    'nthread': 4,
}

# Number of boosting rounds passed to xgb.cv below.
num_round = 120

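Preprocess each fold: set `scale_pos_weight` from the training labels and rescale the instance weights of both the training and test folds.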

In [7]:
def fpreproc(dtrain, dtest, param):
    """Per-fold preprocessing hook for ``xgb.cv``.

    Sets ``param['scale_pos_weight']`` to the ratio of negative (label 0) to
    positive (label 1) rows in the training fold, then rescales the instance
    weights of both folds so that each fold's weights sum to the combined
    weight of the two folds.

    Parameters
    ----------
    dtrain, dtest
        Training and test folds; must provide ``get_label`` (train only),
        ``get_weight`` and ``set_weight``.
    param : dict
        Booster parameters; updated in place.

    Returns
    -------
    tuple
        ``(dtrain, dtest, param)`` -- the same objects that were passed in.
    """
    label = dtrain.get_label()
    # Negatives per positive in this training fold.
    param['scale_pos_weight'] = float(np.sum(label == 0)) / np.sum(label == 1)

    wtrain = np.asarray(dtrain.get_weight())
    wtest = np.asarray(dtest.get_weight())
    # An empty weight array means there is nothing to rescale; skipping here
    # avoids a 0 / 0 division further down.
    if wtrain.size == 0 or wtest.size == 0:
        return (dtrain, dtest, param)

    # ndarray.sum is vectorised and accumulates in float64 here, unlike the
    # builtin sum(), which loops in Python over float32 scalars. Each fold
    # total is computed once and reused.
    train_total = wtrain.sum(dtype=np.float64)
    test_total = wtest.sum(dtype=np.float64)
    sum_weight = train_total + test_total

    # Scale each fold so its weights add up to the combined total.
    wtrain *= sum_weight / train_total
    wtest *= sum_weight / test_total
    dtrain.set_weight(wtrain)
    dtest.set_weight(wtest)
    return (dtrain, dtest, param)

Run 5-fold cross-validation, passing `fpreproc` so that it is applied to each fold.

In [8]:
# 5-fold cross-validation with a fixed seed; fpreproc is passed as the
# per-fold preprocessing hook. The result is left as the cell's last
# expression so the notebook displays it.
xgb.cv(
    param,
    dtrain,
    num_round,
    nfold=5,
    # A list, not a set: iteration order of a set of strings can change
    # between interpreter runs, which would make the metric order unstable.
    metrics=['ams@0.15', 'auc'],
    seed=0,
    fpreproc=fpreproc,
)

Unnamed: 0,test-ams@0.15-mean,test-ams@0.15-std,test-auc-mean,test-auc-std,train-ams@0.15-mean,train-ams@0.15-std,train-auc-mean,train-auc-std
0,2.425888,0.048331,0.766509,0.002047,2.466789,0.135585,0.766991,0.002400
1,2.712669,0.067334,0.790874,0.024042,2.762453,0.061388,0.791838,0.024179
2,2.790407,0.078775,0.800007,0.018828,2.828868,0.053535,0.801198,0.019148
3,2.831109,0.068367,0.838812,0.001419,2.884423,0.048262,0.839963,0.001457
4,2.859036,0.088960,0.847975,0.008280,2.924717,0.035826,0.849338,0.008171
5,2.886730,0.101294,0.861723,0.002475,2.943748,0.028000,0.862899,0.001918
6,2.898807,0.088099,0.864512,0.002793,2.970938,0.024539,0.865870,0.001335
7,2.908321,0.089097,0.866929,0.001365,2.990000,0.040041,0.868330,0.000874
8,2.945336,0.086684,0.867718,0.001333,3.014141,0.021463,0.869221,0.000588
9,2.938141,0.066470,0.869849,0.001984,3.019014,0.037778,0.871348,0.001543
