# Higgs Boson - Training

(example from xgboost docs)

## [Link](https://www.kaggle.com/c/higgs-boson)

In [3]:
import numpy as np
import xgboost as xgb

In [4]:
# Event count that the training weights are rescaled to further down
# (presumably the size of the Kaggle test set -- confirm against the data).
test_size = 550_000

In [5]:
def _label_to_int(token):
    """Convert the raw label field to 1 when it is 's', otherwise 0.

    ``np.loadtxt`` hands converters ``bytes`` or ``str`` depending on the
    NumPy version and the ``encoding`` argument, so both are accepted here.
    Comparing only against ``b's'`` would silently turn every label into 0
    whenever a ``str`` is passed.
    """
    if isinstance(token, bytes):
        token = token.decode('utf-8')
    # Tolerate stray whitespace / line endings around the label field.
    return int(token.strip() == 's')


# Load the training CSV, skipping the header row; column 32 (the label) is
# mapped to 0/1 so that the whole table can be held as a float array.
dtrain = np.loadtxt(
    'data/higgs/training.csv',
    delimiter=',',
    skiprows=1,
    converters={32: _label_to_int},
)

In [6]:
# Columns 1..30 are the values later handed to xgboost as inputs;
# column 32 is the label already converted to 0/1 while loading.
data = dtrain[:, 1:31]
label = dtrain[:, 32]

In [7]:
# Rescale the per-row weights (column 31) by test_size / number of training rows.
n_rows = len(label)
weight = dtrain[:, 31] * float(test_size) / n_rows

In [8]:
# Total rescaled weight of the positive (label == 1) and negative (label == 0)
# examples. Boolean masks keep the summation inside NumPy instead of looping
# over every row in Python; the result may differ from a sequential Python
# sum in the last floating-point digits.
sum_wpos = weight[label == 1.0].sum()
sum_wneg = weight[label == 0.0].sum()


In [9]:
# Display the summed weight of the positive (label == 1) examples.
sum_wpos

1522.3749369677494

In [10]:
# Display the summed weight of the negative (label == 0) examples.
sum_wneg

904199.66410839953

In [11]:
# Ratio of total negative weight to total positive weight; the same quotient
# is used as 'scale_pos_weight' in the xgboost parameters.
ratio = sum_wneg/sum_wpos
ratio

593.94019314937952

In [12]:
# Wrap inputs, labels and weights in an xgboost DMatrix;
# entries equal to -999.0 are declared as missing values.
xgmat = xgb.DMatrix(
    data,
    label=label,
    weight=weight,
    missing=-999.0,
)


Set up the parameters for xgboost. Use the logistic regression loss, and output the raw prediction before the logistic transformation, since we only need the rank. Also scale the weight of the positive examples.

In [13]:
# Booster settings, written as one literal (key order matches the original
# assignment order, so param.items() yields the same sequence).
param = {
    # Logistic loss, but emit the raw score before the logistic transform.
    'objective': 'binary:logitraw',
    # Up-weight positives by the negative/positive weight ratio.
    'scale_pos_weight': sum_wneg / sum_wpos,
    'eta': 0.1,
    'max_depth': 6,
    'eval_metric': 'auc',
    'silent': 1,
    'nthread': 16,
}

In [15]:
# Flatten the settings into (key, value) pairs so that a second
# 'eval_metric' entry can be listed alongside the one already in param.
plst = list(param.items())
plst.append(('eval_metric', 'ams@0.15'))

# Metrics are reported on the training matrix only, under the name 'train'.
watchlist = [(xgmat, 'train')]

In [16]:
# Number of boosting rounds passed to xgb.train.
num_round = 120

In [17]:
# Train the booster for num_round rounds, printing the watchlist metrics
# after each round; the trailing ';' suppresses the cell's result display.
bst = xgb.train(
    plst,
    xgmat,
    num_boost_round=num_round,
    evals=watchlist,
);

[0]	train-auc:0.910911	train-ams@0.15:3.699574
[1]	train-auc:0.915308	train-ams@0.15:3.971228
[2]	train-auc:0.917743	train-ams@0.15:4.067463
[3]	train-auc:0.919345	train-ams@0.15:4.209762
[4]	train-auc:0.920139	train-ams@0.15:4.130002
[5]	train-auc:0.921023	train-ams@0.15:4.175496
[6]	train-auc:0.921944	train-ams@0.15:4.261817
[7]	train-auc:0.922337	train-ams@0.15:4.262321
[8]	train-auc:0.923330	train-ams@0.15:4.326685
[9]	train-auc:0.924186	train-ams@0.15:4.383842
[10]	train-auc:0.924737	train-ams@0.15:4.388791
[11]	train-auc:0.925317	train-ams@0.15:4.407903
[12]	train-auc:0.925923	train-ams@0.15:4.446479
[13]	train-auc:0.926334	train-ams@0.15:4.450507
[14]	train-auc:0.926958	train-ams@0.15:4.489145
[15]	train-auc:0.927428	train-ams@0.15:4.517542
[16]	train-auc:0.928081	train-ams@0.15:4.537076
[17]	train-auc:0.928468	train-ams@0.15:4.549403
[18]	train-auc:0.929035	train-ams@0.15:4.596463
[19]	train-auc:0.929520	train-ams@0.15:4.643400
[20]	train-auc:0.930040	train-ams@0.15:4.714101
[2

In [18]:
# Write the trained booster to 'higgs.model' (path is relative to the
# current working directory).
bst.save_model('higgs.model')