## 2.4 評価指標と目的関数

In [1]:
import numpy as np
import pandas as pd

In [2]:
train = pd.read_csv('../input/sample-data/train_preprocessed.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = pd.read_csv('../input/sample-data/test_preprocessed.csv')

In [3]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]

# 学習データを学習データとバリデーションデータに分ける
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

In [6]:
# -----------------------------------
# xgboost におけるカスタム評価指標と目的関数の例
# （参考）https://github.com/dmlc/xgboost/blob/master/demo/guide-python/custom_objective.py
# -----------------------------------
import xgboost as xgb
from sklearn.metrics import log_loss

# 特徴量と目的変数をxgboostのデータ構造に変換する
# 学習データの特徴量と目的変数がtr_x, tr_y、バリデーションデータの特徴量と目的変数がva_x, va_yとする
dtrain = xgb.DMatrix(tr_x, label=tr_y)
dvalid = xgb.DMatrix(va_x, label=va_y)


In [7]:
# カスタム目的関数（この場合はloglossであり、xgboostの'binary:logistic'と等価）
def logregobj(preds, dtrain):
    labels = dtrain.get_label()  # 真の値のラベルを取得
    preds = 1.0 / (1.0 + np.exp(-preds))  # シグモイド関数
    grad = preds - labels  # 勾配
    hess = preds * (1.0 - preds)  # 二階微分値
    return grad, hess


# カスタム評価指標（この場合は誤答率）
def evalerror(preds, dtrain):
    labels = dtrain.get_label()  # 真の値のラベルを取得
    return 'custom-error', float(sum(labels != (preds > 0.0))) / len(labels)


# ハイパーパラメータの設定
params = {'silent': 1, 'random_state': 71}
num_round = 50
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]

# モデルの学習の実行
bst = xgb.train(params, dtrain, num_round, watchlist, obj=logregobj, feval=evalerror)

# 目的関数にbinary:logisticを指定したときと違い、確率に変換する前の値で予測値が出力されるので変換が必要
pred_val = bst.predict(dvalid)
pred = 1.0 / (1.0 + np.exp(-pred_val))
logloss = log_loss(va_y, pred)
print(logloss)

# （参考）通常の方法で学習を行う場合
params = {'silent': 1, 'random_state': 71, 'objective': 'binary:logistic'}
bst = xgb.train(params, dtrain, num_round, watchlist)

pred = bst.predict(dvalid)
logloss = log_loss(va_y, pred)
print(logloss)

Parameters: { "silent" } are not used.

[0]	train-rmse:0.40041	train-custom-error:0.16947	eval-rmse:0.42362	eval-custom-error:0.19080




[1]	train-rmse:0.70228	train-custom-error:0.11547	eval-rmse:0.72145	eval-custom-error:0.14920
[2]	train-rmse:0.98133	train-custom-error:0.10280	eval-rmse:0.99697	eval-custom-error:0.13520
[3]	train-rmse:1.22320	train-custom-error:0.09920	eval-rmse:1.23609	eval-custom-error:0.13680
[4]	train-rmse:1.43864	train-custom-error:0.09453	eval-rmse:1.44949	eval-custom-error:0.13720
[5]	train-rmse:1.63033	train-custom-error:0.08947	eval-rmse:1.63831	eval-custom-error:0.12920
[6]	train-rmse:1.79480	train-custom-error:0.08453	eval-rmse:1.80122	eval-custom-error:0.12920
[7]	train-rmse:1.94509	train-custom-error:0.07920	eval-rmse:1.94581	eval-custom-error:0.12640
[8]	train-rmse:2.06040	train-custom-error:0.07680	eval-rmse:2.06055	eval-custom-error:0.12840
[9]	train-rmse:2.16885	train-custom-error:0.07160	eval-rmse:2.16838	eval-custom-error:0.12400
[10]	train-rmse:2.27799	train-custom-error:0.06853	eval-rmse:2.27434	eval-custom-error:0.12320
[11]	train-rmse:2.37375	train-custom-error:0.06347	eval-rms



[8]	train-logloss:0.24363	eval-logloss:0.30775
[9]	train-logloss:0.23153	eval-logloss:0.30093
[10]	train-logloss:0.22016	eval-logloss:0.29413
[11]	train-logloss:0.20963	eval-logloss:0.28528
[12]	train-logloss:0.19951	eval-logloss:0.27912
[13]	train-logloss:0.19324	eval-logloss:0.27642
[14]	train-logloss:0.18547	eval-logloss:0.27154
[15]	train-logloss:0.17474	eval-logloss:0.26516
[16]	train-logloss:0.16900	eval-logloss:0.26089
[17]	train-logloss:0.16323	eval-logloss:0.25849
[18]	train-logloss:0.15950	eval-logloss:0.25691
[19]	train-logloss:0.15637	eval-logloss:0.25511
[20]	train-logloss:0.14722	eval-logloss:0.25035
[21]	train-logloss:0.14290	eval-logloss:0.24734
[22]	train-logloss:0.13782	eval-logloss:0.24612
[23]	train-logloss:0.13362	eval-logloss:0.24387
[24]	train-logloss:0.13047	eval-logloss:0.24251
[25]	train-logloss:0.12654	eval-logloss:0.24094
[26]	train-logloss:0.12268	eval-logloss:0.24005
[27]	train-logloss:0.11966	eval-logloss:0.23803
[28]	train-logloss:0.11506	eval-logloss:0.