In [None]:
import numpy as np
import pandas as pd

# Stratified k-fold cross-validation
from sklearn.model_selection import StratifiedKFold
# RandomForest
from sklearn.ensemble import RandomForestClassifier
# CatBoost
from catboost import CatBoostClassifier
# LightGBM
import lightgbm as lgb
# XGBoost
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

# 指標を計算するため
from sklearn.metrics import accuracy_score, cohen_kappa_score, make_scorer, f1_score, recall_score

# 見た目を綺麗にするため
import matplotlib.pyplot as plt
import pprint, pydotplus

In [None]:
# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('Table.csv')

# Split into features and target.
# X: every column except 'subject' (drop(columns=...) removes a column, not a row).
X = df.drop(columns=['subject'])

# y: the 'subject' column on its own, used as the target passed to fit().
y = df['subject']

In [None]:
# Stratified k-fold cross-validation with 10 folds: each fold in turn holds out
# roughly 1/10 of the samples. Shuffling uses a fixed seed so the splits repeat.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

In [None]:
%%time
#モデル選択
model = xgb.XGBClassifier()

# パラメーター設定
param_grid = {"max_depth": [ 3, 6, 10,25], #10, 25,
              "learning_rate" : [0.0001,0.001,0.01], # 0.05,0.1
              "min_child_weight" : [1,3,6],
              "n_estimators": [100,200,300], # 500
              "subsample": [0.5,0.75,0.9],
              "gamma":[0,0.1,0.2],
              "eta": [0.3,0.15,0.10]
             }

# パラメータチューニングをグリッドサーチで行うために設定する
# このGridSearchCV には注意が必要 scoring は そのスコアを基準にして最適化する
grid_result = GridSearchCV(estimator = model,
                           param_grid = param_grid,
                           scoring = 'accuracy',
                           cv = skf,
                           verbose=3,
                           return_train_score = True,
                           n_jobs = -1)
grid_result.fit(X,y)

In [None]:
# Show the estimator that achieved the best cross-validated score.
print(pprint.pformat(grid_result.best_estimator_))

In [None]:
# Show the hyper-parameter combination selected by the grid search.
pprint.PrettyPrinter().pprint(grid_result.best_params_)

In [None]:
# Show the best mean cross-validated accuracy found by the grid search.
print(pprint.pformat(grid_result.best_score_))