In [None]:
import numpy as np
import pandas as pd

# Stratified k-fold cross-validation
from sklearn.model_selection import StratifiedKFold
# RandomForest
from sklearn.ensemble import RandomForestClassifier
# CatBoost
from catboost import CatBoostClassifier
# LightGBM
import lightgbm as lgb
# XGBoost
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

# 指標を計算するため
from sklearn.metrics import accuracy_score, cohen_kappa_score, make_scorer, f1_score, recall_score

# 見た目を綺麗にするため
import matplotlib.pyplot as plt
import pprint, pydotplus

In [None]:
# Load the dataset from the CSV file.
df = pd.read_csv('Table.csv')

# Features: every column except 'subject' (the label column is dropped,
# not a row).
X = df.drop(columns='subject')

# Target: the 'subject' column on its own.
y = df['subject']

In [None]:
# Stratified k-fold splitter: 10 shuffled folds, seeded so the splits are
# reproducible across runs.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

In [None]:
%%time

# Base estimator: a seeded random forest with out-of-bag scoring enabled,
# allowed to use every available core.
model = RandomForestClassifier(oob_score=True, n_jobs=-1, random_state=43)

# Hyperparameter grid, searched exhaustively:
# 3 * 7 * 6 * 3 * 3 = 1134 combinations, each evaluated on every fold of `skf`.
# NOTE(review): max_features goes up to 10 - presumably X has at least
# 10 feature columns; verify against the dataset.
param_grid = dict(
    n_estimators=[100, 500, 1000],
    max_features=[1, 2, 3, 4, 5, 7, 10],
    max_depth=[3, 5, 7, 10, 15, None],
    min_samples_leaf=[1, 2, 4],
    min_samples_split=[2, 5, 10],
)

# Grid search scored by balanced accuracy over the folds produced by `skf`;
# training-fold scores are recorded as well.
# NOTE(review): both the forest and the search request n_jobs=-1 - confirm
# that this nesting does not oversubscribe the machine.
grid_result = GridSearchCV(
    model,
    param_grid,
    scoring='balanced_accuracy',
    cv=skf,
    n_jobs=-1,
    return_train_score=True,
)

# Kept as the last expression so the notebook still displays the fitted search.
grid_result.fit(X, y)

In [None]:
# Show the best classifier selected by the grid search.
best_estimator = grid_result.best_estimator_
pprint.pprint(best_estimator)

In [None]:
# Show the hyperparameter combination that scored best in the grid search.
best_params = grid_result.best_params_
pprint.pprint(best_params)

In [None]:
# Show the best score reached by the grid search.
best_score = grid_result.best_score_
pprint.pprint(best_score)