In [None]:
# Display / pretty-printing helpers
import pprint, pydotplus

# Numerical and tabular data
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt

# Metrics
from sklearn.metrics import accuracy_score, cohen_kappa_score, make_scorer, f1_score, recall_score

# Stratified k-fold cross-validation
from sklearn.model_selection import StratifiedKFold

# GridSearch
from sklearn.model_selection import GridSearchCV

# Pipeline, so preprocessing can be refitted inside each cross-validation fold
from sklearn.pipeline import Pipeline

# Support vector machine
from sklearn.svm import SVC


In [None]:
# Load the data table.
df = pd.read_csv('Table.csv')

# Feature matrix: every column of df except 'group'.
# (The column axis is the one being dropped here, not the rows.)
x = df.drop(columns='group')

# Target vector: the 'group' column on its own.
y = df['group']

In [None]:
# Standardisation
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the feature matrix ...
stdsc = StandardScaler().fit(x)

# ... and apply them to that same matrix.
# NOTE(review): the scaler is fitted on every row of `x`, so any
# cross-validation run on `X` scores each validation fold with statistics that
# already include that fold. Prefer fitting the scaler inside the CV loop
# (e.g. as a Pipeline step).
X = stdsc.transform(x)

In [None]:
# Stratified k-fold cross-validation with n_splits=10: ten rounds, each holding
# out one tenth of the samples. Shuffling uses a fixed seed (random_state=0).
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

In [None]:
%%time

# モデル選択
model = SVC()

# パラメーター設定
param_grid = [
                        {'C': [1, 10, 100, 1000], 
                         'kernel': ['linear']},
                        {'C': [1, 10, 100, 1000], 
                         'kernel': ['rbf'], 'gamma': [0.001, 0.0001]},
                        {'C': [1, 10, 100, 1000],
                         'kernel': ['poly'], 'degree': [2, 3, 4], 'gamma': [0.001, 0.0001]},
                        {'C': [1, 10, 100, 1000],
                         'kernel': ['sigmoid'], 'gamma': [0.001, 0.0001]}
                         ]

# パラメータチューニングをグリッドサーチ
grid_result = GridSearchCV(estimator = model,
                           param_grid = param_grid,
                           scoring = 'accuracy',
                           cv = skf,
                           return_train_score = True,
                           n_jobs = -1)

grid_result.fit(X, y)

In [None]:
# Show the best estimator selected by the grid search.
best_estimator = grid_result.best_estimator_
pprint.pprint(best_estimator)

In [None]:
# Show the hyper-parameter combination of the best candidate.
best_params = grid_result.best_params_
pprint.pprint(best_params)

In [None]:
# Show the best score reached during the grid search.
best_score = grid_result.best_score_
pprint.pprint(best_score)