In [None]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd

#pipeline
from sklearn.pipeline import Pipeline

#標準化→Standard Scaler
from sklearn.preprocessing import StandardScaler

#Support vector machine
from sklearn.svm import SVC

#RandomForest
from sklearn.ensemble import RandomForestClassifier as RFC

# XGBoost: classifier instance with default hyper-parameters, created when this cell runs.
# It is not referenced by the cells visible in this part of the notebook.
import xgboost as xgb
XGB = xgb.XGBClassifier()

# LightGBM: classifier instance created with silent=False.
# NOTE(review): `silent` appears to have been deprecated and later removed from
# LightGBM's scikit-learn API in favour of `verbose` — confirm against the
# installed LightGBM version before relying on this argument.
import lightgbm as lgb
LGB = lgb.LGBMClassifier(silent=False)

#次元削減→PCA
from sklearn.decomposition import PCA

#特徴量選択→Gradient Boosting Classifier
from sklearn.ensemble import GradientBoostingClassifier

#特徴量選択→Recursive feature elimination
from sklearn.feature_selection import RFE

#特徴量選択→Select From Model
from sklearn.feature_selection import SelectFromModel

# GridSearch
from sklearn.model_selection import GridSearchCV

# Stratified k-fold cross-validation
# Train test split
# Cross val score
# Cross val predict
# KFold
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score, cross_val_predict, KFold

# Metrics used for evaluation (confusion_matrix is called by the evaluation cells)
from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix, make_scorer, f1_score, recall_score

# Feature importance computation → Permutation importance
import eli5
from eli5.sklearn import PermutationImportance

In [None]:
# Load the data set from the CSV file next to the notebook.
df = pd.read_csv('table.csv')

# Build the feature matrix X and the target y.
# X: every column except 'subject' (the 'subject' COLUMN is dropped, not a row).
X = df.drop(columns='subject')

# y: the 'subject' column on its own.
y = df['subject']

In [None]:
# List the feature names of the initial data (the column index of X).
display(X.columns)

In [None]:
# Preview the first rows with the rich DataFrame display instead of printing
# the whole frame as plain text, which floods the cell output on larger data.
display(X.head())

In [None]:
# Standardization: rescale every feature to zero mean and unit variance.
std = StandardScaler()
# fit() only learns the per-feature statistics and returns the scaler object
# itself, so X_std used to hold a StandardScaler rather than data.
# fit_transform() learns the statistics AND returns the scaled values.
X_std = std.fit_transform(X)
# NOTE(review): X_std is not consumed by any cell visible in this part of the
# notebook — confirm whether later cells are expected to use it.
print(X_std)

In [None]:
# Model selection → SVC
model = SVC()

# SVC is sensitive to the scale of its inputs, so standardization is chained in
# front of it. Putting the scaler inside a Pipeline means it is refit on the
# training part of every split (inner and outer), so no statistics from
# held-out samples leak into training.
svc_pipeline = Pipeline([('scaler', StandardScaler()),
                         ('svc', model)])

# K-fold: 10 shuffled splits with a fixed seed so the splits are reproducible
kf = KFold(n_splits=10, shuffle=True, random_state=0)

# Hyper-parameter grid, one dict per kernel.
# The 'svc__' prefix routes each parameter to the 'svc' step of the pipeline.
param_grid = [
    {'svc__C': [1, 10, 100, 1000],
     'svc__kernel': ['linear']},
    {'svc__C': [1, 10, 100, 1000],
     'svc__kernel': ['rbf'], 'svc__gamma': [0.001, 0.0001]},
    {'svc__C': [1, 10, 100, 1000],
     'svc__kernel': ['poly'], 'svc__degree': [2, 3, 4], 'svc__gamma': [0.001, 0.0001]},
    {'svc__C': [1, 10, 100, 1000],
     'svc__kernel': ['sigmoid'], 'svc__gamma': [0.001, 0.0001]},
]

# Hyper-parameter tuning by exhaustive grid search (inner loop)
gs = GridSearchCV(estimator=svc_pipeline,
                  param_grid=param_grid,
                  scoring='accuracy',
                  cv=kf,
                  return_train_score=True,
                  n_jobs=-1)

# Nested cross-validation: for every outer training split a full grid search
# is run, and the tuned model predicts the corresponding held-out split.
# y_pred therefore holds one out-of-fold prediction per sample.
y_pred = cross_val_predict(gs, X, y, cv=kf)
conf_mat = confusion_matrix(y, y_pred)

In [None]:
# Results: the predicted labels returned by cross_val_predict
print('Prediction：', y_pred)

In [None]:
# Accuracy: score of the predicted labels against the true labels y
scores = accuracy_score(y, y_pred)
print('Scores:', scores)

In [None]:
# Averaged accuracy.
# `scores.mean` without parentheses never called the method: it printed a bound
# method object (NumPy scalar) or raised AttributeError (plain Python float).
# np.mean() returns a number for both scalar and array-like input.
average_score = np.mean(scores)
print('Avarage_score:', average_score)

In [None]:
# Confusion matrix and the metrics derived from it.
# The matrix is computed once instead of once per cell lookup.
cm = confusion_matrix(y, y_pred)

# The tn/fp/fn/tp layout below is only defined for a two-class problem; with
# more classes the old indexing silently read the top-left 2x2 corner.
if cm.shape != (2, 2):
    raise ValueError(
        'Expected a binary confusion matrix of shape (2, 2), got %s' % (cm.shape,)
    )

# Rows are true labels and columns are predicted labels, so flattening the
# 2x2 matrix yields tn, fp, fn, tp in that order.
tn_count, fp_count, fn_count, tp_count = cm.ravel()

# Kept as lists, as before, so that further results can still be appended.
tn = [tn_count]
fp = [fp_count]
fn = [fn_count]
tp = [tp_count]

tn_arr = np.array(tn)
fp_arr = np.array(fp)
fn_arr = np.array(fn)
tp_arr = np.array(tp)

accuracy = (tp_arr + tn_arr) / (tp_arr + fp_arr + fn_arr + tn_arr)
sensitivity = tp_arr / (tp_arr + fn_arr)   # true-positive rate (recall)
specificity = tn_arr / (tn_arr + fp_arr)   # true-negative rate

mean_accuracy = accuracy.mean()
mean_sensitivity = sensitivity.mean()
mean_specificity = specificity.mean()

In [None]:
# Report the averaged accuracy, sensitivity and specificity values.
print('Accuracy:', mean_accuracy)
print('Sensitivity:', mean_sensitivity)
print('specificity:', mean_specificity)