In [1]:
import pickle
from pathlib import Path
import sys

import pandas as pd
from sklearn.svm import SVC
from xgboost import XGBClassifier

from module.mymodule import grid_search_cv
from features import pipe_1, pipe_2, pipe_3, pipe_4, pipe_5, pipe_6, pipe_7,\
                     pipe_8, pipe_9, pipe_10

# モデルのチューニングと訓練を行う

### 特徴量候補を用意してデータセット作成

In [2]:
# Load the training data.
df = pd.read_csv('./data/train.csv')

# Keyword arguments handed unchanged to every feature pipeline below.
# NOTE(review): 'split_kwrg' presumably carries the train/test split options
# and 'train_flg' / 'retrain' presumably switch pipeline modes -- confirm in
# the `features` module.
to_pipe = dict(
    df=df,
    split_kwrg=dict(test_size=0.2, to_array=True),
    train_flg=True,
    retrain=False,
)

# Feature-set candidates to evaluate. pipe_2 .. pipe_8 are imported at the top
# of the notebook as well but are currently left out; add them to this list to
# include them in the comparison.
pipe_lines = [pipe_1, pipe_9, pipe_10]

# Run each pipeline once and key its result by the pipeline function's name.
data_set = dict((pipe.__name__, pipe(**to_pipe)) for pipe in pipe_lines)

                                Base(pipe_1)                               


Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak
0,56,1,155,342,1,150,1,3.0
1,55,0,130,394,0,150,0,0.0
2,47,1,110,0,1,120,1,0.0


                          Onehot Standard(pipe_9)                          


Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,56,1,155,342,1,150,1,3.0,1,0,0,0,0,1,0,0,1,0
1,55,0,130,394,0,150,0,0.0,0,1,0,0,1,0,0,0,0,1
2,47,1,110,0,1,120,1,0.0,0,0,1,0,0,1,0,0,1,0


                CholestMean AgeCat Onehot Standard(pipe_10)                


Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,3,1,155,342.0,1,150,1,3.0,1,0,0,0,0,1,0,0,1,0
1,3,0,130,394.0,0,150,0,0.0,0,1,0,0,1,0,0,0,0,1
2,2,1,110,243.414258,1,120,1,0.0,0,0,1,0,0,1,0,0,1,0


### モデル候補を用意

In [3]:
# Candidate model specifications. Each spec bundles the estimator class
# ('model'), the hyper-parameter grid to search ('param_grid') and extra fixed
# arguments ('model_arg'); the whole dict is unpacked as keyword arguments
# into grid_search_cv.

# XGBoost
# NOTE(review): early_stopping_rounds needs validation data at fit time;
# presumably grid_search_cv supplies it -- confirm.
xgboost = dict(
    model=XGBClassifier,
    param_grid=dict(
        max_depth=[3, 5, 7, 9, 15],
        learning_rate=[0.05, 0.1, 0.3],
        n_estimators=[50, 75, 100, 150],
    ),
    model_arg=dict(random_state=42, early_stopping_rounds=100),
)

# SVC
# The grid is empty, so no hyper-parameter values are listed for the search.
# NOTE(review): scikit-learn documents SVC's random_state as ignored unless
# probability=True -- confirm whether it has any effect here.
svc = dict(
    model=SVC,
    param_grid=dict(),
    model_arg=dict(random_state=42),
)

### モデルの訓練

In [4]:
# Model specifications to train.
model_candidates = [xgboost, svc]

# Results, keyed first by estimator class name and then by pipeline name.
trained_models = {}
for candidate in model_candidates:
    model_name = candidate['model'].__name__
    print(model_name.center(50, '#'))  # banner separating the models in the output

    models = {}
    for pipe_name, dataset in data_set.items():
        print(pipe_name.center(50))
        # Stores whatever grid_search_cv returns (presumably the fitted model).
        models[pipe_name] = grid_search_cv(dataset, **candidate)

    trained_models[model_name] = models

# Save all results in a single pickle file.
with open('./data/model.pkl', mode='wb') as model_file:
    pickle.dump(trained_models, model_file)

##################XGBClassifier###################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.871345,0.916996,0.837545,0.875472
test,0.806202,0.901408,0.780488,0.836601


                      pipe_9                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.94152,0.936396,0.956679,0.946429
test,0.868217,0.901235,0.890244,0.895706


                     pipe_10                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.916179,0.914894,0.931408,0.923077
test,0.852713,0.888889,0.878049,0.883436


#######################SVC########################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.861598,0.896154,0.841155,0.867784
test,0.813953,0.891892,0.804878,0.846154


                      pipe_9                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.923977,0.916084,0.945848,0.930728
test,0.868217,0.882353,0.914634,0.898204


                     pipe_10                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.912281,0.908451,0.931408,0.919786
test,0.860465,0.9,0.878049,0.888889
