In [1]:
import pickle
from pathlib import Path
import sys

import pandas as pd
from sklearn.svm import SVC
from xgboost import XGBClassifier

from module.mymodule import grid_search_cv
from features import pipe_1, pipe_2, pipe_3, pipe_4, pipe_5

### モデル候補を用意

In [2]:
# Model candidates. Each dict is later unpacked as keyword arguments into
# grid_search_cv (model / param_grid / model_arg).

# XGBoost: search over tree depth, learning rate and number of estimators.
xgboost = {
    'model': XGBClassifier,
    'param_grid': {
        'max_depth': [3, 5, 7, 9, 15],
        'learning_rate': [0.05, 0.1, 0.3],
        'n_estimators': [50, 75, 100, 150],
    },
    # presumably forwarded to the estimator constructor by grid_search_cv -- TODO confirm
    'model_arg': {'random_state': 42, 'early_stopping_rounds': 100},
}

# SVC: empty grid, so no hyper-parameter values are listed for the search.
svc = {
    'model': SVC,
    'param_grid': {},
    'model_arg': {'random_state': 42},
}

### 特徴量候補を用意してデータセット作成

In [3]:
# Load the training data.
df = pd.read_csv('./data/train.csv')

# Keyword arguments passed unchanged to every feature pipeline.
# NOTE(review): every pipeline receives the same `df` object; confirm that the
# pipelines do not modify it in place.
to_pipe = {
    'df': df,
    'split_kwrg': {
        'test_size': 0.2,
        'to_array': True,
    },
    'train_flg': True,
    'retrain': False,
}

# Feature-set candidates for this run (pipe_2 and pipe_3 are imported but not used here).
pipe_lines = [pipe_1, pipe_4, pipe_5]

# Run each pipeline once and store its result under the pipeline function's name.
data_set = dict((pipe.__name__, pipe(**to_pipe)) for pipe in pipe_lines)

                                Base(pipe_1)                               


Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak
0,56,1,155,342,1,150,1,3.0
1,55,0,130,394,0,150,0,0.0
2,47,1,110,0,1,120,1,0.0


                          AgeCAt Standard(pipe_4)                          


Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak
0,3,1,155,342,1,150,1,3.0
1,3,0,130,394,0,150,0,0.0
2,2,1,110,0,1,120,1,0.0


              StSlpoeCat CholestMean AgeCAt Standard(pipe_5))              


Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak
0,3,1,155,342.0,1,150,1,3.0
1,3,0,130,394.0,0,150,0,0.0
2,2,1,110,243.414258,1,120,1,0.0


### モデルの訓練

In [4]:
# Model candidates to train (svc is defined in an earlier cell but left out of this run).
model_candidates = [xgboost]

# Trained models: {model class name: {pipeline name: grid_search_cv result}}.
trained_models = {}
for model_spec in model_candidates:
    model_name = model_spec['model'].__name__
    print(model_name.center(50, '#'))  # banner heading for this model's section

    per_pipeline = {}
    for pipe_name, dataset in data_set.items():
        print(pipe_name.center(50))  # sub-heading for this feature set
        per_pipeline[pipe_name] = grid_search_cv(dataset, **model_spec)

    trained_models[model_name] = per_pipeline

# Save all trained models to a single pickle file.
with open('./data/model.pkl', 'wb') as f:
    pickle.dump(trained_models, f)

##################XGBClassifier###################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.871345,0.916996,0.837545,0.875472
test,0.806202,0.901408,0.780488,0.836601


                      pipe_4                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.867446,0.916335,0.830325,0.871212
test,0.79845,0.9,0.768293,0.828947


                      pipe_5                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.900585,0.937984,0.873646,0.904673
test,0.813953,0.881579,0.817073,0.848101
