In [1]:
import pickle
from pathlib import Path
import sys

import pandas as pd
from sklearn.svm import SVC
from xgboost import XGBClassifier

rank = 0  # index into Path().resolve().parents used by the optional sys.path override below
#sys.path[0] = f'{Path().resolve().parents[rank]}' # If mymodule lives in a parent directory, adjust rank and uncomment this line
from module.mymodule import PipeLine, grid_search_cv, df_copy, train_or_test
from module.kayano import age_categolize, stSlope_categolize, cholesterol_mean

### 特徴量候補を用意

In [2]:
def pipe_1(df, train_flg, split_kwrg):
    """Feature candidate 1: run ``PipeLine`` on ``df`` and apply ``standard_scaler()``.

    Args:
        df: Input data; passed to the ``PipeLine`` instance by calling it.
        train_flg: Stored on the pipeline as ``train_flg`` and also forwarded
            to ``train_or_test``.
        split_kwrg: Options forwarded unchanged to ``train_or_test``.

    Returns:
        Whatever ``train_or_test`` returns for the prepared pipeline.
    """
    pipeline = PipeLine()
    # The flag is set before the pipeline is invoked on the data
    # (presumably it is read during that call -- confirm in module.mymodule).
    pipeline.train_flg = train_flg
    pipeline(df)
    pipeline.standard_scaler()
    return train_or_test(pipeline, train_flg, split_kwrg)

def pipe_2(df, train_flg, split_kwrg):
    """Feature candidate 2: like the baseline, plus an ``ST_Slope`` column step.

    After the pipeline has processed ``df``, the result of
    ``stSlope_categolize(pipeline.df_cat)`` is handed to ``df_copy`` together
    with ``pipeline.df_num`` and the column name ``'ST_Slope'``; the return
    value replaces ``pipeline.df_num`` before ``standard_scaler()`` runs.

    Args:
        df: Input data; passed to the ``PipeLine`` instance by calling it.
        train_flg: Stored on the pipeline as ``train_flg`` and also forwarded
            to ``train_or_test``.
        split_kwrg: Options forwarded unchanged to ``train_or_test``.

    Returns:
        Whatever ``train_or_test`` returns for the prepared pipeline.
    """
    pipeline = PipeLine()
    pipeline.train_flg = train_flg
    pipeline(df)

    # Presumably copies the categorised ST_Slope values into the numeric
    # frame -- confirm against df_copy / stSlope_categolize.
    numeric = pipeline.df_num
    slope = stSlope_categolize(pipeline.df_cat)
    pipeline.df_num = df_copy(numeric, slope, 'ST_Slope')

    pipeline.standard_scaler()
    return train_or_test(pipeline, train_flg, split_kwrg)

def pipe_3(df, train_flg, split_kwrg):
    """Feature candidate 3: like the baseline, plus ``cholesterol_mean`` on the numeric frame.

    After the pipeline has processed ``df``, ``pipe.df_num`` is replaced by
    ``cholesterol_mean(pipe.df_num)`` before ``standard_scaler()`` runs.

    Args:
        df: Input data; passed to the ``PipeLine`` instance by calling it.
        train_flg: Stored on the pipeline as ``train_flg`` and also forwarded
            to ``train_or_test``.
        split_kwrg: Options forwarded unchanged to ``train_or_test``.

    Returns:
        Whatever ``train_or_test`` returns for the prepared pipeline.
    """
    pipe = PipeLine()
    # Set the flag exactly once (a duplicated assignment was removed here).
    pipe.train_flg = train_flg
    pipe(df)
    # Presumably fills/adjusts the cholesterol column using its mean --
    # confirm against module.kayano.cholesterol_mean.
    pipe.df_num = cholesterol_mean(pipe.df_num)
    pipe.standard_scaler()
    pack = train_or_test(pipe, train_flg, split_kwrg)
    return pack

### モデル候補を用意

In [3]:
# XGBoost candidate. The whole dict is unpacked into grid_search_cv(pack, **spec),
# so the keys must match that function's keyword parameters.
xgboost = {
    'model': XGBClassifier,
    # Hyper-parameter values to search over.
    'param_grid': {
        'max_depth': [3, 5, 7, 9, 15],
        'learning_rate': [0.05, 0.1, 0.3],
        'n_estimators': [50, 75, 100, 150],
    },
    # Fixed settings (presumably passed to the estimator constructor -- confirm in grid_search_cv).
    'model_arg': {'random_state': 42, 'early_stopping_rounds': 100},
}
# SVC candidate. Same key layout as the other model specs; the empty
# param_grid means no hyper-parameter values are varied for this model.
svc = {
    'model': SVC,
    'param_grid': {},
    # Fixed settings (presumably passed to the estimator constructor -- confirm in grid_search_cv).
    'model_arg': {'random_state': 42},
}

### パイプラインからデータセットの作成

In [4]:
# Raw training data shared by every feature pipeline.
df = pd.read_csv('./data/train.csv')

train_flg = True
# When training, to_array must be set to True; otherwise an error is raised.
split_kwrg = {'test_size': 0.2, 'to_array': True}

# Feature-pipeline candidates to evaluate.
pipe_lines = [pipe_1, pipe_2, pipe_3]

# One prepared data pack per candidate, keyed by the pipeline function's name.
data_set = {
    build.__name__: build(df, train_flg, split_kwrg)
    for build in pipe_lines
}

### モデルの訓練

In [5]:
# Model candidates to train.
model_candidates = [xgboost, svc]

# Fitted results, keyed by estimator class name and then by pipeline name.
trained_models = {}
for candidate in model_candidates:
    model_name = candidate['model'].__name__
    print(model_name.center(50, '#'))  # banner separating each model's output
    models = {}
    for key, pack in data_set.items():
        print(key.center(50))
        # The candidate dict supplies the model / param_grid / model_arg keywords.
        models[key] = grid_search_cv(pack, **candidate)
    trained_models[model_name] = models

# Save all trained models into a single pickle file under ./data.
with open('./data/model.pkl', 'wb') as f:
    pickle.dump(trained_models, f)

##################XGBClassifier###################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.861598,0.905512,0.830325,0.86629
test,0.837209,0.917808,0.817073,0.864516


                      pipe_2                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.916179,0.917857,0.927798,0.922801
test,0.875969,0.923077,0.878049,0.9


                      pipe_3                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.97076,0.971223,0.974729,0.972973
test,0.837209,0.896104,0.841463,0.867925


#######################SVC########################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.861598,0.896154,0.841155,0.867784
test,0.813953,0.891892,0.804878,0.846154


                      pipe_2                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.898635,0.889273,0.927798,0.908127
test,0.875969,0.923077,0.878049,0.9


                      pipe_3                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.849903,0.878788,0.837545,0.857671
test,0.806202,0.890411,0.792683,0.83871
