In [1]:
from pathlib import Path
import sys

import pandas as pd
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Index into Path().resolve().parents used by the optional sys.path tweak below
# (0 = the immediate parent of the current working directory).
rank = 0 
# If mymodule lives in a parent directory, adjust `rank` and uncomment the next line:
#sys.path[0] = f'{Path().resolve().parents[rank]}'
from mymodule import PipeLine, grid_search_cv
from kayano import age_categolize, stSlope_categolize, cholesterol_mean

### 特徴量候補を用意

In [2]:
def pipe_1(df, split_kwrg):
    """Baseline feature set: run ``PipeLine`` on ``df`` and split it as-is.

    Args:
        df: Input data handed to the ``PipeLine`` instance by calling it.
        split_kwrg: Keyword arguments forwarded unchanged to
            ``PipeLine.fold_out_split``.

    Returns:
        Whatever ``PipeLine.fold_out_split`` returns.
    """
    baseline = PipeLine()
    # The call's return value is discarded; presumably it populates the
    # pipeline's internal frames -- confirm in mymodule.
    baseline(df)
    return baseline.fold_out_split(**split_kwrg)

def pipe_2(df, split_kwrg):
    """Feature set 2: categorize ``ST_Slope`` and add it to the numeric frame.

    Args:
        df: Input data handed to the ``PipeLine`` instance by calling it.
        split_kwrg: Keyword arguments forwarded unchanged to
            ``PipeLine.fold_out_split``.

    Returns:
        Whatever ``PipeLine.fold_out_split`` returns.
    """
    flow = PipeLine()
    flow(df)
    # Categorize ST_Slope in the categorical frame.
    flow.df_cat = stSlope_categolize(flow.df_cat)
    # assign() returns a new frame, so the frame previously held in df_num
    # is not modified in place.
    flow.df_num = flow.df_num.assign(ST_Slope=flow.df_cat['ST_Slope'])
    return flow.fold_out_split(**split_kwrg)

def pipe_3(df, split_kwrg):
    """Feature set 3: pass the numeric frame through ``cholesterol_mean``.

    Args:
        df: Input data handed to the ``PipeLine`` instance by calling it.
        split_kwrg: Keyword arguments forwarded unchanged to
            ``PipeLine.fold_out_split``.

    Returns:
        Whatever ``PipeLine.fold_out_split`` returns.
    """
    flow = PipeLine()
    flow(df)
    # NOTE(review): presumably a mean-based treatment of the Cholesterol
    # column -- confirm against kayano.cholesterol_mean.
    flow.df_num = cholesterol_mean(flow.df_num)
    return flow.fold_out_split(**split_kwrg)

### モデル候補を用意

In [3]:
# Training data; the path is relative to the notebook's working directory.
df = pd.read_csv('./data/train.csv')

# ---------------- Grid-search candidates ----------------
# Each candidate is a dict with the keys 'model', 'param_grid' and 'model_arg';
# the training cell unpacks it as keyword arguments to grid_search_cv.
xgboost = dict(
    model=XGBClassifier,
    param_grid=dict(
        max_depth=[3, 5, 7, 9, 15],
        learning_rate=[0.05, 0.1, 0.3],
        n_estimators=[50, 75, 100, 150],
    ),
    # NOTE(review): early_stopping_rounds=100 is as large as or larger than
    # most n_estimators candidates; confirm that grid_search_cv supplies an
    # eval_set and that this patience is intended.
    model_arg=dict(random_state=42, early_stopping_rounds=100),
)

svc = dict(
    model=SVC,
    param_grid=dict(),  # empty grid: no SVC hyper-parameters are searched
    model_arg=dict(random_state=42),
)

### パイプラインからデータセットの作成

In [4]:
# Keyword arguments passed to every pipeline function, which forwards them
# to fold_out_split.
split_kwrg = dict(test_size=0.2, to_array=True)

# Feature pipelines to compare; each result is stored under the name of the
# function that produced it ('pipe_1', 'pipe_2', 'pipe_3').
pipe_lines = [pipe_1, pipe_2, pipe_3]
data_set = {build.__name__: build(df, split_kwrg) for build in pipe_lines}

### モデルの訓練

In [10]:
# Candidates are fitted (and their reports printed) in this order.
model_candidates = [xgboost, svc]

# Layout: trained_models[<model class name>][<pipeline name>]
#         -> {'model': <object returned by grid_search_cv>, 'best_params': <its best_params_>}
trained_models = {}
for candidate in model_candidates:
    model_name = candidate['model'].__name__
    print(model_name.center(50, '#'))  # banner line for this model
    models = {}
    for key, pack in data_set.items():
        print(key.center(50))  # sub-header naming the pipeline
        search = grid_search_cv(pack, **candidate)
        models[key] = {'model': search, 'best_params': search.best_params_}
    trained_models[model_name] = models

##################XGBClassifier###################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.861598,0.905512,0.830325,0.86629
test,0.837209,0.917808,0.817073,0.864516


                      pipe_2                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.916179,0.917857,0.927798,0.922801
test,0.875969,0.923077,0.878049,0.9


                      pipe_3                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.97076,0.971223,0.974729,0.972973
test,0.837209,0.896104,0.841463,0.867925


#######################SVC########################
                      pipe_1                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.744639,0.77037,0.750903,0.760512
test,0.697674,0.772152,0.743902,0.757764


                      pipe_2                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.748538,0.772059,0.758123,0.765027
test,0.697674,0.772152,0.743902,0.757764


                      pipe_3                      
-------------------- 評価結果 --------------------


Unnamed: 0,accuracy_score,precision_score,recall_score,f1_score
train,0.699805,0.701639,0.772563,0.735395
test,0.682171,0.741176,0.768293,0.754491


In [6]:
# Inspect the object grid_search_cv returned for XGBClassifier on the pipe_1 data set.
trained_models['XGBClassifier']['pipe_1']['model']

In [8]:
# Show the best_params_ recorded for XGBClassifier on the pipe_1 data set.
trained_models['XGBClassifier']['pipe_1']['best_params']

{'learning_rate': 0.3, 'max_depth': 3, 'n_estimators': 50}

In [7]:
# Show every stored entry (search object and best_params) for SVC, keyed by pipeline name.
trained_models['SVC']

{'pipe_1': {'model': GridSearchCV(cv=10, estimator=SVC(random_state=42), n_jobs=-1, param_grid={},
               scoring='accuracy'),
  'best_params': {}},
 'pipe_2': {'model': GridSearchCV(cv=10, estimator=SVC(random_state=42), n_jobs=-1, param_grid={},
               scoring='accuracy'),
  'best_params': {}},
 'pipe_3': {'model': GridSearchCV(cv=10, estimator=SVC(random_state=42), n_jobs=-1, param_grid={},
               scoring='accuracy'),
  'best_params': {}}}