In [2]:
from utils.data_loader import train_data_loader, test_data_loader
from utils.inference_tools import pred_to_binary, export_csv, making_result
from utils.model_stacking import *

from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression, Lasso, RidgeClassifier, SGDClassifier, Lars, LassoLars
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import fbeta_score, make_scorer

import pandas as pd
import numpy as np
import pickle
import datetime

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

  from ._conv import register_converters as _register_converters
  from pandas.core import datetools
Using TensorFlow backend.


# Load Data and Pre-processing

In [5]:
# Print Current Time
time = str(datetime.datetime.now()).split()[1].split('.')[0]
print("Start:", time)


# Print Information
name = 'KHW'
model = 'ML Stacking'
summary = 'HyperParams tuning with 10 sklearn models + 4 stacking model'

print('Author Name :', name)
print('Model :', model)
print('Summary :', summary)
print("\n")


# Setting
path = "./data"
pos_dir = path+"/train/positive/"
neg_dir = path+"/train/negative/"
test_dir = path+'/test/'

features = ['firstorder', 'shape']
target_voxel = (0.65, 0.65, 3)

Start: 07:50:31
Author Name : KHW
Model : ML Stacking
Summary : HyperParams tuning with 10 sklearn models + 4 stacking model




In [3]:
do_n4 = False
do_ws = True
do_resample = True
do_shuffle = True

X_train, y_train = train_data_loader(pos_dir, neg_dir, do_n4, do_ws, do_resample, do_shuffle, features, target_voxel)

Processing [1/3] Image of Positive Patient... (02:02:35)
>>> Finished : Voxel Size Resampling (02:02:48)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (02:02:49)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/3] Image of Positive Patient... (02:02:51)
>>> Finished : Voxel Size Resampling (02:03:04)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (02:03:05)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [3/3] Image of Positive Patient... (02:03:07)
>>> Finished : Voxel Size Resampling (02:03:19)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (02:03:20)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [1/3] Image of Negative Patient... (02:03:23)
>>> Finished : Voxel Size Resampling (02:03:35)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (02:03:36)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/3] Image of Negative Patient... (02:03:39)
>>> Finished : Voxel Size Resampling (02:03:53)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (02:03:53)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [3/3] Image of Negative Patient... (02:03:56)
>>> Finished : Voxel Size Resampling (02:04:09)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (02:04:09)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Created X of shape (6, 64) and y of shape (6,) (02:04:11)


In [11]:
do_n4 = False
do_ws = True
do_resample = True

X_test, patient_num, error_patient = test_data_loader(test_dir, do_n4, do_ws, do_resample, features, target_voxel)

Processing [1/2] Image of Test Patient... (07:51:12)
>>> Finished : Voxel Size Resampling (07:51:25)
>>> Unique Value of BRAIN mask : [0. 1.]
>>>Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (07:51:25)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/2] Image of Test Patient... (07:52:14)
>>> Finished : Voxel Size Resampling (07:52:28)
>>> Unique Value of BRAIN mask : [0. 1.]
>>>Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (07:52:29)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


# Base Model

### xgboost

In [3]:
# Fit Model with Training Data
model1 = XGBClassifier(n_jobs=4)
model1.fit(X_train, y_train)


# Save model to file
# pickle.dump(model1, open('./data/model/model1.pickle.dat', 'wb'))

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=4, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

### svm

In [4]:
# Fit Model with Training Data
model2 = SVC()
model2.fit(X_train, y_train)


# Save model to file
# pickle.dump(model2, open('./data/model/model2.pickle.dat', 'wb'))

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### logistic regression

In [5]:
# Fit Model with Training Data
model3 = LogisticRegression(n_jobs=4)
model3.fit(X_train, y_train)


# Save model to file
# pickle.dump(model3, open('./data/model/model3.pickle.dat', 'wb'))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=4,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

### random forest

In [6]:
# Fit Model with Training Data
model4 = RandomForestClassifier(n_jobs=4)
model4.fit(X_train, y_train)


# Save model to file
# pickle.dump(mode4l, open('./data/model/model4.pickle.dat', 'wb'))

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=4,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

<br><br><br>

# Score

In [None]:
BETA=1

In [3]:
def new_scorer(y_true, y_pred, threshold=0.5) :
    result = []
    global BETA

    for pred in list(y_pred) :
        if pred >= threshold :
            result.append(1)
        else :
            result.append(0)
            
    return fbeta_score(y_true, np.array(result), beta=BETA)

In [4]:
scorer = make_scorer(fbeta_score, beta=BETA)

# Modeling

### MLP

### CNN

# Parameter Tuning & CV

### xgboost

In [5]:
model1 = XGBClassifier()

In [6]:
m1_params1 = {
    'max_depth' : [5,6,7,8],
    'min_child_weight' : [0.5, 1, 5, 10, 15, 20],
    'gamma' : [1.5, 2, 2.5, 3.0, 5],
    'subsample' : [0.5, 0.6, 0.8, 1.0],
    'colsample_bytree' : [0.5, 0.6, 0.8, 1.0],
    'probability' : [True],
    'learning_rate' : [0.01, 0.05, 0.1],
    'n_estimators' : [300, 500, 700]

}

m1_grid_1 = GridSearchCV(model1, param_grid=m1_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m1_grid_1.fit(X_train, y_train)

best_model1 = m1_grid_1.best_estimator_

print("Best Score : {}".format(m1_grid_1.best_score_))
print("Best Params : {}".format(m1_grid_1.best_params_))

Best Score : 0.0
Best Params : {'max_depth': 5}


### svm

In [7]:
model2 = SVC()

In [8]:
m2_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100], 
    'gamma' : [0.001, 0.01, 0.1, 1, 2, 5, 10, 20],
    'degree' : [2,3,4],
    'probability' : [True]
}

m2_grid_1 = GridSearchCV(model2, param_grid=m2_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m2_grid_1.fit(X_train, y_train)

best_model2 = m2_grid_1.best_estimator_

print("Best Score : {}".format(m2_grid_1.best_score_))
print("Best Params : {}".format(m2_grid_1.best_params_))

Best Score : 0.5555555555555556
Best Params : {'C': 0.001, 'degree': 2, 'gamma': 0.001, 'probability': True}


### logistic regression

In [9]:
model3 = LogisticRegression()

In [10]:
m3_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
}

m3_grid_1 = GridSearchCV(model3, param_grid=m3_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m3_grid_1.fit(X_train, y_train)

best_model3 = m3_grid_1.best_estimator_

print("Best Score : {}".format(m3_grid_1.best_score_))
print("Best Params : {}".format(m3_grid_1.best_params_))

Best Score : 0.5555555555555556
Best Params : {'C': 0.001, 'max_iter': 100}


### random forest

In [11]:
model4 = RandomForestClassifier()

In [12]:
m4_params1 = {
    'max_depth' : [6, 8, 10, 15, 20, 30, 40, 50],
    'min_samples_leaf': [1, 2, 3, 4, 5,10, 20, 50],
    'n_estimators' : [100, 300, 500]
}

m4_grid_1 = GridSearchCV(model4, param_grid=m4_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m4_grid_1.fit(X_train, y_train)

best_model4 = m4_grid_1.best_estimator_

print("Best Score : {}".format(m4_grid_1.best_score_))
print("Best Params : {}".format(m4_grid_1.best_params_))

Best Score : 0.8095238095238094
Best Params : {'max_depth': 40}


### lasso regression

In [13]:
model5 = LogisticRegression()

In [14]:
m5_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
    'penalty' : ["l1"]
}

m5_grid_1 = GridSearchCV(model5, param_grid=m5_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m5_grid_1.fit(X_train, y_train)

best_model5 = m5_grid_1.best_estimator_

print("Best Score : {}".format(m5_grid_1.best_score_))
print("Best Params : {}".format(m5_grid_1.best_params_))

Best Score : 0.6666666666666666
Best Params : {'C': 10, 'max_iter': 100, 'penalty': 'l1'}


### ridge regression

In [15]:
model6 = RidgeClassifier()

In [16]:
m6_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [None]+[n for n in range(100,1101, 200)]
}

m6_grid_1 = GridSearchCV(model6, param_grid=m6_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m6_grid_1.fit(X_train, y_train)

best_model6 = m6_grid_1.best_estimator_

print("Best Score : {}".format(m6_grid_1.best_score_))
print("Best Params : {}".format(m6_grid_1.best_params_))

Best Score : 0.42328042328042326
Best Params : {'alpha': 0.1, 'max_iter': None}


### elasticNet

In [17]:
model7 = SGDClassifier()

In [18]:
m7_params1 = {
    'alpha': [0.001, 0.01, 0.1, 1, 2, 5, 10, 20, 50, 100],
    'l1_ratio':[0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 
    'max_iter' : [None]+[n for n in range(800, 1601, 200)],
    'penalty' : ["elasticnet"],
    'loss' : ["log"]
}

m7_grid_1 = GridSearchCV(model7, param_grid=m7_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m7_grid_1.fit(X_train, y_train)

best_model7 = m7_grid_1.best_estimator_

print("Best Score : {}".format(m7_grid_1.best_score_))
print("Best Params : {}".format(m7_grid_1.best_params_))

Best Score : 0.5555555555555556
Best Params : {'alpha': 0.001, 'l1_ratio': 0.1, 'loss': 'log', 'max_iter': None, 'penalty': 'elasticnet'}


### LARS

In [19]:
model8 = Lars()

In [20]:
m8_params1 = {
    'n_nonzero_coefs': [n for n in range(30, 150, 20)]
}

max_score=0
m8_best_t = 0
best_model8 = ""
m8_best_grid_1 = ""

for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m8_grid_1 = GridSearchCV(model8, param_grid=m8_params1, scoring=scorer2, cv=2, verbose=0, n_jobs=-1)
    m8_grid_1.fit(X_train, y_train)

    if max_score < m8_grid_1.best_score_ :
        best_model8 = m8_grid_1.best_estimator_
        m8_best_t = t
        m8_best_grid_1 = m8_grid_1
        
m8_grid_1 = m8_best_grid_1
best_model8 = m8_grid_1.best_estimator_

print("Best Score : {}".format(m8_grid_1.best_score_))     
print("Threshold :", m8_best_t)
print("Best Params : {}".format(m8_grid_1.best_params_))

Best Score : 0.6349206349206349
Threshold : 0.6
Best Params : {'n_nonzero_coefs': 30}


### LARS lasso

In [21]:
model9 = LassoLars()

In [22]:
m9_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [n for n in range(800, 1601, 200)]
}

max_score=0
m9_best_t = 0
best_model9 = ""
m9_best_grid_1 = ""
for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m9_grid_1 = GridSearchCV(model9, param_grid=m9_params1, scoring=scorer2, cv=2, verbose=0, n_jobs=-1)
    m9_grid_1.fit(X_train, y_train)

    if max_score < m9_grid_1.best_score_ :
        best_model9 = m9_grid_1.best_estimator_
        m9_best_t = t
        m9_best_grid_1 = m9_grid_1

m9_grid_1 = m9_best_grid_1
best_model9 = m9_grid_1.best_estimator_

print("Best Score : {}".format(m9_grid_1.best_score_))     
print("Threshold :", m9_best_t)
print("Best Params : {}".format(m9_grid_1.best_params_))

Best Score : 0.5277777777777778
Threshold : 0.6
Best Params : {'alpha': 0.1, 'max_iter': 800}


### ExtraTree

In [23]:
model10 = ExtraTreesClassifier()

In [24]:
m10_params1 = {
    'max_depth' : [None, 3, 5, 7, 9],
    'n_estimators' : [10, 50, 100, 300, 500]
}

m10_grid_1 = GridSearchCV(model10, param_grid=m10_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m10_grid_1.fit(X_train, y_train)

best_model10 = m10_grid_1.best_estimator_

print("Best Score : {}".format(m10_grid_1.best_score_))
print("Best Params : {}".format(m10_grid_1.best_params_))

Best Score : 0.8518518518518517
Best Params : {'max_depth': 5, 'n_estimators': 10}


# Model Stacking

In [3]:
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

In [26]:
# layer1
models = [best_model1, best_model2, best_model3, best_model4, best_model5, best_model6, best_model7, best_model8, best_model9, best_model10]
S_train = stacking(models, X_train)

meta_xgb = stacking_xgb(S_train, y_train, cv=2)
meta_logistic = stacking_logistic(S_train, y_train, cv=2)
meta_NN = stacking_NN(S_train, y_train, cv=2)
meta_weight = stacking_weight(S_train, y_train, cv=2)

y_pred_lst = []
y_pred_binary_lst =[]
threshold = "auto"
for meta in [meta_xgb, meta_logistic, meta_NN, meta_weight] :
    pred = meta.predict_proba(S_train)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))

model 1 is stacked
model 3 is stacked
model 4 is stacked
model 5 is stacked
model 7 is stacked
model 10 is stacked
Best Score : 0.5555555555555556
Best Params : {'colsample_bytree': 1.0, 'gamma': 1.5, 'learning_rate': 0.01, 'max_depth': 2, 'min_child_weight': 0.5, 'n_estimators': 100, 'probability': True, 'subsample': 0.5}
Best Score : 1.0
Best Params : {'C': 1, 'max_iter': 100}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30


In [27]:
# layer2
models2 = [meta_xgb, meta_logistic, meta_NN, meta_weight]
S_train2 = stacking(models2, S_train, layer=2)

meta_xgb2 = stacking_xgb(S_train2, y_train, cv=2)
meta_logistic2 = stacking_logistic(S_train2, y_train, cv=2)
meta_NN2 = stacking_NN(S_train2, y_train, cv=2)
meta_weight2 = stacking_weight(S_train2, y_train, cv=2)

y_pred_lst2 = []
y_pred_binary_lst2 =[]
threshold = "auto"
for meta in [meta_xgb2, meta_logistic2, meta_NN2, meta_weight2] :
    pred = meta.predict_proba(S_train2)[:, 1]
    y_pred_lst2.append(pred)
    y_pred_binary_lst2.append(pred_to_binary(pred, threshold = threshold))

model 1 is stacked
model 2 is stacked
model 3 is stacked
model 4 is stacked
Best Score : 0.5555555555555556
Best Params : {'colsample_bytree': 0.5, 'gamma': 1.5, 'learning_rate': 0.01, 'max_depth': 2, 'min_child_weight': 0.5, 'n_estimators': 200, 'probability': True, 'subsample': 0.8}
Best Score : 1.0
Best Params : {'C': 10, 'max_iter': 100}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Ep

In [28]:
# select model
meta_model2 = meta_weight2

In [29]:
print(making_result(S_train, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, y_train))

    m1        m3   m4        m5   m7  m10       xgb        lr        NN  \
0  0.5  0.993480  0.8  0.999988  0.0  1.0  0.502207  0.793455  0.732167   
1  0.5  0.439779  0.7  0.999837  0.0  1.0  0.502207  0.758269  0.657518   
2  0.5  0.086069  0.1  0.000119  0.0  0.0  0.494128  0.373469  0.421195   
3  0.5  0.054889  0.3  0.000024  0.0  0.0  0.494128  0.386006  0.448127   
4  0.5  0.390369  0.9  0.999975  0.0  1.0  0.502207  0.766915  0.673472   
5  0.5  0.105163  0.2  0.000005  0.0  0.0  0.494128  0.382234  0.436628   

     weight ...  weight_b      xgb2       lr2       NN2   weight2  xgb_b2  \
0  0.847067 ...       0.0  0.592395  0.730635  0.439951  0.672961     0.0   
1  0.881083 ...       0.0  0.592395  0.699041  0.442964  0.670809     0.0   
2  0.608616 ...       0.0  0.400482  0.345026  0.466465  0.589058     0.0   
3  0.653813 ...       0.0  0.400482  0.368206  0.464042  0.596622     0.0   
4  0.901112 ...       1.0  0.592395  0.711026  0.441642  0.674311     0.0   
5  0.625972 

# Save

In [30]:
pickle.dump(best_model1, open(path+'/model/model1.pickle.dat', 'wb'))
pickle.dump(best_model2, open(path+'/model/model2.pickle.dat', 'wb'))
pickle.dump(best_model3, open(path+'/model/model3.pickle.dat', 'wb'))
pickle.dump(best_model4, open(path+'/model/model4.pickle.dat', 'wb'))
pickle.dump(best_model5, open(path+'/model/model5.pickle.dat', 'wb'))
pickle.dump(best_model6, open(path+'/model/model6.pickle.dat', 'wb'))
pickle.dump(best_model7, open(path+'/model/model7.pickle.dat', 'wb'))
pickle.dump(best_model8, open(path+'/model/model8.pickle.dat', 'wb'))
pickle.dump(best_model9, open(path+'/model/model9.pickle.dat', 'wb'))
pickle.dump(best_model10, open(path+'/model/model10.pickle.dat', 'wb'))

In [31]:
pickle.dump(meta_xgb, open(path+'/model/meta_xgb.pickle.dat', 'wb'))
pickle.dump(meta_logistic, open(path+'/model/meta_logistic.pickle.dat', 'wb'))

meta_NN.model.save_weights(path+'/model/meta_NN.h5')
with open(path+'/model/meta_NN.json', 'w') as f :
    f.write(meta_NN.model.to_json())
    
meta_weight.model.save_weights(path+'/model/meta_weight.h5')
with open(path+'/model/meta_weight.json', 'w') as f :
    f.write(meta_weight.model.to_json())

In [32]:
pickle.dump(meta_xgb2, open(path+'/model/meta_xgb2.pickle.dat', 'wb'))
pickle.dump(meta_logistic2, open(path+'/model/meta_logistic2.pickle.dat', 'wb'))

meta_NN2.model.save_weights(path+'/model/meta_NN2.h5')
with open(path+'/model/meta_NN2.json', 'w') as f :
    f.write(meta_NN2.model.to_json())
    
meta_weight2.model.save_weights(path+'/model/meta_weight2.h5')
with open(path+'/model/meta_weight2.json', 'w') as f :
    f.write(meta_weight2.model.to_json())

In [33]:
meta_model2.model.save_weights(path+'/model/meta_model2.h5')
with open(path+'/model/meta_model2.json', 'w') as f :
    f.write(meta_model2.model.to_json())

# Loading & Prediction

In [6]:
model1 = pickle.load(open(path+'/model/model1.pickle.dat', 'rb'))
model2 = pickle.load(open(path+'/model/model2.pickle.dat', 'rb'))
model3 = pickle.load(open(path+'/model/model3.pickle.dat', 'rb'))
model4 = pickle.load(open(path+'/model/model4.pickle.dat', 'rb'))
model5 = pickle.load(open(path+'/model/model5.pickle.dat', 'rb'))
model6 = pickle.load(open(path+'/model/model6.pickle.dat', 'rb'))
model7 = pickle.load(open(path+'/model/model7.pickle.dat', 'rb'))
model8 = pickle.load(open(path+'/model/model8.pickle.dat', 'rb'))
model9 = pickle.load(open(path+'/model/model9.pickle.dat', 'rb'))
model10 = pickle.load(open(path+'/model/model10.pickle.dat', 'rb'))

In [7]:
meta_xgb = pickle.load(open(path+'/model/meta_xgb.pickle.dat', 'rb'))
meta_logistic = pickle.load(open(path+'/model/meta_logistic.pickle.dat', 'rb'))

with open(path+'/model/meta_NN.json', 'r') as f :
    meta_NN = model_from_json(f.read())
meta_NN.model.load_weights(path+'/model/meta_NN.h5')

with open(path+'/model/meta_weight.json', 'r') as f :
    meta_weight = model_from_json(f.read())
meta_weight.model.load_weights(path+'/model/meta_weight.h5')

In [8]:
meta_xgb2 = pickle.load(open(path+'/model/meta_xgb2.pickle.dat', 'rb'))
meta_logistic2 = pickle.load(open(path+'/model/meta_logistic2.pickle.dat', 'rb'))

with open(path+'/model/meta_NN2.json', 'r') as f :
    meta_NN2 = model_from_json(f.read())
meta_NN2.model.load_weights(path+'/model/meta_NN2.h5')

with open(path+'/model/meta_weight2.json', 'r') as f :
    meta_weight2 = model_from_json(f.read())
meta_weight2.model.load_weights(path+'/model/meta_weight2.h5')

In [37]:
with open(path+'/model/meta_model2.json', 'r') as f :
    meta_model2 = model_from_json(f.read())
meta_model2.model.load_weights(path+'/model/meta_weight2.h5')

<br><br>

In [20]:
def pred_to_binary(pred_array, threshold = 0.5):
    if threshold == "auto" :
        
        pred_binary = sorted(list(pred_array))
        threshold = pred_binary[int(len(pred_binary)*4/10)]
        pred_binary = np.array(pred_binary)
        pred_binary[pred_binary > threshold] = 1
        pred_binary[pred_binary <= threshold] = 0
        
    else :
        pred_binary = np.copy(pred_array)
        pred_binary[pred_binary > threshold] = 1
        pred_binary[pred_binary <= threshold] = 0

    return pred_binary

In [21]:
models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10]
models2 = [meta_xgb, meta_logistic, meta_NN, meta_weight]
models3 = [meta_xgb2, meta_logistic2, meta_NN2, meta_weight2]

threshold = "auto"
print("\n---------- Inference ----------")
print("Threshold :", threshold)

S_test = stacking(models, X_test)
y_pred_lst = []
y_pred_binary_lst =[]
for meta in models2 :
    pred = meta.predict_proba(S_test)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))

S_test2 = stacking(models2, S_test, layer=2)
y_pred_lst2 = []
y_pred_binary_lst2 =[]
threshold = "auto"
for meta in models3 :
    pred = meta.predict_proba(S_test2)[:, 1]
    y_pred_lst2.append(pred)
    y_pred_binary_lst2.append(pred_to_binary(pred, threshold = threshold))

final, final_df = export_csv(patient_num, error_patient, y_pred_binary_lst2, y_pred_lst2, path = path, index=3)
print(making_result(S_test, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, final))


---------- Inference ----------
Threshold : auto
model 1 is stacked
model 3 is stacked
model 4 is stacked
model 10 is stacked
model 1 is stacked
model 2 is stacked
model 3 is stacked
model 4 is stacked
    m1        m3    m4  m10       xgb        lr        NN    weight  xgb_b  \
0  0.5  0.000003  0.47  0.5  0.513582  0.565334  0.591972  0.360199    0.0   
1  0.5  0.561660  0.63  0.5  0.576999  0.576369  0.607946  0.363784    1.0   

   lr_b  NN_b  weight_b      xgb2       lr2       NN2   weight2  xgb_b2  \
0   0.0   0.0       0.0  0.594461  0.550571  0.555390  0.355301     0.0   
1   1.0   1.0       1.0  0.594461  0.573189  0.551675  0.355165     0.0   

   lr_b2  NN_b2  weight_b2    y  
0    0.0    0.0        0.0  0.0  
1    1.0    1.0        1.0  1.0  
