In [3]:
from utils.data_loader import train_data_loader, test_data_loader
from utils.inference_tools import pred_to_binary, export_csv, making_result
from utils.model_stacking import *

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression, Lasso, RidgeClassifier, SGDClassifier, Lars, LassoLars
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import fbeta_score, make_scorer

from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

import pandas as pd
import numpy as np
import pickle
import datetime

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Load Data and Pre-processing

In [4]:
# Print Current Time
time = str(datetime.datetime.now()).split()[1].split('.')[0]
print("Start:", time)


# Print Information
name = 'KHW'
model = 'ML Stacking'
summary = 'HyperParams tuning with 10 sklearn models + 4 stacking model'

print('Author Name :', name)
print('Model :', model)
print('Summary :', summary)
print("\n")


# Setting
path = "./data"
pos_dir = path+"/train/positive/"
neg_dir = path+"/train/negative/"
test_dir = path+'/test/'

features = ['firstorder', 'shape']
target_voxel = (0.65, 0.65, 3)

Start: 14:51:25
Author Name : KHW
Model : ML Stacking
Summary : HyperParams tuning with 10 sklearn models + 4 stacking model




In [5]:
do_n4 = False
do_ws = True
do_resample = True
do_shuffle = True

X_train, y_train = train_data_loader(pos_dir, neg_dir, do_n4, do_ws, do_resample, do_shuffle, features, target_voxel)

Processing [1/3] Image of Positive Patient... (14:51:26)
>>> Finished : Voxel Size Resampling (14:51:39)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:51:40)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/3] Image of Positive Patient... (14:51:42)
>>> Finished : Voxel Size Resampling (14:51:55)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:51:56)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [3/3] Image of Positive Patient... (14:51:58)
>>> Finished : Voxel Size Resampling (14:52:11)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:52:12)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [1/3] Image of Negative Patient... (14:52:14)
>>> Finished : Voxel Size Resampling (14:52:27)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:52:28)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/3] Image of Negative Patient... (14:52:30)
>>> Finished : Voxel Size Resampling (14:52:44)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:52:45)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [3/3] Image of Negative Patient... (14:52:48)
>>> Finished : Voxel Size Resampling (14:53:01)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:53:02)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Created X of shape (6, 64) and y of shape (6,) (14:53:04)


In [6]:
do_n4 = False
do_ws = True
do_resample = True

X_test, patient_num, error_patient = test_data_loader(test_dir, do_n4, do_ws, do_resample, features, target_voxel)

Processing [1/2] Image of Test Patient... (14:53:04)
>>> Finished : Voxel Size Resampling (14:53:18)
>>> Unique Value of BRAIN mask : [0. 1.]
>>>Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:53:19)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/2] Image of Test Patient... (14:53:21)
>>> Finished : Voxel Size Resampling (14:53:34)
>>> Unique Value of BRAIN mask : [0. 1.]
>>>Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (14:53:35)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


# Base Model

### xgboost

In [None]:
# Fit Model with Training Data
model1 = XGBClassifier(n_jobs=4)
model1.fit(X_train, y_train)


# Save model to file
# pickle.dump(model1, open('./data/model/model1.pickle.dat', 'wb'))

### svm

In [None]:
# Fit Model with Training Data
model2 = SVC()
model2.fit(X_train, y_train)


# Save model to file
# pickle.dump(model2, open('./data/model/model2.pickle.dat', 'wb'))

### logistic regression

In [None]:
# Fit Model with Training Data
model3 = LogisticRegression(n_jobs=4)
model3.fit(X_train, y_train)


# Save model to file
# pickle.dump(model3, open('./data/model/model3.pickle.dat', 'wb'))

### random forest

In [None]:
# Fit Model with Training Data
model4 = RandomForestClassifier(n_jobs=4)
model4.fit(X_train, y_train)


# Save model to file
# pickle.dump(mode4l, open('./data/model/model4.pickle.dat', 'wb'))

<br><br><br>

# Score

In [7]:
BETA=0.75

In [8]:
def new_scorer(y_true, y_pred, threshold=0.5) :
    result = []
    global BETA

    for pred in list(y_pred) :
        if pred >= threshold :
            result.append(1)
        else :
            result.append(0)
            
    return fbeta_score(y_true, np.array(result), beta=BETA)

In [9]:
scorer = make_scorer(fbeta_score, beta=BETA)

# Modeling

### MLP

### CNN

# Parameter Tuning & CV

In [None]:
cv=2

### xgboost

In [10]:
model1 = XGBClassifier()

In [11]:
m1_params1 = {
    'max_depth' : [5,6,7,8],
    'min_child_weight' : [0.5, 1, 5, 10, 15, 20],
    'gamma' : [1.5, 2, 2.5, 3.0, 5],
    'subsample' : [0.5, 0.6, 0.8, 1.0],
    'colsample_bytree' : [0.5, 0.6, 0.8, 1.0],
    'probability' : [True],
    'learning_rate' : [0.01, 0.05, 0.1],
    'n_estimators' : [300, 500, 700]

}

m1_grid_1 = GridSearchCV(model1, param_grid=m1_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m1_grid_1.fit(X_train, y_train)

best_model1 = m1_grid_1.best_estimator_

print("Best Score : {}".format(m1_grid_1.best_score_))
print("Best Params : {}".format(m1_grid_1.best_params_))

Best Score : 0.0
Best Params : {'max_depth': 5}


### svm

In [12]:
model2 = SVC()

In [13]:
m2_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100], 
    'gamma' : [0.001, 0.01, 0.1, 1, 2, 5, 10, 20],
    'degree' : [2,3,4],
    'probability' : [True]
}

m2_grid_1 = GridSearchCV(model2, param_grid=m2_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m2_grid_1.fit(X_train, y_train)

best_model2 = m2_grid_1.best_estimator_

print("Best Score : {}".format(m2_grid_1.best_score_))
print("Best Params : {}".format(m2_grid_1.best_params_))

Best Score : 0.6097560975609756
Best Params : {'C': 0.001, 'degree': 2, 'gamma': 0.001, 'probability': True}


### logistic regression

In [14]:
model3 = LogisticRegression()

In [15]:
m3_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
}

m3_grid_1 = GridSearchCV(model3, param_grid=m3_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m3_grid_1.fit(X_train, y_train)

best_model3 = m3_grid_1.best_estimator_

print("Best Score : {}".format(m3_grid_1.best_score_))
print("Best Params : {}".format(m3_grid_1.best_params_))

Best Score : 0.6097560975609756
Best Params : {'C': 0.001, 'max_iter': 100}


### random forest

In [16]:
model4 = RandomForestClassifier()

In [17]:
m4_params1 = {
    'max_depth' : [6, 8, 10, 15, 20, 30, 40, 50],
    'min_samples_leaf': [1, 2, 3, 4, 5,10, 20, 50],
    'n_estimators' : [100, 300, 500]
}

m4_grid_1 = GridSearchCV(model4, param_grid=m4_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m4_grid_1.fit(X_train, y_train)

best_model4 = m4_grid_1.best_estimator_

print("Best Score : {}".format(m4_grid_1.best_score_))
print("Best Params : {}".format(m4_grid_1.best_params_))

Best Score : 0.7398373983739838
Best Params : {'max_depth': 15}


### lasso regression

In [18]:
model5 = LogisticRegression()

In [19]:
m5_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
    'penalty' : ["l1"]
}

m5_grid_1 = GridSearchCV(model5, param_grid=m5_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m5_grid_1.fit(X_train, y_train)

best_model5 = m5_grid_1.best_estimator_

print("Best Score : {}".format(m5_grid_1.best_score_))
print("Best Params : {}".format(m5_grid_1.best_params_))

Best Score : 0.6097560975609756
Best Params : {'C': 0.001, 'max_iter': 100, 'penalty': 'l1'}


### ridge regression

In [20]:
model6 = RidgeClassifier()

In [21]:
m6_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [None]+[n for n in range(100,1101, 200)]
}

m6_grid_1 = GridSearchCV(model6, param_grid=m6_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m6_grid_1.fit(X_train, y_train)

best_model6 = m6_grid_1.best_estimator_

print("Best Score : {}".format(m6_grid_1.best_score_))
print("Best Params : {}".format(m6_grid_1.best_params_))

Best Score : 0.4557772850455777
Best Params : {'alpha': 0.1, 'max_iter': None}


### elasticNet

In [22]:
model7 = SGDClassifier()

In [23]:
m7_params1 = {
    'alpha': [0.001, 0.01, 0.1, 1, 2, 5, 10, 20, 50, 100],
    'l1_ratio':[0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 
    'max_iter' : [None]+[n for n in range(800, 1601, 200)],
    'penalty' : ["elasticnet"],
    'loss' : ["log"]
}

m7_grid_1 = GridSearchCV(model7, param_grid=m7_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m7_grid_1.fit(X_train, y_train)

best_model7 = m7_grid_1.best_estimator_

print("Best Score : {}".format(m7_grid_1.best_score_))
print("Best Params : {}".format(m7_grid_1.best_params_))

Best Score : 0.6097560975609756
Best Params : {'alpha': 5, 'l1_ratio': 0.6, 'loss': 'log', 'max_iter': 1000, 'penalty': 'elasticnet'}


### LARS

In [24]:
model8 = Lars()

In [25]:
m8_params1 = {
    'n_nonzero_coefs': [n for n in range(30, 150, 20)]
}

max_score=0
m8_best_t = 0
best_model8 = ""
m8_best_grid_1 = ""

for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m8_grid_1 = GridSearchCV(model8, param_grid=m8_params1, scoring=scorer2, cv=cv, verbose=0, n_jobs=-1)
    m8_grid_1.fit(X_train, y_train)

    if max_score < m8_grid_1.best_score_ :
        best_model8 = m8_grid_1.best_estimator_
        m8_best_t = t
        m8_best_grid_1 = m8_grid_1
        
m8_grid_1 = m8_best_grid_1
best_model8 = m8_grid_1.best_estimator_

print("Best Score : {}".format(m8_grid_1.best_score_))     
print("Threshold :", m8_best_t)
print("Best Params : {}".format(m8_grid_1.best_params_))

Best Score : 0.5869453044375644
Threshold : 0.6
Best Params : {'n_nonzero_coefs': 30}


### LARS lasso

In [26]:
model9 = LassoLars()

In [27]:
m9_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [n for n in range(800, 1601, 200)]
}

max_score=0
m9_best_t = 0
best_model9 = ""
m9_best_grid_1 = ""
for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m9_grid_1 = GridSearchCV(model9, param_grid=m9_params1, scoring=scorer2, cv=cv, verbose=0, n_jobs=-1)
    m9_grid_1.fit(X_train, y_train)

    if max_score < m9_grid_1.best_score_ :
        best_model9 = m9_grid_1.best_estimator_
        m9_best_t = t
        m9_best_grid_1 = m9_grid_1

m9_grid_1 = m9_best_grid_1
best_model9 = m9_grid_1.best_estimator_

print("Best Score : {}".format(m9_grid_1.best_score_))     
print("Threshold :", m9_best_t)
print("Best Params : {}".format(m9_grid_1.best_params_))

Best Score : 0.5869453044375644
Threshold : 0.6
Best Params : {'alpha': 0.1, 'max_iter': 800}


### ExtraTree

In [28]:
model10 = ExtraTreesClassifier()

In [29]:
m10_params1 = {
    'max_depth' : [None, 3, 5, 7, 9],
    'n_estimators' : [10, 50, 100, 300, 500]
}

m10_grid_1 = GridSearchCV(model10, param_grid=m10_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m10_grid_1.fit(X_train, y_train)

best_model10 = m10_grid_1.best_estimator_

print("Best Score : {}".format(m10_grid_1.best_score_))
print("Best Params : {}".format(m10_grid_1.best_params_))

Best Score : 0.6097560975609756
Best Params : {'max_depth': None, 'n_estimators': 10}


### AdaBoost

In [33]:
model11 = AdaBoostClassifier()

In [36]:
m11_params1 = {
    'n_estimators' : [100, 300, 500],
    'learning_rate' : [0.01, 0.05, 0.1],
    'algorithm' :['SAMME.R']
}

m11_grid_1 = GridSearchCV(model11, param_grid=m11_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m11_grid_1.fit(X_train, y_train)

best_model11 = m11_grid_1.best_estimator_

print("Best Score : {}".format(m11_grid_1.best_score_))
print("Best Params : {}".format(m11_grid_1.best_params_))

Best Score : 0.5858585858585857
Best Params : {'algorithm': 'SAMME.R', 'learning_rate': 0.05, 'n_estimators': 300}


### lightgbm

In [37]:
model12 = LGBMClassifier()

In [38]:
m12_params1 = {
    'max_depth' : [-1,5,7,9],
    'min_child_weight' : [0.5, 1, 5, 10],
    'colsample_bytree' : [0.6, 0.8, 1.0],
    'subsample' : [0.6, 0.8, 1.0],
    'probability' : [True],
    'learning_rate' : [0.05, 0.1],
    'n_estimators' : [100, 300, 500],
    'reg_alpha' : [0.0, 1.0, 5.0, 10.0], 
    'reg_lambda' : [0.0, 1.0, 5.0, 10.0]
}

m12_grid_1 = GridSearchCV(model12, param_grid=m12_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m12_grid_1.fit(X_train, y_train)

best_model12 = m12_grid_1.best_estimator_

print("Best Score : {}".format(m12_grid_1.best_score_))
print("Best Params : {}".format(m12_grid_1.best_params_))

Best Score : 0.0
Best Params : {'colsample_bytree': 0.6, 'learning_rate': 0.05, 'max_depth': -1, 'min_child_weight': 0.5, 'n_estimators': 100, 'probability': True, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'subsample': 0.6}


# Model Stacking

In [41]:
def stacking(models, data, include, predict_binary=[None]) : 
    result = []
    
    for idx, model in enumerate(models) :
        if idx+1 not in include :
            continue
                
        if idx+1 in predict_binary :
            result.append(model.predict(data))
        else :
            result.append(model.predict_proba(data)[:,1])
            print("model", idx+1, "is stacked")
 
    print("\n")
    return np.array(result).T

In [42]:
# layer1
models = [best_model1, best_model2, best_model3, best_model4, best_model5, best_model6, 
          best_model7, best_model8, best_model9, best_model10, best_model11, best_model12]
S_train = stacking(models, X_train, [1,3,4,10,11,12])

meta_xgb = stacking_xgb(S_train, y_train, cv=cv)
meta_logistic = stacking_logistic(S_train, y_train, cv=cv)
meta_NN = stacking_NN(S_train, y_train, cv=cv)
meta_weight = stacking_weight(S_train, y_train, cv=cv)

y_pred_lst = []
y_pred_binary_lst =[]
threshold = "auto"

for meta in [meta_xgb, meta_logistic, meta_NN, meta_weight] :
    pred = meta.predict_proba(S_train)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))

model 1 is stacked
model 3 is stacked
model 4 is stacked
model 10 is stacked
model 11 is stacked
model 12 is stacked


Best Score : 0.0
Best Params : {'max_depth': 3, 'n_estimators': 50}
Best Score : 1.0
Best Params : {'C': 10, 'max_iter': 100}
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [43]:
# layer2
models2 = [meta_xgb, meta_logistic, meta_NN, meta_weight]
S_train2 = stacking(models2, S_train, [1,2,3,4])

meta_NN2 = stacking_NN(S_train2, y_train, cv=cv)
meta_weight2 = stacking_weight(S_train2, y_train, cv=cv)

y_pred_lst2 = []
y_pred_binary_lst2 =[]
threshold = "auto"

for meta in [meta_NN2, meta_weight2] :
    pred = meta.predict_proba(S_train2)[:, 1]
    y_pred_lst2.append(pred)
    y_pred_binary_lst2.append(pred_to_binary(pred, threshold = threshold))

model 1 is stacked
model 2 is stacked
model 3 is stacked
model 4 is stacked


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [45]:
print(making_result(S_train, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, y_train, [1,3,4,10,11,12], [1,2,3,4]))

    m1        m3   m4  m10           m11  m12  stack_1   stack_2   stack_3  \
0  0.5  0.473162  0.8  1.0  1.000000e+00  0.5      0.5  0.927291  0.627924   
1  0.5  0.653335  0.1  0.0  2.220446e-16  0.5      0.5  0.095648  0.429797   
2  0.5  0.643683  1.0  1.0  1.000000e+00  0.5      0.5  0.935867  0.630637   
3  0.5  0.482826  0.9  1.0  1.000000e+00  0.5      0.5  0.933229  0.625625   
4  0.5  0.485779  0.2  0.0  2.220446e-16  0.5      0.5  0.109274  0.408193   
5  0.5  0.173242  0.4  0.0  2.220446e-16  0.5      0.5  0.140798  0.364895   

    stack_4  stack_b_1  stack_b_2  stack_b_3  stack_b_4       NN2   weight2  \
0  0.698935        0.0        1.0        1.0        1.0  0.552285  0.595619   
1  0.646767        0.0        0.0        0.0        0.0  0.437734  0.433050   
2  0.714813        0.0        1.0        1.0        1.0  0.552184  0.595406   
3  0.705437        0.0        1.0        1.0        1.0  0.552413  0.596618   
4  0.649994        0.0        0.0        0.0        0.0  0

# Save

In [46]:
pickle.dump(best_model1, open(path+'/model/model1.pickle.dat', 'wb'))
pickle.dump(best_model2, open(path+'/model/model2.pickle.dat', 'wb'))
pickle.dump(best_model3, open(path+'/model/model3.pickle.dat', 'wb'))
pickle.dump(best_model4, open(path+'/model/model4.pickle.dat', 'wb'))
pickle.dump(best_model5, open(path+'/model/model5.pickle.dat', 'wb'))
pickle.dump(best_model6, open(path+'/model/model6.pickle.dat', 'wb'))
pickle.dump(best_model7, open(path+'/model/model7.pickle.dat', 'wb'))
pickle.dump(best_model8, open(path+'/model/model8.pickle.dat', 'wb'))
pickle.dump(best_model9, open(path+'/model/model9.pickle.dat', 'wb'))
pickle.dump(best_model10, open(path+'/model/model10.pickle.dat', 'wb'))
pickle.dump(best_model11, open(path+'/model/model11.pickle.dat', 'wb'))
pickle.dump(best_model12, open(path+'/model/model12.pickle.dat', 'wb'))

In [47]:
pickle.dump(meta_xgb, open(path+'/model/meta_xgb.pickle.dat', 'wb'))
pickle.dump(meta_logistic, open(path+'/model/meta_logistic.pickle.dat', 'wb'))

meta_NN.model.save_weights(path+'/model/meta_NN.h5')
with open(path+'/model/meta_NN.json', 'w') as f :
    f.write(meta_NN.model.to_json())
    
meta_weight.model.save_weights(path+'/model/meta_weight.h5')
with open(path+'/model/meta_weight.json', 'w') as f :
    f.write(meta_weight.model.to_json())

In [48]:
meta_NN2.model.save_weights(path+'/model/meta_NN2.h5')
with open(path+'/model/meta_NN2.json', 'w') as f :
    f.write(meta_NN2.model.to_json())
    
meta_weight2.model.save_weights(path+'/model/meta_weight2.h5')
with open(path+'/model/meta_weight2.json', 'w') as f :
    f.write(meta_weight2.model.to_json())

# Loading & Prediction

In [49]:
model1 = pickle.load(open(path+'/model/model1.pickle.dat', 'rb'))
model2 = pickle.load(open(path+'/model/model2.pickle.dat', 'rb'))
model3 = pickle.load(open(path+'/model/model3.pickle.dat', 'rb'))
model4 = pickle.load(open(path+'/model/model4.pickle.dat', 'rb'))
model5 = pickle.load(open(path+'/model/model5.pickle.dat', 'rb'))
model6 = pickle.load(open(path+'/model/model6.pickle.dat', 'rb'))
model7 = pickle.load(open(path+'/model/model7.pickle.dat', 'rb'))
model8 = pickle.load(open(path+'/model/model8.pickle.dat', 'rb'))
model9 = pickle.load(open(path+'/model/model9.pickle.dat', 'rb'))
model10 = pickle.load(open(path+'/model/model10.pickle.dat', 'rb'))
model11 = pickle.load(open(path+'/model/model11.pickle.dat', 'rb'))
model12 = pickle.load(open(path+'/model/model12.pickle.dat', 'rb'))

In [50]:
meta_xgb = pickle.load(open(path+'/model/meta_xgb.pickle.dat', 'rb'))
meta_logistic = pickle.load(open(path+'/model/meta_logistic.pickle.dat', 'rb'))

with open(path+'/model/meta_NN.json', 'r') as f :
    meta_NN = model_from_json(f.read())
meta_NN.model.load_weights(path+'/model/meta_NN.h5')

with open(path+'/model/meta_weight.json', 'r') as f :
    meta_weight = model_from_json(f.read())
meta_weight.model.load_weights(path+'/model/meta_weight.h5')

In [51]:
with open(path+'/model/meta_NN2.json', 'r') as f :
    meta_NN2 = model_from_json(f.read())
meta_NN2.model.load_weights(path+'/model/meta_NN2.h5')

with open(path+'/model/meta_weight2.json', 'r') as f :
    meta_weight2 = model_from_json(f.read())
meta_weight2.model.load_weights(path+'/model/meta_weight2.h5')

<br><br>

In [53]:
models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11, model12]
models2 = [meta_xgb, meta_logistic, meta_NN, meta_weight]
models3 = [meta_NN2, meta_weight2]

threshold = "auto"
print("\n---------- Inference ----------")
print("Threshold :", threshold)

S_test = stacking(models, X_test, [1,3,4,10,11,12])
y_pred_lst = []
y_pred_binary_lst =[]

for meta in models2 :
    pred = meta.predict_proba(S_test)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))

S_test2 = stacking(models2, S_test, [1,2,3,4])
y_pred_lst2 = []
y_pred_binary_lst2 =[]

for meta in models3 :
    pred = meta.predict_proba(S_test2)[:, 1]
    y_pred_lst2.append(pred)
    y_pred_binary_lst2.append(pred_to_binary(pred, threshold = threshold))

final, final_df = export_csv(patient_num, error_patient, y_pred_binary_lst2, y_pred_lst2, path = path, index=0)
print(making_result(S_test, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, final, [1,3,4,10,11,12], [1,2,3,4]))


---------- Inference ----------
Threshold : auto
model 1 is stacked
model 3 is stacked
model 4 is stacked
model 10 is stacked
model 11 is stacked
model 12 is stacked


model 1 is stacked
model 2 is stacked
model 3 is stacked
model 4 is stacked


    m1        m3   m4  m10  m11  m12  stack_1   stack_2   stack_3   stack_4  \
0  0.5  0.561660  0.8  0.5  1.0  0.5      0.5  0.817496  0.603768  0.638133   
1  0.5  0.000003  0.6  0.5  1.0  0.5      0.5  0.816095  0.586195  0.611409   

   stack_b_1  stack_b_2  stack_b_3  stack_b_4       NN2   weight2  NN_b2  \
0        0.0        1.0        1.0        1.0  0.543517  0.580466    0.0   
1        0.0        0.0        0.0        0.0  0.544415  0.585899    1.0   

   weight_b2    y  
0        0.0  0.0  
1        1.0  1.0  
