In [2]:
from utils.data_loader import train_data_loader, test_data_loader
from utils.inference_tools import pred_to_binary, export_csv, making_result
from utils.model_stacking import *
import vecstack

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression, Lasso, RidgeClassifier, SGDClassifier, Lars, LassoLars
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.neural_network import MLPClassifier

from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

import pandas as pd
import numpy as np
import pickle
import datetime

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Load Data and Pre-processing

In [3]:
# Setting
path = "./data"
pos_dir = path+"/train/positive/"
neg_dir = path+"/train/negative/"

save_dir = path+"/model/"
test_dir = path+'/test/'

features = ['firstorder', 'shape']
target_voxel = (0.65, 0.65, 3)

In [4]:
norm = 'new'
do_resample = True
do_shuffle = True
do_minmax = True

X_train, y_train = train_data_loader(pos_dir, neg_dir, norm, do_resample, do_shuffle, do_minmax, features, target_voxel, path=path)

Created X of shape (6, 64) and y of shape (6,) (18:01:17)


In [5]:
norm = 'new'
do_resample = True
do_minmax = True

X_test, patient_num, error_patient = test_data_loader(test_dir, norm, do_resample, do_minmax, features, target_voxel, path=path)

In [6]:
#np.save(save_dir+"X_train.npy", X_train)
#np.save(save_dir+"y_train.npy", y_train)

#X_train = np.load(save_dir+"X_train.npy")
#y_train = np.load(save_dir+"y_train.npy")

# Base Model

### xgboost

In [8]:
# Fit Model with Training Data
model1 = XGBClassifier(n_jobs=4)
model1.fit(X_train, y_train)


# Save model to file
# pickle.dump(model1, open('./data/model/model1.pickle.dat', 'wb'))

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=4, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

### svm

In [9]:
# Fit Model with Training Data
model2 = SVC()
model2.fit(X_train, y_train)


# Save model to file
# pickle.dump(model2, open('./data/model/model2.pickle.dat', 'wb'))

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### logistic regression

In [10]:
# Fit Model with Training Data
model3 = LogisticRegression(n_jobs=4)
model3.fit(X_train, y_train)


# Save model to file
# pickle.dump(model3, open('./data/model/model3.pickle.dat', 'wb'))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=4,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

### random forest

In [11]:
# Fit Model with Training Data
model4 = RandomForestClassifier(n_jobs=4)
model4.fit(X_train, y_train)


# Save model to file
# pickle.dump(mode4l, open('./data/model/model4.pickle.dat', 'wb'))

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=4,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

<br><br><br>

# Score

In [11]:
BETA=0.75

In [12]:
def new_scorer(y_true, y_pred, threshold=0.5) :
    result = []
    global BETA

    for pred in list(y_pred) :
        if pred >= threshold :
            result.append(1)
        else :
            result.append(0)
            
    return fbeta_score(y_true, np.array(result), beta=BETA)

In [13]:
scorer = make_scorer(fbeta_score, beta=BETA)

# Modeling

### MLP

### CNN

# Parameter Tuning & CV

In [15]:
cv=2

### xgboost

In [10]:
model1 = XGBClassifier()

In [11]:
m1_params1 = {
    'max_depth' : [5,6,7,8],
    'min_child_weight' : [0.5, 1, 5, 10, 15, 20],
    'gamma' : [1.5, 2, 2.5, 3.0, 5],
    'subsample' : [0.5, 0.6, 0.8, 1.0],
    'colsample_bytree' : [0.5, 0.6, 0.8, 1.0],
    'probability' : [True],
    'learning_rate' : [0.01, 0.05, 0.1],
    'n_estimators' : [300, 500, 700],
    'random_state' : [1213]

}

m1_grid_1 = GridSearchCV(model1, param_grid=m1_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m1_grid_1.fit(X_train, y_train)

best_model1 = m1_grid_1.best_estimator_

print("Best Score : {}".format(m1_grid_1.best_score_))
print("Best Params : {}".format(m1_grid_1.best_params_))

Best Score : 0.0
Best Params : {'max_depth': 5}


### svm

In [18]:
model2 = SVC()

In [19]:
m2_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100], 
    'gamma' : [0.001, 0.01, 0.1, 1, 2, 5, 10, 20],
    'degree' : [2,3,4],
    'probability' : [True]
}

m2_grid_1 = GridSearchCV(model2, param_grid=m2_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m2_grid_1.fit(X_train, y_train)

best_model2 = m2_grid_1.best_estimator_

print("Best Score : {}".format(m2_grid_1.best_score_))
print("Best Params : {}".format(m2_grid_1.best_params_))

Best Score : 0.6097560975609756
Best Params : {'C': 0.001, 'degree': 2, 'gamma': 0.001, 'probability': True}


### logistic regression

In [20]:
model3 = LogisticRegression()

In [21]:
m3_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
}

m3_grid_1 = GridSearchCV(model3, param_grid=m3_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m3_grid_1.fit(X_train, y_train)

best_model3 = m3_grid_1.best_estimator_

print("Best Score : {}".format(m3_grid_1.best_score_))
print("Best Params : {}".format(m3_grid_1.best_params_))

Best Score : 0.4065040650406504
Best Params : {'C': 0.001, 'max_iter': 100}


### random forest

In [17]:
model4 = RandomForestClassifier()

In [22]:
m4_params1 = {
    'max_depth' : [6, 8, 10, 15, 20, 30, 40, 50],
    'min_samples_leaf': [1, 2, 3, 4, 5,10, 20, 50],
    'n_estimators' : [100, 300, 500],
    'random_state' : [1213]
}

m4_grid_1 = GridSearchCV(model4, param_grid=m4_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m4_grid_1.fit(X_train, y_train)

best_model4 = m4_grid_1.best_estimator_

print("Best Score : {}".format(m4_grid_1.best_score_))
print("Best Params : {}".format(m4_grid_1.best_params_))

Best Score : 0.3333333333333333
Best Params : {'max_depth': 15}


### lasso regression

In [23]:
model5 = LogisticRegression()

In [24]:
m5_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
    'penalty' : ["l1"]
}

m5_grid_1 = GridSearchCV(model5, param_grid=m5_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m5_grid_1.fit(X_train, y_train)

best_model5 = m5_grid_1.best_estimator_

print("Best Score : {}".format(m5_grid_1.best_score_))
print("Best Params : {}".format(m5_grid_1.best_params_))

Best Score : 0.3333333333333333
Best Params : {'C': 10, 'max_iter': 500, 'penalty': 'l1'}


### ridge regression

In [25]:
model6 = RidgeClassifier()

In [26]:
m6_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [None]+[n for n in range(100,1101, 200)]
}

m6_grid_1 = GridSearchCV(model6, param_grid=m6_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m6_grid_1.fit(X_train, y_train)

best_model6 = m6_grid_1.best_estimator_

print("Best Score : {}".format(m6_grid_1.best_score_))
print("Best Params : {}".format(m6_grid_1.best_params_))

Best Score : 0.2032520325203252
Best Params : {'alpha': 0.1, 'max_iter': None}


### elasticNet

In [27]:
model7 = SGDClassifier()

In [28]:
m7_params1 = {
    'alpha': [0.001, 0.01, 0.1, 1, 2, 5, 10, 20, 50, 100],
    'l1_ratio':[0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 
    'max_iter' : [None]+[n for n in range(800, 1601, 200)],
    'penalty' : ["elasticnet"],
    'loss' : ["log"]
}

m7_grid_1 = GridSearchCV(model7, param_grid=m7_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m7_grid_1.fit(X_train, y_train)

best_model7 = m7_grid_1.best_estimator_

print("Best Score : {}".format(m7_grid_1.best_score_))
print("Best Params : {}".format(m7_grid_1.best_params_))

Best Score : 0.4065040650406504
Best Params : {'alpha': 0.001, 'l1_ratio': 0.1, 'loss': 'log', 'max_iter': None, 'penalty': 'elasticnet'}


### LARS

In [29]:
model8 = Lars()

In [30]:
m8_params1 = {
    'n_nonzero_coefs': [n for n in range(30, 150, 20)]
}

max_score=0
m8_best_t = 0
best_model8 = ""
m8_best_grid_1 = ""

for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m8_grid_1 = GridSearchCV(model8, param_grid=m8_params1, scoring=scorer2, cv=cv, verbose=0, n_jobs=-1)
    m8_grid_1.fit(X_train, y_train)

    if max_score < m8_grid_1.best_score_ :
        best_model8 = m8_grid_1.best_estimator_
        m8_best_t = t
        m8_best_grid_1 = m8_grid_1
        
m8_grid_1 = m8_best_grid_1
best_model8 = m8_grid_1.best_estimator_

print("Best Score : {}".format(m8_grid_1.best_score_))     
print("Threshold :", m8_best_t)
print("Best Params : {}".format(m8_grid_1.best_params_))

Best Score : 0.21929824561403502
Threshold : 0.6
Best Params : {'n_nonzero_coefs': 30}


### LARS lasso

In [31]:
model9 = LassoLars()

In [32]:
m9_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [n for n in range(800, 1601, 200)]
}

max_score=0
m9_best_t = 0
best_model9 = ""
m9_best_grid_1 = ""
for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m9_grid_1 = GridSearchCV(model9, param_grid=m9_params1, scoring=scorer2, cv=cv, verbose=0, n_jobs=-1)
    m9_grid_1.fit(X_train, y_train)

    if max_score < m9_grid_1.best_score_ :
        best_model9 = m9_grid_1.best_estimator_
        m9_best_t = t
        m9_best_grid_1 = m9_grid_1

m9_grid_1 = m9_best_grid_1
best_model9 = m9_grid_1.best_estimator_

print("Best Score : {}".format(m9_grid_1.best_score_))     
print("Threshold :", m9_best_t)
print("Best Params : {}".format(m9_grid_1.best_params_))

Best Score : 0.21929824561403502
Threshold : 0.6
Best Params : {'alpha': 0.1, 'max_iter': 800}


### ExtraTree

In [33]:
model10 = ExtraTreesClassifier()

In [34]:
m10_params1 = {
    'max_depth' : [None, 3, 5, 7, 9],
    'n_estimators' : [10, 50, 100, 300, 500],
    'random_state' : [1213]
}

m10_grid_1 = GridSearchCV(model10, param_grid=m10_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m10_grid_1.fit(X_train, y_train)

best_model10 = m10_grid_1.best_estimator_

print("Best Score : {}".format(m10_grid_1.best_score_))
print("Best Params : {}".format(m10_grid_1.best_params_))

Best Score : 0.0
Best Params : {'n_estimators': 10, 'random_state': 1213}


### AdaBoost

In [35]:
model11 = AdaBoostClassifier()

In [36]:
m11_params1 = {
    'n_estimators' : [100, 300, 500],
    'learning_rate' : [0.01, 0.05, 0.1],
    'algorithm' :['SAMME.R'],
    'random_state' : [1213]
}

m11_grid_1 = GridSearchCV(model11, param_grid=m11_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m11_grid_1.fit(X_train, y_train)

best_model11 = m11_grid_1.best_estimator_

print("Best Score : {}".format(m11_grid_1.best_score_))
print("Best Params : {}".format(m11_grid_1.best_params_))

Best Score : 0.2032520325203252
Best Params : {'algorithm': 'SAMME.R', 'random_state': 1213}


### lightgbm

In [37]:
model12 = LGBMClassifier()

In [38]:
m12_params1 = {
    'max_depth' : [-1,5,7,9],
    'min_child_weight' : [0.5, 1, 5, 10],
    'colsample_bytree' : [0.6, 0.8, 1.0],
    'subsample' : [0.6, 0.8, 1.0],
    'probability' : [True],
    'learning_rate' : [0.05, 0.1],
    'n_estimators' : [100, 300, 500],
    'reg_alpha' : [0.0, 1.0, 5.0, 10.0], 
    'reg_lambda' : [0.0, 1.0, 5.0, 10.0],
    'random_state' : [1213]
}

m12_grid_1 = GridSearchCV(model12, param_grid=m12_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m12_grid_1.fit(X_train, y_train)

best_model12 = m12_grid_1.best_estimator_

print("Best Score : {}".format(m12_grid_1.best_score_))
print("Best Params : {}".format(m12_grid_1.best_params_))

Best Score : 0.0
Best Params : {'random_state': 1213}


### MLP

In [6]:
model13 = MLPClassifier()

In [18]:
m13_params1 = {
    "hidden_layer_sizes" : [(256,), (128,)],
    "activation" : ['relu'], 
    "solver" : ['adam'], 
    "alpha" : [0.0001],
    "batch_size" : [32], 
    "max_iter" : [200,300,500], 
    "shuffle" : [False],
    "random_state" : [1213]
}

m13_grid_1 = GridSearchCV(model13, param_grid=m13_params1, scoring=scorer, cv=cv, verbose=0, n_jobs=-1)
m13_grid_1.fit(X_train, y_train)

best_model13 = m13_grid_1.best_estimator_

print("Best Score : {}".format(m13_grid_1.best_score_))
print("Best Params : {}".format(m13_grid_1.best_params_))

Best Score : 0.0
Best Params : {'activation': 'relu', 'alpha': 0.0001, 'batch_size': 32, 'hidden_layer_sizes': (256,), 'max_iter': 200, 'random_state': 1213, 'shuffle': False, 'solver': 'adam'}


# Model Stacking

In [39]:
# layer1
models = [best_model1, best_model2, best_model3, best_model4, best_model5, best_model6, 
          best_model7, best_model8, best_model9, best_model10, best_model11, best_model12, best_model13]
S_train = stacking(models, X_train, [1,3,4,10,11,12])

meta_xgb = stacking_xgb(S_train, y_train, cv=cv, beta=0.5)
meta_logistic = stacking_logistic(S_train, y_train, cv=cv, beta=0.5)
meta_NN = stacking_NN(S_train, y_train, cv=cv)
meta_weight = stacking_weight(S_train, y_train, cv=cv)

y_pred_lst = []
y_pred_binary_lst =[]
y_pred_lst2 = []
y_pred_binary_lst2 =[]
threshold = "auto"

for meta in [meta_xgb, meta_logistic, meta_NN, meta_weight] :
    pred = meta.predict_proba(S_train)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))

model 1 is stacked
model 3 is stacked
model 4 is stacked
model 10 is stacked
model 11 is stacked
model 12 is stacked


Best Score : 0.5555555555555556
Best Params : {'colsample_bytree': 0.5, 'gamma': 1.5, 'learning_rate': 0.01, 'max_depth': 2, 'min_child_weight': 0.5, 'n_estimators': 100, 'probability': True, 'random_state': 1213, 'subsample': 0.5}
Best Score : 1.0
Best Params : {'C': 0.1, 'max_iter': 100}
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [40]:
print(making_result(S_train, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, [1,3,4,10,11,12], [1,2,3,4], [], y_train))

   m_1       m_3  m_4  m_10          m_11  m_12   stack_1   stack_2   stack_3  \
0  0.5  0.492551  0.1   0.0  2.220446e-16   0.5  0.480149  0.494002  0.480036   
1  0.5  0.488819  0.7   1.0  1.000000e+00   0.5  0.488222  0.568988  0.528328   
2  0.5  0.487287  0.4   1.0  1.000000e+00   0.5  0.488222  0.563740  0.528154   
3  0.5  0.368855  0.1   0.0  2.220446e-16   0.5  0.480149  0.494190  0.475861   
4  0.5  0.409310  0.9   1.0  1.000000e+00   0.5  0.488222  0.572598  0.530995   
5  0.5  0.472056  0.1   0.0  2.220446e-16   0.5  0.480149  0.494033  0.479086   

    stack_4  stack_b_1  stack_b_2  stack_b_3  stack_b_4  Y  
0  0.486705        0.0        0.0        0.0        0.0  0  
1  0.811475        1.0        1.0        1.0        1.0  1  
2  0.780846        1.0        1.0        1.0        1.0  1  
3  0.477159        0.0        0.0        0.0        0.0  0  
4  0.826459        1.0        1.0        1.0        1.0  1  
5  0.485123        0.0        0.0        0.0        0.0  0  


In [None]:
# layer1 with vecstack
"""
models = [best_model1, best_model2, best_model3, best_model4, best_model5, best_model6, 
          best_model7, best_model8, best_model9, best_model10, best_model11, best_model12]
S_models = get_stacking_base_model(models, [1,4,10,11,12])

S_train, S_test = vecstack.stacking(S_models, X_train, y_train, X_test, regression = False, metric=scorer, n_folds=cv, needs_proba=True, random_state=1213)

meta_xgb = stacking_xgb(S_train, y_train, cv=cv, beta=0.5)
meta_logistic = stacking_logistic(S_train, y_train, cv=cv, beta=0.5)
meta_NN = stacking_NN(S_train, y_train, cv=cv)
meta_weight = stacking_weight(S_train, y_train, cv=cv)

y_pred_lst = []
y_pred_binary_lst =[]
y_pred_lst2 = []
y_pred_binary_lst2 =[]
threshold = "auto"

for meta in [meta_xgb, meta_logistic, meta_NN, meta_weight] :
    pred = meta.predict_proba(S_train)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))
    
S_train = S_train[:,[idx+1 for idx in range(0,len([1,4,10,11,12])*2,2)]]
print(making_result(S_train, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, [1,4,10,11,12], [1,2,3,4], [], y_train))

"""

# Save

In [41]:
pickle.dump(best_model1, open(path+'/model/model1.pickle.dat', 'wb'))
pickle.dump(best_model2, open(path+'/model/model2.pickle.dat', 'wb'))
pickle.dump(best_model3, open(path+'/model/model3.pickle.dat', 'wb'))
pickle.dump(best_model4, open(path+'/model/model4.pickle.dat', 'wb'))
pickle.dump(best_model5, open(path+'/model/model5.pickle.dat', 'wb'))
pickle.dump(best_model6, open(path+'/model/model6.pickle.dat', 'wb'))
pickle.dump(best_model7, open(path+'/model/model7.pickle.dat', 'wb'))
pickle.dump(best_model8, open(path+'/model/model8.pickle.dat', 'wb'))
pickle.dump(best_model9, open(path+'/model/model9.pickle.dat', 'wb'))
pickle.dump(best_model10, open(path+'/model/model10.pickle.dat', 'wb'))
pickle.dump(best_model11, open(path+'/model/model11.pickle.dat', 'wb'))
pickle.dump(best_model12, open(path+'/model/model12.pickle.dat', 'wb'))
pickle.dump(best_model13, open(path+'/model/model13.pickle.dat', 'wb'))

In [42]:
pickle.dump(meta_xgb, open(path+'/model/meta_xgb.pickle.dat', 'wb'))
pickle.dump(meta_logistic, open(path+'/model/meta_logistic.pickle.dat', 'wb'))

meta_NN.model.save_weights(path+'/model/meta_NN.h5')
with open(path+'/model/meta_NN.json', 'w') as f :
    f.write(meta_NN.model.to_json())
    
meta_weight.model.save_weights(path+'/model/meta_weight.h5')
with open(path+'/model/meta_weight.json', 'w') as f :
    f.write(meta_weight.model.to_json())

# Loading & Prediction

In [43]:
model1 = pickle.load(open(path+'/model/model1.pickle.dat', 'rb'))
model2 = pickle.load(open(path+'/model/model2.pickle.dat', 'rb'))
model3 = pickle.load(open(path+'/model/model3.pickle.dat', 'rb'))
model4 = pickle.load(open(path+'/model/model4.pickle.dat', 'rb'))
model5 = pickle.load(open(path+'/model/model5.pickle.dat', 'rb'))
model6 = pickle.load(open(path+'/model/model6.pickle.dat', 'rb'))
model7 = pickle.load(open(path+'/model/model7.pickle.dat', 'rb'))
model8 = pickle.load(open(path+'/model/model8.pickle.dat', 'rb'))
model9 = pickle.load(open(path+'/model/model9.pickle.dat', 'rb'))
model10 = pickle.load(open(path+'/model/model10.pickle.dat', 'rb'))
model11 = pickle.load(open(path+'/model/model11.pickle.dat', 'rb'))
model12 = pickle.load(open(path+'/model/model12.pickle.dat', 'rb'))
model13 = pickle.load(open(path+'/model/model13.pickle.dat', 'rb'))

In [44]:
meta_xgb = pickle.load(open(path+'/model/meta_xgb.pickle.dat', 'rb'))
meta_logistic = pickle.load(open(path+'/model/meta_logistic.pickle.dat', 'rb'))

with open(path+'/model/meta_NN.json', 'r') as f :
    meta_NN = model_from_json(f.read())
meta_NN.model.load_weights(path+'/model/meta_NN.h5')

with open(path+'/model/meta_weight.json', 'r') as f :
    meta_weight = model_from_json(f.read())
meta_weight.model.load_weights(path+'/model/meta_weight.h5')

<br><br>

In [48]:
models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11, model12, model13]
models2 = [meta_xgb, meta_logistic, meta_NN, meta_weight]
models3 = []

threshold = "auto"
print("\n---------- Inference ----------")
print("Threshold :", threshold)

S_test = stacking(models, X_test, [1,3,4,10,11,12])
y_pred_lst = []
y_pred_binary_lst =[]

for meta in models2 :
    pred = meta.predict_proba(S_test)[:, 1]
    y_pred_lst.append(pred)
    y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))

final, final_df = export_csv(patient_num, error_patient, y_pred_binary_lst, y_pred_lst, path = path, index=3)
print(making_result(S_test, y_pred_lst, y_pred_binary_lst, y_pred_lst2, y_pred_binary_lst2, [1,3,4,10,11,12], [1,2,3,4], [], final))


---------- Inference ----------
Threshold : auto
model 1 is stacked
model 3 is stacked
model 4 is stacked
model 10 is stacked
model 11 is stacked
model 12 is stacked


   m_1       m_3  m_4  m_10  m_11  m_12   stack_1   stack_2   stack_3  \
0  0.5  0.473655  0.6   0.9   1.0   0.5  0.488222  0.564081  0.535493   
1  0.5  0.000556  0.6   0.9   1.0   0.5  0.488222  0.564786  0.514244   

    stack_4  stack_b_1  stack_b_2  stack_b_3  stack_b_4    Y  
0  0.788075        0.0        0.0        1.0        1.0  1.0  
1  0.762623        0.0        1.0        0.0        0.0  0.0  
