In [46]:
from utils.data_loader import train_data_loader, test_data_loader
from utils.inference_tools import pred_to_binary, export_csv, making_df

from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression, Lasso, RidgeClassifier, SGDClassifier, Lars, LassoLars
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import fbeta_score, make_scorer

import pandas as pd
import numpy as np
import pickle
import datetime

import warnings
warnings.filterwarnings('ignore')

# Load Data and Pre-processing

In [9]:
# Print Current Time
time = str(datetime.datetime.now()).split()[1].split('.')[0]
print("Start:", time)


# Print Information
name = 'KHW'
model = 'Stacking'
summary = 'HyperParams tuning with 9 sklearn models'

print('Author Name :', name)
print('Model :', model)
print('Summary :', summary)
print("\n")


# Setting
pos_dir = "../data/train/positive/"
neg_dir = "../data/train/negative/"
test_dir = '../data/test/'

do_n4 = False
do_ws = True
do_resample = True

do_shuffle_train = True
do_shuffle_test = False
save_to_disk = False
return_patient_num_train = False
return_patient_num_test = True


# Data Load
X_train, y_train = train_data_loader(pos_dir, neg_dir, do_n4, do_ws, do_resample, do_shuffle_train, save_to_disk, return_patient_num_train)
X_test, patient_num = test_data_loader(test_dir, do_n4, do_ws, do_resample, do_shuffle_test, save_to_disk, return_patient_num_test)

Start: 09:07:45
Author Name : KHW
Model : Stacking
Summary : HyperParams tuning with 9 sklearn models


Processing [1/3] Image of Positive Patient... (09:07:45)
>>> Finished : Voxel Size Resampling (09:08:00)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:08:01)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/3] Image of Positive Patient... (09:08:05)
>>> Finished : Voxel Size Resampling (09:08:20)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:08:21)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [3/3] Image of Positive Patient... (09:08:24)
>>> Finished : Voxel Size Resampling (09:08:39)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:08:40)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [1/3] Image of Negative Patient... (09:08:44)
>>> Finished : Voxel Size Resampling (09:08:59)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:09:01)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/3] Image of Negative Patient... (09:09:04)
>>> Finished : Voxel Size Resampling (09:09:21)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:09:22)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [3/3] Image of Negative Patient... (09:09:26)
>>> Finished : Voxel Size Resampling (09:09:41)
>>> Unique Value of BRAIN mask : [0. 1.]
>>> Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:09:43)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Created X of shape (6, 186) and y of shape (6,) (09:09:45)
Processing [1/2] Image of Test Patient... (09:09:46)
>>> Finished : Voxel Size Resampling (09:10:01)
>>> Unique Value of BRAIN mask : [0. 1.]
>>>Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:10:02)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


Processing [2/2] Image of Test Patient... (09:11:03)
>>> Finished : Voxel Size Resampling (09:11:20)
>>> Unique Value of BRAIN mask : [0. 1.]
>>>Unique Value of INFARCT mask : [0. 1.]
>>> Finished : White-stripe Normalization (09:11:21)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


# Base Model

### xgboost

In [None]:
# Fit Model with Training Data
model1 = XGBClassifier(n_jobs=4)
model1.fit(X_train, y_train)


# Save model to file
pickle.dump(model1, open('../data/model/model1.pickle.dat', 'wb'))

### svm

In [None]:
# Fit Model with Training Data
model2 = SVC()
model2.fit(X_train, y_train)


# Save model to file
pickle.dump(model2, open('../data/model/model2.pickle.dat', 'wb'))

### logistic regression

In [None]:
# Fit Model with Training Data
model3 = LogisticRegression(n_jobs=4)
model3.fit(X_train, y_train)


# Save model to file
pickle.dump(model3, open('../data/model/model3.pickle.dat', 'wb'))

### random forest

In [None]:
# Fit Model with Training Data
model4 = RandomForestClassifier(n_jobs=4)
model4.fit(X_train, y_train)


# Save model to file
pickle.dump(mode4l, open('../data/model/model4.pickle.dat', 'wb'))

<br><br><br>

# Score

In [13]:
def new_scorer(y_true, y_pred, threshold=0.5) :
    result = []

    for pred in list(y_pred) :
        if pred >= threshold :
            result.append(1)
        else :
            result.append(0)
            
    return fbeta_score(y_true, np.array(result), beta=0.5)

In [14]:
scorer = make_scorer(fbeta_score, beta=0.5)

# Modeling

### MLP

### CNN

### Separated

# Parameter Tuning & CV

### xgboost

In [15]:
model1 = XGBClassifier()

In [16]:
m1_params1 = {
    'max_depth' : [5,6,7,8],
    'min_child_weight' : [0.5, 1, 5, 10, 15, 20],
    'gamma' : [1.5, 2, 2.5, 3.0, 5],
    'subsample' : [0.5, 0.6, 0.8, 1.0],
    'colsample_bytree' : [0.5, 0.6, 0.8, 1.0],
    'probability' : [True],
    'learning_rate' : [0.01, 0.05, 0.1],
    'n_estimators' : [300, 500, 700]
}

m1_grid_1 = GridSearchCV(model1, param_grid=m1_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m1_grid_1.fit(X_train, y_train)

best_model1 = m1_grid_1.best_estimator_

print("Best Score : {}".format(m1_grid_1.best_score_))
print("Best Params : {}".format(m1_grid_1.best_params_))

Best Score : 0.5555555555555556
Best Params : {'colsample_bytree': 0.5, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 100, 'probability': True, 'subsample': 0.5}


### svm

In [17]:
model2 = SVC()

In [18]:
m2_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100], 
    'gamma' : [0.001, 0.01, 0.1, 1, 2, 5, 10, 20],
    'degree' : [2,3,4],
    'probability' : [True]
}

m2_grid_1 = GridSearchCV(model2, param_grid=m2_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m2_grid_1.fit(X_train, y_train)

best_model2 = m2_grid_1.best_estimator_

print("Best Score : {}".format(m2_grid_1.best_score_))
print("Best Params : {}".format(m2_grid_1.best_params_))

Best Score : 0.5555555555555556
Best Params : {'C': 0.001, 'degree': 2, 'gamma': 0.001, 'probability': True}


### logistic regression

In [19]:
model3 = LogisticRegression()

In [20]:
m3_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
}

m3_grid_1 = GridSearchCV(model3, param_grid=m3_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m3_grid_1.fit(X_train, y_train)

best_model3 = m3_grid_1.best_estimator_

print("Best Score : {}".format(m3_grid_1.best_score_))
print("Best Params : {}".format(m3_grid_1.best_params_))

Best Score : 0.3703703703703704
Best Params : {'C': 0.001, 'max_iter': 100}


### random forest

In [21]:
model4 = RandomForestClassifier()

In [22]:
m4_params1 = {
    'max_depth' : [6, 8, 10, 15, 20, 30, 40, 50],
    'min_samples_leaf': [1, 2, 3, 4, 5,10, 20, 50],
    'n_estimators' : [100, 300, 500]
}

m4_grid_1 = GridSearchCV(model4, param_grid=m4_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m4_grid_1.fit(X_train, y_train)

best_model4 = m4_grid_1.best_estimator_

print("Best Score : {}".format(m4_grid_1.best_score_))
print("Best Params : {}".format(m4_grid_1.best_params_))

Best Score : 0.5555555555555556
Best Params : {'max_depth': 6, 'min_samples_leaf': 4, 'n_estimators': 100}


### lasso regression

In [23]:
model5 = LogisticRegression()

In [24]:
m5_params1 = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter' : [n for n in range(100,1101, 200)],
    'penalty' : ["l1"]
}

m5_grid_1 = GridSearchCV(model5, param_grid=m5_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m5_grid_1.fit(X_train, y_train)

best_model5 = m3_grid_1.best_estimator_

print("Best Score : {}".format(m5_grid_1.best_score_))
print("Best Params : {}".format(m5_grid_1.best_params_))

Best Score : 0.3703703703703704
Best Params : {'C': 0.001, 'max_iter': 100, 'penalty': 'l1'}


### ridge regression

In [25]:
model6 = RidgeClassifier()

In [26]:
m6_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [None]+[n for n in range(100,1101, 200)]
}

m6_grid_1 = GridSearchCV(model6, param_grid=m6_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m6_grid_1.fit(X_train, y_train)

best_model6 = m6_grid_1.best_estimator_

print("Best Score : {}".format(m6_grid_1.best_score_))
print("Best Params : {}".format(m6_grid_1.best_params_))

Best Score : 0.23809523809523805
Best Params : {'alpha': 0.1, 'max_iter': None}


### elasticNet

In [27]:
model7 = SGDClassifier()

In [28]:
m7_params1 = {
    'alpha': [0.001, 0.01, 0.1, 1, 2, 5, 10, 20, 50, 100],
    'l1_ratio':[0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 
    'max_iter' : [None]+[n for n in range(800, 1601, 200)],
    'penalty' : ["elasticnet"],
    'loss' : ["log"]
}

m7_grid_1 = GridSearchCV(model7, param_grid=m7_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m7_grid_1.fit(X_train, y_train)

best_model7 = m7_grid_1.best_estimator_

print("Best Score : {}".format(m7_grid_1.best_score_))
print("Best Params : {}".format(m7_grid_1.best_params_))

Best Score : 0.888888888888889
Best Params : {'alpha': 0.001, 'l1_ratio': 0.1, 'loss': 'log', 'max_iter': 800, 'penalty': 'elasticnet'}


### LARS

In [29]:
model8 = Lars()

In [30]:
m8_params1 = {
    'n_nonzero_coefs': [n for n in range(30, 150, 20)]
}

max_score=0
m8_best_t = 0
best_model8 = ""
m8_best_grid_1 = ""

for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m8_grid_1 = GridSearchCV(model8, param_grid=m8_params1, scoring=scorer2, cv=2, verbose=0, n_jobs=-1)
    m8_grid_1.fit(X_train, y_train)

    if max_score < m8_grid_1.best_score_ :
        best_model8 = m8_grid_1.best_estimator_
        m8_best_t = t
        m8_best_grid_1 = m8_grid_1
        
m8_grid_1 = m8_best_grid_1
best_model8 = m8_grid_1.best_estimator_

print("Best Score : {}".format(m8_grid_1.best_score_))     
print("Threshold :", m8_best_t)
print("Best Params : {}".format(m8_grid_1.best_params_))

Best Score : 0.4166666666666667
Threshold : 0.5
Best Params : {'n_nonzero_coefs': 30}


### LARS lasso

In [31]:
model9 = LassoLars()

In [32]:
m9_params1 = {
    'alpha': [0.1, 1, 2, 5, 10, 20, 50, 100],
    'max_iter' : [n for n in range(800, 1601, 200)]
}

max_score=0
m9_best_t = 0
best_model9 = ""
m9_best_grid_1 = ""
for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.45, 0.4, 0.45, 0.5, 0.6] :
    scorer2 = make_scorer(new_scorer, threshold=t)
    m9_grid_1 = GridSearchCV(model9, param_grid=m9_params1, scoring=scorer2, cv=2, verbose=0, n_jobs=-1)
    m9_grid_1.fit(X_train, y_train)

    if max_score < m9_grid_1.best_score_ :
        best_model9 = m9_grid_1.best_estimator_
        m9_best_t = t
        m9_best_grid_1 = m9_grid_1

m9_grid_1 = m9_best_grid_1
best_model9 = m9_grid_1.best_estimator_

print("Best Score : {}".format(m9_grid_1.best_score_))     
print("Threshold :", m9_best_t)
print("Best Params : {}".format(m9_grid_1.best_params_))

Best Score : 0.1923076923076923
Threshold : 0.6
Best Params : {'alpha': 1, 'max_iter': 800}


### ExtraTree

In [33]:
model10 = ExtraTreesClassifier()

In [34]:
m10_params1 = {
    'max_depth' : [None, 3, 5, 7, 9],
    'n_estimators' : [10, 50, 100, 300, 500]
}

m10_grid_1 = GridSearchCV(model10, param_grid=m10_params1, scoring=scorer, cv=2, verbose=0, n_jobs=-1)
m10_grid_1.fit(X_train, y_train)

best_model10 = m10_grid_1.best_estimator_

print("Best Score : {}".format(m10_grid_1.best_score_))
print("Best Params : {}".format(m10_grid_1.best_params_))

Best Score : 0.3333333333333333
Best Params : {'max_depth': None, 'n_estimators': 10}


# Model Stacking

In [35]:
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

In [36]:
def stacking(models, data) : 
    result = []
    
    for idx, model in enumerate(models) :
        if idx+1 in [2,9, 6, 7] :
            continue
        if idx+1 in [8] :
            result.append(model.predict(data))
        else :
            result.append(model.predict_proba(data)[:,1])
        print("model", idx+1, "is stacked")
        
    return np.array(result).T

In [37]:
models = [best_model1, best_model2, best_model3, best_model4, best_model5, best_model6, best_model7, best_model8, best_model9, best_model10]
S_train = stacking(models, X_train)

model 1 is stacked
model 3 is stacked
model 4 is stacked
model 5 is stacked
model 6 is stacked
model 7 is stacked
model 8 is stacked
model 10 is stacked


### weight

### NN

In [38]:
def stack_fn(num_models=6):
    model = Sequential()
    model.add(Dense(16, input_dim=num_models, activation='relu'))
    model.add(Dense(16, input_dim=16, activation='relu'))
    model.add(Dense(2, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [39]:
meta_model = KerasClassifier(build_fn=stack_fn)
meta_model.fit(S_train, y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f7856dc77f0>

In [None]:
print(predict_train(S_train, meta_model.predict_proba(S_train), y_train))

# Save

In [40]:
pickle.dump(best_model1, open('../data/model/model1.pickle.dat', 'wb'))
pickle.dump(best_model2, open('../data/model/model2.pickle.dat', 'wb'))
pickle.dump(best_model3, open('../data/model/model3.pickle.dat', 'wb'))
pickle.dump(best_model4, open('../data/model/model4.pickle.dat', 'wb'))
pickle.dump(best_model5, open('../data/model/model5.pickle.dat', 'wb'))
pickle.dump(best_model6, open('../data/model/model6.pickle.dat', 'wb'))
pickle.dump(best_model7, open('../data/model/model7.pickle.dat', 'wb'))
pickle.dump(best_model8, open('../data/model/model8.pickle.dat', 'wb'))
pickle.dump(best_model9, open('../data/model/model9.pickle.dat', 'wb'))
pickle.dump(best_model10, open('../data/model/model10.pickle.dat', 'wb'))

In [41]:
meta_model.model.save_weights('../data/model/model_weights.h5')

with open('../data/model/model_architecture.json', 'w') as f :
    f.write(meta_model.model.to_json())

# Loading & Prediction

In [42]:
model1 = pickle.load(open('../data/model/model1.pickle.dat', 'rb'))
model2 = pickle.load(open('../data/model/model2.pickle.dat', 'rb'))
model3 = pickle.load(open('../data/model/model3.pickle.dat', 'rb'))
model4 = pickle.load(open('../data/model/model4.pickle.dat', 'rb'))
model5 = pickle.load(open('../data/model/model5.pickle.dat', 'rb'))
model6 = pickle.load(open('../data/model/model6.pickle.dat', 'rb'))
model7 = pickle.load(open('../data/model/model7.pickle.dat', 'rb'))
model8 = pickle.load(open('../data/model/model8.pickle.dat', 'rb'))
model9 = pickle.load(open('../data/model/model9.pickle.dat', 'rb'))
model10 = pickle.load(open('../data/model/model10.pickle.dat', 'rb'))

with open('../data/model/model_architecture.json', 'r') as f :
    meta = model_from_json(f.read())

meta.model.load_weights('../data/model/model_weights.h5')

In [48]:
# Make Predictions for Test Data
models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10]
S_test = stacking(models, X_test)

threshold = 0.6
y_pred = meta.predict_proba(S_test)[:, 1]
y_pred_binary = pred_to_binary(y_pred, threshold = threshold)


# Make 'output.csv'
export_csv(patient_num, y_pred_binary, y_pred, path = "../data/output/")

model 1 is stacked
model 3 is stacked
model 4 is stacked
model 5 is stacked
model 6 is stacked
model 7 is stacked
model 8 is stacked
model 10 is stacked
