In [1]:
# NOTE: the project wildcard imports intentionally stay ahead of the explicit
# imports below, so a name they export can never shadow an explicit import.
from utils.data_loader import train_data_loader, test_data_loader, data_generator
from utils.inference_tools import pred_to_binary, export_csv, making_result,  error_check
from utils.model_stacking import *
from utils.cube_tools import *
import vecstack
# os.path.join is used when building the glob patterns further down; import it
# explicitly instead of relying on a wildcard import to supply it.
import os
from glob import glob

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression, Lasso, RidgeClassifier, SGDClassifier, Lars, LassoLars
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import fbeta_score, make_scorer

from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Conv3D, Flatten, pooling
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils 

import pandas as pd
import numpy as np
import pickle
import datetime

import warnings

# Suppress all warnings for the remainder of the session.
warnings.filterwarnings('ignore')

# Raise both pandas display limits (rows and columns) to 500.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 500)

  from ._conv import register_converters as _register_converters
  from pandas.core import datetools
Using TensorFlow backend.


# Load Data and Pre-processing

In [2]:
# Setting
# Every location below is derived from this single data root.
path = "./data"

# Directories holding the positive / negative training cases.
pos_dir, neg_dir = (
    "{}/train/{}/".format(path, label) for label in ("positive", "negative")
)

# Where trained models are written, and where the test cases are read from.
save_dir = "{}/model/".format(path)
test_dir = "{}/test/".format(path)

# Arguments forwarded to the data loaders.
target_voxel = (0.65, 0.65, 3)
features = ['firstorder', 'shape']

In [3]:
# Pre-processing switches handed to the training loader (all enabled).
norm = 'new'
do_resample = do_shuffle = do_minmax = True

# Build the training feature matrix and its labels.
X_train, y_train = train_data_loader(
    pos_dir, neg_dir,
    norm, do_resample, do_shuffle, do_minmax,
    features, target_voxel,
    path=path
)

Created X of shape (6, 64) and y of shape (6,) (12:10:17)


In [3]:
# Same pre-processing switches as for training; the test loader takes no shuffle flag.
norm = 'new'
do_resample = do_minmax = True

# Returns the test features, the patient identifiers and the patients flagged as errors.
X_test, patient_num, error_patient = test_data_loader(
    test_dir,
    norm, do_resample, do_minmax,
    features, target_voxel,
    path=path
)

<br><br><br>

# Modeling

In [6]:
# Sanity check of the training matrix; dl_mlp uses X_train.shape[1] as the network input width.
X_train.shape

(6, 64)

### MLP

In [7]:
def dl_mlp(X_train, y_train, num_units=256, hidden_layers=3, epochs=30, loss="cross_entropy_loss") :
    """Build and fit a fully connected classifier wrapped in a ``KerasClassifier``.

    The network is ``hidden_layers`` repetitions of ``Dense(num_units, relu)`` +
    ``Dropout(0.5)``, followed by ``Dense(32, relu)`` and a 2-unit softmax output.

    Parameters
    ----------
    X_train : array-like with a ``shape`` attribute
        Training features; ``X_train.shape[1]`` is used as the input width.
    y_train : array-like
        Training labels, passed unchanged to ``KerasClassifier.fit``.
    num_units : int
        Units in each hidden ``Dense`` layer.
    hidden_layers : int
        Number of ``Dense`` + ``Dropout`` pairs placed before the 32-unit layer.
    epochs : int
        Number of training epochs.
    loss : str
        ``"cross_entropy_loss"`` (compiled with ``categorical_crossentropy``) or
        ``"focal_loss"`` (compiled with ``focal_loss()``).

    Returns
    -------
    KerasClassifier
        The fitted scikit-learn style wrapper.

    Raises
    ------
    ValueError
        If ``loss`` is not one of the supported names. Previously an unknown
        name silently produced an uncompiled model.
    """
    supported_losses = ('cross_entropy_loss', 'focal_loss')

    # Fail fast, before any model is built or any data is touched.
    if loss not in supported_losses :
        raise ValueError("Unsupported loss %r; expected one of %r" % (loss, supported_losses))

    def stack_fn(num_models=X_train.shape[1], num_units=num_units, hidden_layers=hidden_layers, loss=loss):
        """Model factory invoked by ``KerasClassifier`` to create the network."""
        # Re-check here: the wrapper may call this factory with overridden parameters.
        if loss not in supported_losses :
            raise ValueError("Unsupported loss %r; expected one of %r" % (loss, supported_losses))

        model = Sequential()

        # Only the first layer of a Sequential model needs the input size.
        # This also keeps the input width correct when hidden_layers == 0.
        first_layer_kwargs = {'input_dim': num_models}

        for _ in range(hidden_layers) :
            model.add(Dense(num_units, activation='relu', **first_layer_kwargs))
            model.add(Dropout(0.5))
            first_layer_kwargs = {}

        model.add(Dense(32, activation='relu', **first_layer_kwargs))
        model.add(Dense(2, activation='softmax'))

        if loss == 'cross_entropy_loss' :
            loss_fn = 'categorical_crossentropy'
        else :
            # focal_loss is not defined in this notebook; presumably it is supplied
            # by one of the wildcard imports -- TODO confirm.
            loss_fn = focal_loss()

        model.compile(loss=loss_fn, optimizer='adam', metrics=['accuracy'])
        return model

    MLP_model = KerasClassifier(build_fn=stack_fn)
    MLP_model.fit(X_train, y_train, epochs=epochs)
    return MLP_model

In [8]:
# Hyper-parameters for the MLP run, spelled out in one place.
mlp_settings = dict(num_units=256, hidden_layers=3, epochs=30, loss="cross_entropy_loss")
MLP = dl_mlp(X_train, y_train, **mlp_settings)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### CNN

In [7]:
# Training schedule for the CNN.
epochs, batch_size = 1, 4

# Generator settings: which split to read and how each case is prepared.
mode, norm = 'train', "new"
cube_shape = (32, 32, 16)
target_voxel = (0.65, 0.65, 3)

In [10]:
path = './data'

# Match every entry two levels below <path>/<mode>/, in sorted order.
search_pattern = os.path.join(path, mode, '*', '*')
candidate_dirs = sorted(glob(search_pattern))

# error_check returns the entries to use and the ones it flagged as errors.
data_dir, error_patient = error_check(candidate_dirs)

# Batch generator consumed by the CNN.
data_gen = data_generator(batch_size, mode, data_dir, cube_shape, norm, target_voxel)

In [11]:
def dl_cnn(data_gen, cube_shape=(32,32,16), batch_size=4, epochs=20, num_samples=255) :
    """Build a small 3-D CNN and train it from a batch generator.

    Architecture: two blocks of (Conv3D(32) x 2 + MaxPooling3D), then
    Flatten -> Dense(128) -> Dropout(0.5) -> Dense(10) -> Dense(1, sigmoid),
    compiled with binary cross-entropy and the Adam optimizer.

    Parameters
    ----------
    data_gen : generator
        Batch generator passed to ``fit_generator``.
    cube_shape : sequence of 3 ints
        Spatial size of each input cube; a channel axis of size 2 is appended.
    batch_size : int
        Batch size the generator yields; only used to derive the step count.
    epochs : int
        Number of training epochs.
    num_samples : int
        Number of training samples per epoch. Defaults to 255, the value that
        was previously hard-coded.

    Returns
    -------
    keras.models.Sequential
        The trained model.

    Raises
    ------
    ValueError
        If ``batch_size`` or ``num_samples`` is not positive.
    """
    if batch_size <= 0 :
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))
    if num_samples <= 0 :
        raise ValueError("num_samples must be a positive integer, got %r" % (num_samples,))

    # Two input channels are hard-coded here; presumably this matches what the
    # generator yields -- TODO confirm. tuple() lets cube_shape be a list too.
    input_shape = tuple(cube_shape) + (2,)

    # Always run at least one step, even when batch_size exceeds num_samples.
    steps_per_epoch = max(1, num_samples // batch_size)

    model = Sequential()
    model.add(Conv3D(32, (3,3,3), activation='relu', input_shape = input_shape))
    model.add(Conv3D(32, (3,3,3), activation='relu'))
    model.add(pooling.MaxPooling3D(pool_size=(2,2,2)))

    model.add(Conv3D(32, (3,3,3), activation='relu'))
    model.add(Conv3D(32, (3,3,3), activation='relu'))
    model.add(pooling.MaxPooling3D(pool_size=(2,2,2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Keyword arguments make the call robust to changes in positional order.
    model.fit_generator(data_gen, steps_per_epoch=steps_per_epoch, epochs=epochs, shuffle=False)
    return model

In [None]:
# Reuse the settings the generator was built with, so the model's input shape
# and step count cannot drift from what data_gen actually yields.
CNN = dl_cnn(data_gen, cube_shape=cube_shape, batch_size=batch_size, epochs=epochs)

# Save

In [None]:
# Persist the trained MLP: weights to an HDF5 file, architecture to JSON.
# MLP is the KerasClassifier wrapper here, so the Keras model is MLP.model.
mlp_weights_file = path + '/model/MLP.h5'
mlp_arch_file = path + '/model/MLP.json'

MLP.model.save_weights(mlp_weights_file)
with open(mlp_arch_file, 'w') as json_file :
    json_file.write(MLP.model.to_json())

In [None]:
# dl_cnn returns the Keras model itself (not a wrapper), so save through it
# directly; the `.model` indirection is not available on every Keras version.
CNN.save_weights(path+'/model/CNN.h5')
with open(path+'/model/CNN.json', 'w') as f :
    f.write(CNN.to_json())

# Loading & Prediction

In [4]:
# Rebuild the MLP architecture from JSON, then restore its weights.
# model_from_json returns a Keras model, so load the weights on it directly
# rather than through a `.model` attribute that not every Keras version has.
with open(path+'/model/MLP.json', 'r') as f :
    MLP = model_from_json(f.read())
MLP.load_weights(path+'/model/MLP.h5')

In [5]:
# Rebuild the CNN architecture from JSON, then restore its weights.
# model_from_json returns a Keras model, so load the weights on it directly
# rather than through a `.model` attribute that not every Keras version has.
with open(path+'/model/CNN.json', 'r') as f :
    CNN = model_from_json(f.read())
CNN.load_weights(path+'/model/CNN.h5')

<br><br>

In [11]:
# Inference settings: threshold mode for pred_to_binary and the split to read.
threshold, mode = "auto", "test"
path = './data'

# Match every entry directly below <path>/<mode>/, in sorted order.
search_pattern = os.path.join(path, mode, '*')
candidate_dirs = sorted(glob(search_pattern))

# error_check returns the entries to use and the ones it flagged as errors.
data_dir, error_patient = error_check(candidate_dirs)

# Batch generator over the test entries, consumed by the CNN at prediction time.
data_gen = data_generator(batch_size, mode, data_dir, cube_shape, norm, target_voxel)

In [None]:
# Collect raw scores and thresholded labels, one entry per model (MLP first, then CNN).
y_pred_lst = []
y_pred_binary_lst =[]

# MLP: keep column 1 of the predicted class probabilities.
pred = MLP.predict_proba(X_test)[:, 1]
y_pred_lst.append(pred)
y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))
    
# CNN: scores are produced batch by batch from the test generator.
# NOTE(review): steps is derived from a hard-coded 255 -- confirm it matches the number of test cases.
# NOTE(review): unlike the MLP scores, this result is not sliced -- confirm pred_to_binary and export_csv accept both shapes.
pred = CNN.predict_generator(data_gen, steps=255//batch_size, verbose=1)
y_pred_lst.append(pred)
y_pred_binary_lst.append(pred_to_binary(pred, threshold = threshold))  

# Write the combined results via export_csv and show the returned frame.
final, final_df = export_csv(patient_num, error_patient, y_pred_binary_lst, y_pred_lst, path = path, index=1)
print(final_df)