In [None]:
# Shell out to nvidia-smi to show which NVIDIA GPUs this machine exposes before training.
!nvidia-smi

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import skimage.transform as st
import os
import gc
import warnings
from sklearn.metrics import classification_report, average_precision_score
from Parse_TFrecords import *
from define_model import *
from load_data import *
from utilities import *
 
# Runtime setup: report the TF version, quiet down warnings, and pin TF to one GPU.
print(tf.__version__)

warnings.filterwarnings("ignore")
# NOTE(review): this is set after tensorflow was imported above, so anything TF logged
# during import is not affected; move it before the import if that output matters.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

gpus = tf.config.list_physical_devices(device_type='GPU')
if gpus:
    try:
        # Restrict TensorFlow to the first GPU only.
        tf.config.set_visible_devices(devices=gpus[0], device_type='GPU')
    except RuntimeError as err:
        # Visible devices cannot be changed once the TF runtime is initialised,
        # which happens when this cell is re-run in a live kernel.
        print('Could not change visible devices:', err)
else:
    # Without this guard gpus[0] raises IndexError on a CPU-only machine.
    print('No GPU detected; TensorFlow will run on CPU.')

## Model Ensembling 

In [None]:
# Class counts used for weighting: positives and negatives.
pos, neg = 27297, 119133
total = pos + neg

# Balanced weighting: each class is scaled so that both classes contribute
# the same total weight (total / 2) to the loss.
half_total = total / 2.0
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

class_weight = {
    0: weight_for_0,
    1: weight_for_1,
}

print('Weight for class 0: {:.2f}'.format(class_weight[0]))
print('Weight for class 1: {:.2f}'.format(class_weight[1]))

In [None]:
BATCH_SIZE = 32

# NOTE(review): TFRecordDataset's buffer_size is the read-buffer size in *bytes*, not a
# record count, so BATCH_SIZE*10 is a 320-byte buffer -- confirm this is intended.

# Training pipeline: parse every record, shuffle across a buffer of `total` examples
# (tf.data reshuffles on each pass by default), then batch.
record_file_train = 'copd_train_new.tfrecords'
train_dataset = (tf.data.TFRecordDataset(
    record_file_train, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.map(parse_TFrecord_train)
.shuffle(total)
.batch(BATCH_SIZE))

# Validation pipeline: deliberately NOT shuffled. Validation metrics do not depend on
# example order, and a stable order is required wherever predictions on this dataset
# are paired row-by-row with labels or with another model's predictions.
record_file_val = 'copd_val_new.tfrecords'
val_dataset = (tf.data.TFRecordDataset(
    record_file_val, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.map(parse_TFrecord_train)
.batch(BATCH_SIZE))

# Test pipeline: file order is kept so predictions line up with y_test.
record_file_test = 'copd_test_new.tfrecords'
test_dataset = (tf.data.TFRecordDataset(
    record_file_test, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.map(parse_TFrecord_test)
.batch(BATCH_SIZE))

# presumably returns the test labels in the same order as the records in the file -- verify
y_test = get_data_label('test')

## Train base model

In [None]:
archi = 'Dnet121'

# Keep only the weights of the epoch with the highest validation recall.
checkpoint_filepath = 'checkpoints/Recall/checkpoint_Dnet121'
monitor_ = 'val_recall'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    save_weights_only=True,
    monitor=monitor_,
    mode='max')

callback = [tf.keras.callbacks.LearningRateScheduler(scheduler),
            tf.keras.callbacks.EarlyStopping(mode='max', patience=3, monitor=monitor_),
            model_checkpoint_callback]

model = load_model_from_pretrain(archi)

# Alternative metrics (swap in and update monitor_ / the checkpoint path to match):
# metric = tf.keras.metrics.PrecisionAtRecall(recall=0.5, name='precision_at_recall')
metric = tf.keras.metrics.Recall(name='recall')
# metric = tf.keras.metrics.Precision(name='precision')

# NOTE(review): from_logits=True assumes the model's last layer emits raw logits; if so,
# the Recall metric and the fixed 0.5 cut-off below are applied to logits -- confirm.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=[metric])

# `shuffle` is not passed: Keras ignores it for tf.data input, and train_dataset
# already shuffles itself.
model.fit(train_dataset, epochs=20, validation_data=val_dataset, callbacks=callback, class_weight=class_weight)

# EarlyStopping stops `patience` epochs after the best one without restoring it, so the
# in-memory weights are not the best ones. Evaluate the checkpointed best epoch instead.
model.load_weights(checkpoint_filepath)

y_preds = model.predict(test_dataset)

test_CI(y_preds, y_test)


def _print_threshold_report(y_true, y_score, thresh):
    """Print the cut-off, the classification report at that cut-off, and the average precision.

    Average precision does not depend on the threshold; it is printed with every
    report so each one can be read on its own.
    """
    print('thresh:', thresh)
    print(classification_report(y_true, np.where(y_score >= thresh, 1, 0)))
    print(average_precision_score(y_true, y_score, average=None))


# NOTE(review): get_thresh is given the test labels; if it selects the cut-off from them,
# the reports at those cut-offs are optimistic -- consider choosing it on validation data.
for method in ('Youden', 'G-mean'):
    thresh = get_thresh(y_test, y_preds, method)
    _print_threshold_report(y_test, y_preds, thresh)

# Fixed cut-off for reference.
thresh = 0.5
_print_threshold_report(y_test, y_preds, thresh)

gc.collect()

## Prediction-based ensemble

In [None]:
BATCH_SIZE = 32

# Rebuild the test pipeline so this section can run on its own:
# read the records, parse each one, and batch them in file order (no shuffling).
record_file_test = 'copd_test_new.tfrecords'
test_records = tf.data.TFRecordDataset(
    filenames=record_file_test,
    compression_type=None,
    buffer_size=BATCH_SIZE * 10,
    num_parallel_reads=32,
)
test_dataset = test_records.map(parse_TFrecord_test).batch(BATCH_SIZE)

# Ground-truth labels for the test split.
y_test = get_data_label('test')

In [None]:
# First ensemble member: the 'IV3' architecture with its saved weights.
archi = 'IV3'
checkpoint_filepath = 'checkpoints/PrecisionAtRecall/checkpoint_IV3'
model1 = define_model(archi)
model1.load_weights(checkpoint_filepath)

# Second ensemble member: the 'Dnet121' architecture with its saved weights.
archi = 'Dnet121'
checkpoint_filepath = 'checkpoints/PrecisionAtRecall/checkpoint_Dnet121'
model2 = define_model(archi)
model2.load_weights(checkpoint_filepath)

# Score the (unshuffled) test set with each member; both outputs share the same row order.
y_preds_1 = model1.predict(test_dataset)
y_preds_2 = model2.predict(test_dataset)

In [None]:
# Weighted soft vote: 35% of the first model's scores plus 65% of the second's.
y_preds_vote = np.array(0.35 * y_preds_1 + 0.65 * y_preds_2)

test_CI(y_preds_vote, y_test)

# Threshold-free summary first.
print(average_precision_score(y_test, y_preds_vote, average=None))

# Reports at the cut-offs returned by get_thresh for each named criterion.
for method in ('Youden', 'G-mean'):
    thresh = get_thresh(y_test, y_preds_vote, method)
    print('thresh:', thresh)
    print(classification_report(y_test, np.where(y_preds_vote >= thresh, 1, 0)))

# Report at the fixed 0.5 cut-off.
thresh = 0.5
print('thresh:', thresh)
print(classification_report(y_test, np.where(y_preds_vote >= thresh, 1, 0)))

gc.collect()

## Model-based ensemble

In [None]:
# Load the two trained base models whose features will feed the ensemble MLP.
checkpoint_filepath = 'checkpoints/AUC/checkpoint_BCE_IV3'
archi = 'IV3'
model1 = load_model_from_pretrain(archi)
model1.load_weights(checkpoint_filepath)

checkpoint_filepath = 'checkpoints/AUC/checkpoint_BCE_Dnet121'
archi = 'Dnet121'
model2 = load_model_from_pretrain(archi)
model2.load_weights(checkpoint_filepath)

# Cut each model at its second-to-last layer so predict() returns that layer's output
# instead of the final prediction.
model1 = tf.keras.Model(inputs=model1.input, outputs=model1.layers[-2].output)
model2 = tf.keras.Model(inputs=model2.input, outputs=model2.layers[-2].output)

# Features must be extracted from UNSHUFFLED pipelines. train_dataset shuffles and
# reshuffles on every pass, so predicting on it with two models would yield rows in two
# different random orders that match neither each other nor the labels. These pipelines
# mirror how test_dataset is built: same files and parser, file order preserved.
train_features_dataset = (tf.data.TFRecordDataset(
    record_file_train, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.map(parse_TFrecord_train)
.batch(BATCH_SIZE))

val_features_dataset = (tf.data.TFRecordDataset(
    record_file_val, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.map(parse_TFrecord_train)
.batch(BATCH_SIZE))

# NOTE(review): if parse_TFrecord_train applies random augmentation, the two models see
# differently augmented copies of each example -- confirm whether that is acceptable.
features_1 = model1.predict(train_features_dataset)
features_2 = model2.predict(train_features_dataset)

features_val_1 = model1.predict(val_features_dataset)
features_val_2 = model2.predict(val_features_dataset)

In [None]:
# Keep only the ensemble-MLP weights of the epoch with the highest validation AUC.
checkpoint_filepath = 'checkpoints/AUC/checkpoint_ensemble_mlp'
monitor_ = 'val_auc'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    save_weights_only=True,
    monitor=monitor_,
    mode='max')

callback = [tf.keras.callbacks.LearningRateScheduler(scheduler),
            tf.keras.callbacks.EarlyStopping(mode='max', patience=3, monitor=monitor_),
            model_checkpoint_callback]

ensemble_mlp = get_ensemble_mlp()

metric = tf.keras.metrics.AUC(name='auc')

ensemble_mlp.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                     optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=[metric])

# Inputs are the features extracted in the previous cell, in [model1 (IV3), model2 (Dnet121)]
# order. The former names (features_IV3, features_D121, ...) were never assigned anywhere
# in this notebook and raised NameError on a fresh kernel.
# NOTE(review): y_train / y_val are not assigned in any visible cell either; they must hold
# the train / validation labels in the same row order as the extracted features. Presumably
# get_data_label provides them the way it provides y_test -- confirm and define them here.
ensemble_mlp.fit([features_1, features_2], y_train, epochs=20, shuffle=True,
                 validation_data=([features_val_1, features_val_2], y_val),
                 callbacks=callback, class_weight=class_weight)

In [None]:
# Extract test-set features with the same truncated base models that produced the
# training features: model1 is the IV3 extractor, model2 the Dnet121 extractor.
# (The names InceptionV3 / densenet121 used here before are not models defined in this notebook.)
X_test_IV3 = model1.predict(test_dataset)
X_test_D121 = model2.predict(test_dataset)

# Same input order as in training: [IV3 features, Dnet121 features].
y_preds = ensemble_mlp.predict([X_test_IV3, X_test_D121])

test_CI(y_preds, y_test)

# NOTE(review): every other call in this notebook passes a third argument
# ('Youden' / 'G-mean'); this relies on get_thresh having a default for it -- confirm.
thresh = get_thresh(y_test, y_preds)

print(classification_report(y_test, np.where(y_preds >= thresh, 1, 0)))
print(average_precision_score(y_test, y_preds, average=None))

gc.collect()