In [None]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.models import Model
from keras.layers import Input, Convolution2D, MaxPool2D, Dense, Flatten, Dropout
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from keras.utils.vis_utils import plot_model

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from pathlib import Path
import pandas as pd
import numpy as np

from datetime import datetime
import os

from tensorflow.python.client import device_lib
# Sanity check: print the compute devices TensorFlow reports as locally available.
print(device_lib.list_local_devices())

# Cross-validation fold handled by this run; it selects the data folders and
# CSV files below and also seeds the row shuffles.
fold_num = 1

# Image size
img_width, img_height, img_depth = 224, 224, 3
data_dir = Path('data_aug/fold_' + str(fold_num) + '/combined')
csv_dir = Path('data_aug/fold_' + str(fold_num))

# Data organization
# Training rows for this fold, shuffled reproducibly.
csv_name = 'dr_train_' + str(fold_num) + '.csv'
csv_path = csv_dir / csv_name
df_train = pd.read_csv(csv_path)
df_train = df_train.sample(frac = 1, random_state = fold_num)

# Validation rows for this fold. The shuffle must be applied to the validation
# frame itself; sampling from df_train here would silently validate on the
# training data.
csv_name = 'dr_valid_' + str(fold_num) + '.csv'
csv_path = csv_dir / csv_name
df_valid = pd.read_csv(csv_path)
df_valid = df_valid.sample(frac = 1, random_state = fold_num)

# Training parameters
epochs = 50
freq = 20          # passed to ModelCheckpoint as `period`
batch_size = 28
num_classes = 3
# Only relevant to the commented-out `subset=` generators; train and
# validation data now come from separate CSV files, so no split is applied.
train_valid_split = 0.2
# Sample counts drive steps_per_epoch / validation_steps, so they must reflect
# the frames that are actually fed to the generators.
nb_train_samples = len(df_train)
nb_valid_samples = len(df_valid)

# Class 0 is weighted twice as heavily as classes 1 and 2 during training.
class_weights = {0: 2.,
                1: 1.,
                2: 1.}

# NOTE(review): a two-class setup (num_classes == 2) takes the
# softmax/categorical branch; only num_classes < 2 selects the sigmoid/binary
# configuration. Confirm this is the intended convention.
if num_classes < 2:
    act_type = 'sigmoid'
    class_mode = 'binary'
    loss_fun = 'binary_crossentropy'
else:
    act_type = 'softmax'
    class_mode = 'categorical'
    loss_fun = 'categorical_crossentropy'

print('Type of classification: ', class_mode)
print('Loss function: ', loss_fun)
print('Activation function: ', act_type)

# ImageDataGenerator
# On-the-fly augmentation: pixel values are multiplied by 1/255, images are
# randomly flipped along both axes and rotated by up to 90 degrees, with
# fill_mode 'constant' for points outside the input boundaries.
datagen = ImageDataGenerator(
    fill_mode = 'constant',
    rotation_range = 90,
    vertical_flip = True,
    horizontal_flip = True,
    rescale = 1./255)

# For csvs (dataframes)
# An earlier variant drew both generators from df_train using
# subset = 'training' / subset = 'validation'; separate df_train / df_valid
# frames are used instead.


def _flow_from(frame):
    """Return a shuffled batch iterator over the images listed in `frame`.

    File names are read from the 'Image' column and labels from the 'Class'
    column; images are loaded from `data_dir` as RGB and resized.
    """
    return datagen.flow_from_dataframe(
        dataframe = frame,
        directory = data_dir,
        x_col = 'Image',
        y_col = 'Class',
        target_size = (img_width, img_height),
        color_mode = 'rgb',
        class_mode = class_mode,
        batch_size = batch_size,
        shuffle = True)


train_generator = _flow_from(df_train)
valid_generator = _flow_from(df_valid)


# For directory

# Preview the augmented data
# Use the iterator protocol rather than the `.next()` method so the call does
# not depend on the generator class exposing that method.
X_preview, y_preview = next(train_generator)

# Show the first six images of the batch (fewer if the batch is smaller) on a
# 2 x 3 grid, and print exactly the labels that belong to the images shown.
n_preview = min(6, len(X_preview))
for k in range(n_preview):
    sample_img = X_preview[k]
    plt.subplot(2, 3, k + 1)
    plt.imshow(sample_img)
plt.suptitle('Sample Data Augmentation', fontsize = 16)
plt.show()

print('y Labels: ', y_preview[:n_preview])
# Value range of the last image shown, as a check on the rescaling.
print('sample img, max value: ',np.max(sample_img))
print('sample img, min value: ',np.min(sample_img))

# Build Model
# Convolutional base: VGG16 with ImageNet weights and without its top
# (classifier) layers, wired to our own input tensor.
image_input = Input(shape = (img_width, img_height, img_depth))
base_model = applications.vgg16.VGG16(weights = 'imagenet',
                                      include_top = False,
                                      input_tensor = image_input)

# Freeze every base layer except the last nine; only those nine (plus the new
# head below) are updated during training.
for layer in base_model.layers[:-9]:
    layer.trainable = False

# Classification head: two fully connected layers, each followed by 50 %
# dropout, then one output unit per class.
base_output = base_model.layers[-1].output
flat1 = Flatten()(base_output)

fc1 = Dense(8192, activation = 'relu')(flat1)
dropfc1 = Dropout(0.5)(fc1)

fc2 = Dense(4096, activation = 'relu')(dropfc1)
dropfc2 = Dropout(0.5)(fc2)

output = Dense(num_classes, activation = act_type)(dropfc2)

model = Model(inputs = image_input, outputs = output)

# model.summary()

# Compile the model
opt = Adam(lr = 1e-5)
model.compile(optimizer = opt, loss = loss_fun, metrics = ['accuracy'])

# Folder setup
# Every run writes into its own timestamped directory.
init_time = datetime.now()
current_time = init_time.strftime('%Y%m%d_%H%M%S')
name_dir = 'trained_models_' + current_time + '_fold_num' + str(fold_num)
os.mkdir(name_dir)

# Callbacks1: ModelCheckpointer
# Saves the model every `freq` epochs; the epoch number is part of the file name.
model_file_format = name_dir + '/model_' + str(fold_num) + '_model.{epoch:04d}.hdf5'
check = ModelCheckpoint(model_file_format, period = freq, verbose = 1)

# Callbacks2: TensorBoard
tensor_check = './logs/' + current_time + '_train_testsplit' + str(fold_num)
tensor_board = TensorBoard(tensor_check)

# Callbacks3: EarlyStopping
# Stop once val_loss has not improved for 10 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')

# Plot model
figure_name = name_dir + '/model_output.png'
plot_model(model, figure_name, show_shapes = True)

# Display model
model.summary()

# `early_stop` has to be in the callback list to take effect; it was created
# above but previously never handed to the training loop.
history = model.fit_generator(train_generator,
                              epochs = epochs,
                              steps_per_epoch = nb_train_samples // batch_size,
                              validation_data = valid_generator,
                              validation_steps = nb_valid_samples // batch_size,
                              callbacks = [check, tensor_board, early_stop],
                              class_weight = class_weights)

# Persist the weights the model holds when training ends (early or not).
early_stop_name = name_dir + '/fold_num_' + str(fold_num) + 'early_stop_model.hdf5'
model.save_weights(early_stop_name)

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print and plot the confusion matrix of `y_pred` against `y_true`.

    Args:
        y_true: Ground-truth labels, one per sample.
        y_pred: Predicted labels, one per sample.
        classes: Tick labels for both axes. Used as given, so there must be
            one entry per label that occurs in `y_true` / `y_pred`.
        normalize: If True, every row is divided by its sum so each cell
            holds the fraction of that true class. Rows with no samples are
            shown as zeros instead of NaN.
        title: Plot title; a default depending on `normalize` is used when
            empty or None.
        cmap: Matplotlib colormap for the heat map.

    Returns:
        The matplotlib Axes the matrix was drawn on.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        # A label that occurs only in y_pred has an all-zero row; dividing by
        # its sum would produce NaN, so such rows are left at zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell with its value; text switches to white on cells
    # darker than half the maximum so it stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

# Data organization
test_data_dir = Path('data/combined')
csv_name = 'MAIN_dr_test_' + str(fold_num) + '.csv'
csv_path = Path('data') / csv_name
df_test = pd.read_csv(csv_path)
df_test = df_test.sample(frac = 1, random_state = fold_num)

class_names = ['A. Control', 'B. NoDR', 'C. DR']

# NOTE(review): the test generator applies random augmentation (flips, zoom,
# rotation), so every pass below yields a differently augmented copy of the
# test set -- confirm this test-time augmentation is intended.
test_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip = True,
    vertical_flip = True,
    zoom_range = 0.2,
    rotation_range = 90,
    fill_mode = 'constant')

# batch_size equals the frame length, so one batch holds the entire test set.
test_generator = test_datagen.flow_from_dataframe(dataframe = df_test,
                                                  directory = test_data_dir,
                                                  target_size = (img_width, img_height),
                                                  x_col = 'Image',
                                                  y_col = 'Class',
                                                  class_mode = class_mode,
                                                  color_mode = 'rgb',
                                                  batch_size = len(df_test))

# Draw the test set several times and stack the passes. Batches are collected
# first and concatenated once, instead of re-copying the growing arrays on
# every iteration.
num_test_passes = 30
test_batches = [next(test_generator) for _ in range(num_test_passes)]
X = np.concatenate([images for images, _ in test_batches], axis=0)
y_true = np.concatenate([labels for _, labels in test_batches], axis=0)
del test_batches  # release the per-pass copies once they are stacked

# print(np.shape(X))
# print(np.shape(y_true))

# The layout of labels and predictions is determined by the class_mode the
# generators and the model were built with, not by how many class names are
# listed, so the branch is keyed on class_mode.
if class_mode == 'binary':
    # Round the sigmoid scores to 0/1 and flatten them to one value per sample.
    y_pred = np.around(model.predict(X), 0).ravel()
    y_true_labels = y_true
    y_pred_labels = y_pred
    print('Binary Classification')
else:
    # One-hot targets and per-class scores: reduce both to class indices.
    y_pred = model.predict(X)
    y_true_labels = y_true.argmax(axis=1)
    y_pred_labels = y_pred.argmax(axis=1)
    print('Categorical Classification')
print('---------------------')

title_mat = 'Confusion matrix, with normalization, fold: ' + str(fold_num)
plot_name = name_dir + '/confusion_matrix_fold' + str(fold_num) + '.png'

# Normalized plot saved next to the trained model, followed by the raw counts.
plot_confusion_matrix(y_true_labels,
                      y_pred_labels,
                      classes = class_names,
                      normalize = True,
                      title = title_mat)
plt.savefig(plot_name)
cm = confusion_matrix(y_true_labels, y_pred_labels)
print(cm)

