# K-Fold Cross-Validation with VGG16

## To Do
- get validation process straight
- hyperparameter-tuning

In [None]:
import gc
import os
import shutil
from glob import glob
from pathlib import PureWindowsPath
from time import time

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from keras import applications, regularizers, optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.layers import Dense, Dropout, GlobalMaxPooling2D
from keras.models import Model
from keras.utils import to_categorical
from keras_preprocessing.image import ImageDataGenerator
from skimage.io import imread
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

print(os.listdir("/home/Deep_Learner/work/local/histopathologic-cancer-detection"))

In [None]:
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

#visualize Augmentation from directory!
def looking_at_augmentation(data_generator, batchsize, path):
    """Draw one augmented batch on a 3x3 grid and save it as a PNG.

    Args:
        data_generator: iterator whose ``next()`` yields an ``(images, labels)``
            pair; ``images`` is indexed along its first axis to get one image.
        batchsize: maximum number of images to draw. The grid has nine cells,
            so at most nine images are shown.
        path: prefix the file name ``\\Augmented-Images.png`` is appended to.
    """
    im, label = next(data_generator)
    print(im)
    # Min-max normalise the whole batch: matplotlib only renders float images
    # whose values lie in 0..1.
    value_range = np.ptp(im)
    if value_range > 0:
        im = (im - np.min(im)) / value_range
    else:
        # A constant batch would otherwise divide by zero and plot NaNs.
        im = np.zeros_like(im, dtype=float)
    print(im)

    fig, axes = plt.subplots(ncols=3, nrows=3, figsize=(8, 8))
    fig.subplots_adjust(hspace=0.5)
    fig.suptitle('Augmented Images', fontsize=16)

    cells = axes.flatten()
    for cell in cells:
        cell.axis('off')

    # zip() stops at the ninth cell, so an oversized batch cannot overflow the grid.
    for cell, image in zip(cells, im[:batchsize]):
        cell.imshow(image)

    # Save next to the caller-supplied location rather than a global base path.
    fig.savefig(path + '\\Augmented-Images.png', dpi=300)

#fast plot of training history
def plot_history(history, modelname, path):
    """Plot accuracy and loss curves of a training run and persist them.

    Args:
        history: object with a ``history`` mapping that contains the keys
            ``categorical_accuracy``, ``val_categorical_accuracy``, ``loss``
            and ``val_loss`` (one value per epoch).
        modelname: label inserted into the output file names.
        path: prefix the file names ``\\History_<modelname>.png`` and
            ``\\History_<modelname>.csv`` are appended to.
    """
    hist_df = pd.DataFrame(history.history)
    fig, (acc_ax, loss_ax) = plt.subplots(nrows=2, sharex=True, figsize=(16, 10))

    acc_ax.plot(hist_df['val_categorical_accuracy'], lw=5, label='Validation Accuracy')
    acc_ax.plot(hist_df['categorical_accuracy'], lw=5, label='Training Accuracy')
    acc_ax.set_ylabel('Accuracy')

    loss_ax.plot(hist_df['val_loss'], lw=5, label='Validation MLogLoss')
    loss_ax.plot(hist_df['loss'], lw=5, label='Training MLogLoss')
    loss_ax.set_ylabel('MLogLoss')

    for axis in (acc_ax, loss_ax):
        axis.set_xlabel('Epoch')
        axis.grid()
        axis.legend(loc=0)

    # Raw strings keep the literal backslash without an invalid escape sequence.
    fig.savefig(path + r'\History_{}.png'.format(modelname), dpi=300)
    # to_csv() without a target only returns a string; write the table to disk.
    hist_df.to_csv(path + r'\History_{}.csv'.format(modelname), index_label='epoch')
    plt.show()

from sklearn.metrics import roc_curve, roc_auc_score, auc#plotting the receiver operating characteristics --> evaluate performance cutting point vice
def plot_roc(label, predictions, modelname, path): #IDEA: set different cutting point based on ROC for ensembling
    """Print the ROC-AUC score and save the ROC curve as a PNG.

    Args:
        label: ground-truth labels passed to ``roc_auc_score`` / ``roc_curve``.
        predictions: predicted scores passed to the same functions.
        modelname: label inserted into the output file name.
        path: prefix the file name ``\\ROC-Curve_<modelname>.png`` is appended to.
    """
    # Keep the computed score; printing the function object itself was a bug.
    roc_score = roc_auc_score(label, predictions)
    print('The ROC-Score is: {}' .format(roc_score))

    fpr_keras, tpr_keras, thresholds_keras = roc_curve(label, predictions)
    auc_keras = auc(fpr_keras, tpr_keras)

    # A fresh figure per call, so curves from earlier calls are never overlaid.
    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1], 'k--')
    ax.plot(fpr_keras, tpr_keras, label='area = {:.3f}'.format(auc_keras))
    ax.set_xlabel('False positive rate')
    ax.set_ylabel('True positive rate')
    ax.set_title('ROC curve: {}' .format(auc_keras))
    ax.legend(loc='best')
    fig.savefig(path + r'\ROC-Curve_{}.png' .format(modelname), dpi=300) #saving PLOT
    plt.show()
    
from sklearn.metrics import confusion_matrix
import itertools
# Source: Scikit Learn website
# http://scikit-learn.org/stable/auto_examples/
# model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-
# selection-plot-confusion-matrix-py
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    With `normalize=True` every row is divided by its row sum before
    printing and plotting. `classes` supplies the tick labels, `title` the
    plot title and `cmap` the colour map handed to `plt.imshow`.
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Cells above half the maximum get white text so the number stays readable.
    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_colour = "white" if value > half_max else "black"
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color=text_colour)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    
#plotting correctly classified images: https://www.datacamp.com/community/tutorials/convolutional-neural-networks-python
def _plot_prediction_grid(vals, y_pred, y_label, indices, title, hspace, out_file):
    """Draw up to nine images from ``vals`` on a 3x3 grid and save the figure.

    ``indices`` selects the images; each tile is titled with its predicted
    and true label.
    """
    fig, axes = plt.subplots(ncols=3, nrows=3, figsize=(8, 8))
    fig.subplots_adjust(hspace=hspace)
    fig.suptitle(title, fontsize=16)

    cells = axes.flatten()
    for cell in cells:
        cell.axis('off')

    for cell, idx in zip(cells, indices[:9]):
        cell.imshow(vals[idx])
        cell.set_title("Predicted {}, Class {}".format(y_pred[idx], y_label[idx]), fontsize=10)

    fig.savefig(out_file, dpi=100) #saving PLOT


#plotting correctly classified images: https://www.datacamp.com/community/tutorials/convolutional-neural-networks-python
def plot_correct(vals, y_pred, y_label, modelname, path):
    """Save a 3x3 grid of the first nine correctly classified images.

    Args:
        vals: indexable collection of images, aligned with the labels.
        y_pred: predicted labels, compared element-wise with ``y_label``.
            NOTE(review): assumes both arrays have the same shape -- confirm.
        y_label: ground-truth labels.
        modelname: label inserted into the output file name.
        path: prefix the file name ``\\Correct_Images_<modelname>.png`` is
            appended to.
    """
    correct = np.where(y_pred==y_label)[0]
    print ("Found %d correct labels" % len(correct))
    _plot_prediction_grid(vals, y_pred, y_label, correct,
                          title='Correct Images', hspace=0.5,
                          out_file=path + r'\Correct_Images_{}.png'.format(modelname))


#Plotting incorrectly classified
def plot_incorrect(vals, y_pred, y_label, modelname, path):
    """Save a 3x3 grid of the first nine misclassified images.

    Takes the same arguments as ``plot_correct``; the output file is
    ``\\Incorrect_Images_<modelname>.png`` appended to ``path``.
    """
    incorrect = np.where(y_pred!=y_label)[0]
    print ("Found %d incorrect labels" % len(incorrect))
    _plot_prediction_grid(vals, y_pred, y_label, incorrect,
                          title='Incorrect Images', hspace=1,
                          out_file=path + r'\Incorrect_Images_{}.png'.format(modelname))
    
def auc_roc(y_true, y_pred):
    """Metric function returning the value of ``tf.contrib.metrics.streaming_auc``.

    Passed in the ``metrics`` list when the models in this notebook are
    compiled. Uses ``tf.contrib``, ``tf.local_variables`` and
    ``tf.GraphKeys``, i.e. the TensorFlow 1.x graph API.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor.

    Returns:
        The AUC value tensor, evaluated only after the metric's update op has run.
    """
    # any tensorflow metric
    # NOTE(review): the call passes (y_pred, y_true) in that order -- confirm it
    # matches the parameter order streaming_auc expects.
    value, update_op = tf.contrib.metrics.streaming_auc(y_pred, y_true)

    # find all variables created for this metric
    # Keeps local variables whose second '/'-separated name component contains
    # 'auc_roc'. NOTE(review): presumably that component is a name scope derived
    # from this function's name -- confirm.
    metric_vars = [i for i in tf.local_variables() if 'auc_roc' in i.name.split('/')[1]]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        value = tf.identity(value)
        return value
    
from scipy.misc import imread
#list of tiffs to array to plot_correct / incorrect-images + rounding predictions to compare to labels
def prep_im_label (val, y_pred):
    """Load the images listed in `val` and round the predictions.

    Every entry of `val` is read with `imread`; the results are stacked into
    one array whose shape is printed. Returns that array together with
    `np.round(y_pred)`, so the predictions can be compared to the labels.
    """
    loaded = [imread(image_path) for image_path in val]
    vals = np.asarray(loaded)
    print(vals.shape)
    rounded = np.round(y_pred)
    return (vals, rounded)

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa
# Augmentation pipeline, later passed to ImageDataGenerator as
# `preprocessing_function=seq.augment_image`.
# `sometimes` wraps an augmenter so that it is applied with probability 0.5.
sometimes = lambda aug: iaa.Sometimes(0.5, aug)
seq = iaa.Sequential(
    [
        # apply the following augmenters to most images
        iaa.Fliplr(0.5), # horizontally flip 50% of all images
        iaa.Flipud(0.5), # vertically flip 50% of all images
        sometimes(iaa.Affine(
            scale={"x": (0.9, 1.6), "y": (0.9, 1.6)}, # scale each axis to 90-160% of its size
            translate_percent={"x": (-0.15, 0.15), "y": (-0.15, 0.15)}, # shift by up to +/-15% per axis
            rotate=(-10, 10), # rotate by -10 to +10 degrees
            shear=(-5, 5), # shear by -5 to +5 degrees
            mode=ia.ALL # NOTE(review): presumably lets imgaug pick any fill mode for new pixels -- confirm
        )),
        # apply between 0 and 4 of the augmenters listed below
        iaa.SomeOf((0, 4), [
                sometimes(iaa.Superpixels(p_replace=(0.3, 0.7), n_segments=(10, 100))), # superpixel representation --> better basal lamina representation
                iaa.OneOf([
                    iaa.GaussianBlur((0, 0.2)), # small blur effects --> better representation
                    iaa.AverageBlur(k=(1, 3)), # kernel size drawn from 1..3
                    iaa.MedianBlur(k=(1, 3)), # kernel size drawn from 1..3
                ]),
                iaa.Sharpen(alpha=(0, 1.0), lightness=(0.9, 1.1)), # cell wall representation
                iaa.Emboss(alpha=(0, 0.8), strength=(0, 0.5)), # cell wall representation
                # searching for edges or angles --> blobby mask --> better basal lamina representation / nuclei
                iaa.SimplexNoiseAlpha(iaa.OneOf([
                    iaa.EdgeDetect(alpha=(0.2, 0.4)), # detects edges --> cell wall,..
                    iaa.DirectedEdgeDetect(alpha=(0.2, 0.4), direction=(0.0, 1.0)), # edges detected from random directions
                ])),
                iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.01*255), per_channel=0.2), # add gaussian noise to images
             iaa.OneOf([
                    iaa.Dropout((0.05, 0.3), per_channel=0.2), # randomly drop 5-30% of the pixels
                    iaa.CoarseDropout((0.05, 0.3), size_percent=(0.01, 0.02), per_channel=0.2), # drop 5-30% on a coarse grid (larger blocks)
                ]),
                iaa.Invert(0.01, per_channel=True), # invert color channels (probability 0.01)
                iaa.Add((-10, 10), per_channel=0.3), # change brightness of images (add -10 to 10 to the pixel values)
                #iaa.AddToHueAndSaturation((-0.1, 0.1)), # change hue and saturation
                #
                # either change the brightness of the whole image (sometimes per channel) or change the brightness of subareas
                iaa.OneOf([
                    iaa.Multiply((0.9, 1.2), per_channel=0.5),
                    iaa.FrequencyNoiseAlpha(
                        exponent=(-1, 0),
                        first=iaa.Multiply((0.9, 1.1), per_channel=True),
                        second=iaa.ContrastNormalization((0.9, 1.1))
                    )
                ]),
                sometimes(iaa.ElasticTransformation(alpha=(0, 0.5), sigma=0.1)), # still not sure: move pixels locally around
                sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.03))), # still not sure: move parts of the image around
                sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
            ],
                     random_order=True
        )
    ],
    random_order=True
)

### k-fold crossvalidation with tiles in RAM

In [None]:
# Two-stage training per fold: first the classification head on top of a
# frozen VGG16 base, then the whole network starting from the best head weights.
# NOTE(review): the original author observed that the printed indices do not
# match the unshuffled numbers -- still unexplained.
# NOTE(review): `datagen`, `datagen_test`, `train_batch_size` and
# `train_batch_size_2` are not assigned under these names in the visible cells
# (later cells use `datagen_train`, `datagen_val`, `train_batch_size_1`) -- confirm.
for i, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    print("TRAIN:", train_index, "TEST:", test_index)
    print('[Fold %d/%d]' % (i, kfold))

    #slice training-data into folds for training/testing variables
    X_train, X_valid = X[train_index], X[test_index]
    y_train, y_valid = y[train_index], y[test_index]

    #get .fit values
    num_train_samples = len(X_train)
    num_val_samples = len(X_valid)
    # True division before the ceil: `//` already floors, which made np.ceil a
    # no-op and dropped the last partial batch.
    train_steps = int(np.ceil(num_train_samples / train_batch_size))
    val_steps = int(np.ceil(num_val_samples / val_batch_size))

    #get data-generators
    train_generator = datagen.flow(X_train, y_train, batch_size = train_batch_size)
    val_generator = datagen_test.flow(X_valid, y_valid, batch_size = val_batch_size)

    #Callbacks & Model
    name_weights = (base_path + r"\Fold_" + str(i) + "{}.h5" .format(model_name))
    callbacks_list = get_callbacks(name_weights = name_weights)
    model_VGG = get_model_classif_VGG_base_nottrainable()

    #first on small batch_size to establish the basic informations
    history = model_VGG.fit_generator(
        train_generator,
        epochs=5, verbose=1,
        validation_data = val_generator,
        steps_per_epoch=train_steps,
        validation_steps=val_steps,
        callbacks=callbacks_list)

    #second round --> whole model with larger batchsize
    train_steps = int(np.ceil(num_train_samples / train_batch_size_2))

    #get data-generators
    train_generator = datagen.flow(X_train, y_train, batch_size = train_batch_size_2)
    val_generator = datagen_test.flow(X_valid, y_valid, batch_size = val_batch_size)
    model_VGG_bt = get_model_classif_VGG_base_trainable()
    # Start from the checkpoint written by the first stage.
    model_VGG_bt.load_weights(name_weights)

    history = model_VGG_bt.fit_generator(
        train_generator,
        epochs=50, verbose=1,
        validation_data = val_generator,
        steps_per_epoch=train_steps,
        validation_steps=val_steps,
        callbacks=callbacks_list)

    # Report the fine-tuned model, not the frozen-base model of the first stage.
    print(model_VGG_bt.evaluate(X_valid, y_valid))

### Create folder structure for tiles (for debugging purposes)

In [None]:
# Setup data_frame for all data: Path, ID, Label
tiles_path = r"C:\Users\eg38emed\FCD\Tiles\Train_crossvalidation"

# Raw strings keep the literal backslash without an invalid escape sequence.
fcd = tiles_path + r'\FCD'
tsc = tiles_path + r'\TSC'


def tile_id_from_path(path):
    """Return the file name of a Windows-style path without directory or suffix.

    Independent of how deeply the tile is nested, unlike indexing the
    backslash-split path at a fixed position.
    """
    return PureWindowsPath(path).stem


#fcd folder
df = pd.DataFrame({'path': glob(os.path.join(fcd,'*.png'))})
df['id'] = df['path'].map(tile_id_from_path)
df['label'] = 0

#tsc folder
df_pos = pd.DataFrame({'path': glob(os.path.join(tsc,'*.png'))})
df_pos['id'] = df_pos['path'].map(tile_id_from_path)
df_pos['label'] = 1

#concate
df_train = pd.concat([df, df_pos])

#add images
#df_train['image'] = df_train['path'].map(imread)

print(len(df_train))
print(df_train.head())

In [None]:
# Arrays fed to the stratified k-fold splitter. The ids only stand in for the
# images: the splitter needs the number of items and the labels, not pixels.
X, y = (np.asarray(df_train[column]) for column in ('id', 'label'))
for array in (y, X):
    print(len(array))

In [None]:
#initialize StratifiedKFold
from sklearn.model_selection import StratifiedKFold
kfold = 10
# `random_state` only has an effect together with shuffle=True; current
# scikit-learn releases raise ValueError when it is set while shuffle=False.
# The splits themselves are unchanged (unshuffled, stratified by label).
skf = StratifiedKFold(n_splits=kfold, shuffle=False)

train_batch_size_1 = 64
val_batch_size = 64

#datagenerators
# Training images go through the imgaug pipeline and the 1/255 rescaling.
datagen_train = ImageDataGenerator(preprocessing_function=seq.augment_image,
                            rescale=1./255)

# Validation images are only rescaled.
datagen_val = ImageDataGenerator(rescale=1./255)

In [None]:
#Creating a new directory-structure --> flow_from_directory + better debugging
file_path = (r'C:\Users\eg38emed\FCD\kfold')
os.makedirs(file_path, exist_ok=True)

# Source folder name -> class sub-folder inside train_dir / val_dir.
CLASS_DIRS = {'FCD': 'a_fcd', 'TSC': 'b_tsc'}


def _copy_fold_tiles(indices, target_dir, log_prefix=()):
    """Copy the tiles at the given df_train row positions into target_dir.

    Each tile lands in the class sub-folder that matches the name of the
    folder it was read from. Raises ValueError for an unknown source folder
    instead of reusing the label of the previous tile.
    """
    for row in indices:
        print(*log_prefix, row)
        src = df_train.iloc[row, 0]
        print(src)
        # The class is the name of the tile's parent folder, whatever the depth.
        source_class = PureWindowsPath(src).parent.name
        if source_class not in CLASS_DIRS:
            raise ValueError('Unexpected class folder {!r} for {}'.format(source_class, src))
        fname = df_train.iloc[row, 1] + '.png'
        # copy the image from the source to the destination
        shutil.copyfile(src, os.path.join(target_dir, CLASS_DIRS[source_class], fname))


for i, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    print("TRAIN:", train_index, "TEST:", test_index)
    print('[Fold %d/%d]' % (i, kfold))

    base_dir = (file_path + r'\base_dir_fold_' + str(i))
    print(base_dir)
    train_dir = os.path.join(base_dir, 'train_dir')
    print(train_dir)
    val_dir = os.path.join(base_dir, 'val_dir')

    # One sub-folder per class inside both split folders; makedirs with
    # exist_ok lets the cell be re-run without failing on existing folders.
    # NOTE(review): files left over from an earlier run are not removed.
    for split_dir in (train_dir, val_dir):
        for class_dir in CLASS_DIRS.values():
            os.makedirs(os.path.join(split_dir, class_dir), exist_ok=True)

    # check that the folders have been created
    print(os.listdir(train_dir))
    print(os.listdir(val_dir))

    # Transfer the train images
    _copy_fold_tiles(train_index, train_dir)

    # Transfer the val images
    _copy_fold_tiles(test_index, val_dir, log_prefix=('Val:',))

### k-fold crossvalidation on folders

In [None]:
#VGG-16 for heatmap generation 
from keras.metrics import categorical_accuracy
from keras.callbacks import CSVLogger

def _build_vgg_classifier(train_base, dropout_rate):
    """Build and compile a VGG16 base with a two-class softmax head.

    Args:
        train_base: value assigned to ``trainable`` on every layer of the
            VGG16 base.
        dropout_rate: rate of the Dropout layer between the 256-unit Dense
            layer and the softmax output.

    The input shape is read from the module-level ``IMAGE_SIZE`` when the
    function is called.
    """
    base_model_VGG = applications.VGG16(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    print('Model loaded.')

    # Log the state each layer is actually put in.
    state = "trainable:" if train_base else "frozen:"
    for layer in base_model_VGG.layers:
        layer.trainable = train_base
        print(state, layer.name)

    x = base_model_VGG.output
    x = GlobalMaxPooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    predictions = Dense(2, activation='softmax')(x)
    model_VGG = Model(inputs=base_model_VGG.input, outputs=predictions)

    adam = optimizers.Adam()
    model_VGG.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=[categorical_accuracy, auc_roc])

    model_VGG.summary()
    return model_VGG


def get_model_classif_VGG_base_nottrainable():
    """Return the classifier with all VGG16 base layers frozen (dropout 0.2)."""
    return _build_vgg_classifier(train_base=False, dropout_rate=0.2)


def get_model_classif_VGG_base_trainable():
    """Return the classifier with all VGG16 base layers trainable (dropout 0.5).

    Same layer structure as the frozen variant, so weights saved from one can
    be loaded into the other.
    """
    return _build_vgg_classifier(train_base=True, dropout_rate=0.5)

import clr_callback
def _shared_callbacks(name_weights, history_suffix):
    """Build the callbacks common to both training stages.

    Returns ``[earlystopping, ReduceLR, checkpoint, tensorboard, csv_logger]``.
    Reads the module-level ``base_path`` and ``model_name`` at call time.
    """
    ReduceLR = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.5, patience=5, verbose=1, mode='auto', cooldown=5, min_lr=0.00005)
    earlystopping = EarlyStopping(monitor='val_categorical_accuracy', min_delta=0.001, patience=15, verbose=1, mode='auto')
    # Only the best weights (highest validation accuracy) are kept on disk.
    checkpoint = ModelCheckpoint(name_weights, monitor='val_categorical_accuracy', verbose=1, save_best_only=True, save_weights_only=True, mode='max')
    tensorboard_callback = TensorBoard(log_dir=base_path+ "logs\\{}".format(time()),
            histogram_freq=0, #batch_size=32,
            write_graph=True, write_grads=False, write_images=True,
            embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
    # The notebook's global is `model_name`; `modelname` was never assigned.
    csv_logger = CSVLogger(filename = base_path + ("model_history_{}_" + history_suffix + ".csv").format(model_name), append=True)
    return [earlystopping, ReduceLR, checkpoint, tensorboard_callback, csv_logger]


def get_callbacks_clr(name_weights):
    """Return the callback list for top-layer training with a cyclic learning rate.

    Args:
        name_weights: file the best weights are checkpointed to.

    Besides ``base_path`` and ``model_name`` this reads the module-level
    ``num_train_samples`` and ``train_batch_size_1`` to size the cycle
    (half an epoch per step).
    NOTE(review): CyclicLR and ReduceLROnPlateau both adjust the learning
    rate -- confirm that the combination is intended.
    """
    clr = clr_callback.CyclicLR(base_lr=0.0001, max_lr=0.0005,
                        step_size=num_train_samples/train_batch_size_1/2)
    return [clr] + _shared_callbacks(name_weights, "toplayer")


def get_callbacks(name_weights):
    """Return the callback list for fine-tuning the whole network.

    Args:
        name_weights: file the best weights are checkpointed to.
    """
    return _shared_callbacks(name_weights, "ALL")

In [None]:
#with CSV_Logger
# Two-stage training on the per-fold folders: first the classification head
# (frozen base, cyclic LR callbacks), then all layers starting from the best
# head weights.
kfold = 10
IMAGE_SIZE = 200
IMAGE_CHANNELS = 3

train_batch_size_1 = 128
val_batch_size = 64

# The generators do not depend on the fold, so build them once.
datagen_train = ImageDataGenerator(preprocessing_function=seq.augment_image,
                            rescale=1./255)

datagen_val = ImageDataGenerator(rescale=1./255)

# Folds are numbered 1..kfold, matching the base_dir_fold_<n> folder names.
for i in range(1, kfold + 1):
    model_name = ('VGG_kfold_{}'.format(i))

    base_path = (r'C:\Users\eg38emed\FCD\kfold\base_dir_fold_{}\\'.format(i))
    print('Training Model: {} on Fold: {}'.format(model_name, i))
    train_path = (base_path + 'train_dir')
    val_path = (base_path + 'val_dir')

    train_gen_1 = datagen_train.flow_from_directory(train_path,
                                            target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                            batch_size=train_batch_size_1,
                                            class_mode='categorical')

    val_gen = datagen_val.flow_from_directory(val_path,
                                            target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                            batch_size=val_batch_size,
                                            class_mode='categorical')

    # get_callbacks_clr sizes its learning-rate cycle from this global.
    num_train_samples = train_gen_1.samples

    #train toplayer with cyclic
    model_VGG = get_model_classif_VGG_base_nottrainable()
    name_weights = (base_path + r"\{}.h5".format(model_name))
    callbacks_list = get_callbacks_clr(name_weights = name_weights)

    history = model_VGG.fit_generator(train_gen_1,
                    steps_per_epoch=train_gen_1.samples // train_batch_size_1,
                    validation_data=val_gen,
                    validation_steps=val_gen.samples // val_batch_size,
                    epochs=10, verbose=1,
                    callbacks=callbacks_list)
    plot_history(history, modelname = ('{}_toplayer'.format(model_name)), path=base_path)

    #FINETUNE ALL VGG with fixed LR
    model_VGG = get_model_classif_VGG_base_trainable()
    model_VGG.load_weights(base_path + r"\{}.h5".format(model_name))
    name_weights = (base_path + r"\{}_ALL.h5".format(model_name))
    callbacks_list = get_callbacks(name_weights = name_weights)

    history = model_VGG.fit_generator(train_gen_1,
                    steps_per_epoch=train_gen_1.samples // train_batch_size_1,
                    validation_data=val_gen,
                    validation_steps=val_gen.samples // val_batch_size,
                    epochs=30, verbose=1,
                    callbacks=callbacks_list)
    plot_history(history, modelname = ('{}_All'.format(model_name)), path=base_path)

### Retrain some Models

In [None]:
# Results so far:
#   folds 1,2,3,4,6,7,8: >90% with 100x100, trained toplayer (0.5 DO, cyclic lr) -> trained model (0.5 DO, 0.0001 lr)
#   retrain later: fold 9: 82%, fold 5: 84%
#   retraining some models: fold 10: 65% all / 75% with trained toplayer
# This cell selects one fold and prepares its generators at a larger image size.
i = 4
IMAGE_SIZE = 272
IMAGE_CHANNELS = 3
model_name = 'VGG_kfold_{}'.format(i)

base_path = r'C:\Users\eg38emed\FCD\kfold\base_dir_fold_{}\\'.format(i)
print('Training Model: {} on Fold: {}'.format(model_name, i))
print(base_path)
train_path = base_path + 'train_dir'
val_path = base_path + 'val_dir'

train_batch_size_1 = 64
val_batch_size = 64

# Training tiles are augmented and rescaled, validation tiles only rescaled.
datagen_train = ImageDataGenerator(rescale=1./255,
                                   preprocessing_function=seq.augment_image)
datagen_val = ImageDataGenerator(rescale=1./255)

train_gen_1 = datagen_train.flow_from_directory(
    train_path,
    class_mode='categorical',
    batch_size=train_batch_size_1,
    target_size=(IMAGE_SIZE, IMAGE_SIZE))

val_gen = datagen_val.flow_from_directory(
    val_path,
    class_mode='categorical',
    batch_size=val_batch_size,
    target_size=(IMAGE_SIZE, IMAGE_SIZE))

In [None]:
# Stage 1 for the selected fold: train only the classification head (frozen
# VGG16 base) with the cyclic-learning-rate callbacks; the best weights are
# checkpointed to <base_path><model_name>.h5.
model_VGG = get_model_classif_VGG_base_nottrainable()
name_weights = base_path + "{}.h5".format(model_name)
callbacks_list = get_callbacks_clr(name_weights=name_weights)

history = model_VGG.fit_generator(
    train_gen_1,
    epochs=10,
    verbose=1,
    steps_per_epoch=train_gen_1.samples // train_batch_size_1,
    validation_data=val_gen,
    validation_steps=val_gen.samples // val_batch_size,
    callbacks=callbacks_list,
)
plot_history(history, modelname='{}_toplayer'.format(model_name), path=base_path)

In [None]:
# Stage 2 for the selected fold: fine-tune all VGG16 layers, starting from the
# previously saved *_ALL.h5 weights. This run uses the cyclic-learning-rate
# callbacks and checkpoints to *_ALL_272_CLR.h5.
model_VGG = get_model_classif_VGG_base_trainable()
model_VGG.load_weights(base_path + "{}_ALL.h5".format(model_name))
name_weights = base_path + "{}_ALL_272_CLR.h5".format(model_name)
callbacks_list = get_callbacks_clr(name_weights=name_weights)

history = model_VGG.fit_generator(
    train_gen_1,
    epochs=4,
    verbose=1,
    steps_per_epoch=train_gen_1.samples // train_batch_size_1,
    validation_data=val_gen,
    validation_steps=val_gen.samples // val_batch_size,
    callbacks=callbacks_list,
)

plot_history(history, modelname='{}_All_272_CLR'.format(model_name), path=base_path)