<a href="https://colab.research.google.com/github/IverMartinsen/MastersThesis/blob/main/Notebooks/cod_otoliths_cross_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the thesis repository into the current working directory; a later cell
# appends /content/MastersThesis/Python to sys.path to import its modules.
!git clone https://github.com/IverMartinsen/MastersThesis.git

Cloning into 'MastersThesis'...
remote: Enumerating objects: 566, done.[K
remote: Counting objects: 100% (566/566), done.[K
remote: Compressing objects: 100% (402/402), done.[K
remote: Total 566 (delta 202), reused 504 (delta 140), pack-reused 0[K
Receiving objects: 100% (566/566), 6.83 MiB | 22.64 MiB/s, done.
Resolving deltas: 100% (202/202), done.


In [None]:
#@title Import modules and images { form-width: "40%" }

import sys
sys.path.append(r'/content/MastersThesis/Python')      

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from modules.imageloader import load_images
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.layers import RandomZoom, RandomRotation, RandomFlip

# Turn off interactive plotting so figures created during the trials are only
# written to disk and do not pop up in the notebook.
plt.ioff()

# Folder on the mounted Drive that holds the input images.
path = r'/content/drive/MyDrive/standard_convex'

# Image geometry: spatial size fed to the network, plus three colour channels.
img_size = (128, 128)
img_shape = img_size + (3,)

# Cross-validation and training hyperparameters.
num_splits = 5          # number of subsets the image collection is divided into
batch_size = 32
initial_epochs = 100    # upper bound on epochs for the first training phase

# Load the images as `num_splits` subsets. The constant is passed instead of a
# repeated literal so that changing `num_splits` actually changes the split.
sets = load_images(path, img_size, num_splits, seed=123, mode='RGB')

Total number of images: 610
Total number of classes: 2
----------------------------
5 subsets with 122 images


In [None]:
#@title Train model { form-width: "40%" }

# Root folder on Drive that receives the figures and result spreadsheets.
destination = r'/content/drive/MyDrive/Forsøk/Forsøk 19.08.2021 (notaugmented)'

# Sub-folder for the per-trial learning curves; created if it does not exist.
folder_name = 'Learning curves'
os.makedirs(f'{destination}/{folder_name}', exist_ok=True)

# Accumulators filled trial by trial:
#   individual_results - individual test scores for all trials
#   summary_results    - class-wise accuracies for all trials
individual_results, summary_results = pd.DataFrame(), pd.DataFrame()

# Running trial counter, incremented at the start of every trial.
trial_num = 0

# Placeholder so that the `del model` in the first trial has a name to delete.
model = None

# Nested cross-validation: every subset serves once as the held-out test set,
# and for each test set every other subset serves once as the validation set.
# The subsets that are neither test nor validation form the training data.
for test_ds in sets:
    # Subsets are selected by identity (`is not`) rather than `!=`: value
    # comparison of containers holding image arrays is slow and can raise on
    # ambiguous array truth values, while identity is what is meant here.
    for valid_ds in (ds for ds in sets if ds is not test_ds):

        trial_num += 1

        generators = [
            ds for ds in sets if ds is not test_ds and ds is not valid_ds]

        x_tr = np.concatenate([generator['images'] for generator in generators])
        y_tr = np.concatenate([generator['labels'] for generator in generators])

        x_va = valid_ds['images']
        y_va = valid_ds['labels']

        # Resample the training data to balance out the classes: one endlessly
        # repeating dataset per class, drawn from with equal probability.
        negative_ds = tf.data.Dataset.from_tensor_slices(
            (x_tr[np.where(y_tr == 0)],
             y_tr[np.where(y_tr == 0)])).repeat()

        positive_ds = tf.data.Dataset.from_tensor_slices(
            (x_tr[np.where(y_tr == 1)],
             y_tr[np.where(y_tr == 1)])).repeat()

        resampled_ds = tf.data.experimental.sample_from_datasets(
            [negative_ds, positive_ds],
            weights=[0.5, 0.5])

        resampled_ds = resampled_ds.batch(batch_size)

        # With `clear_session()` called at the beginning,
        # Keras starts with a blank state at each iteration
        tf.keras.backend.clear_session()

        # Drop the reference to the previous trial's model before building a
        # new one.
        del model

        # Initialize the base model using Xception pretrained on ImageNet.
        # With include_top=False only the feature extraction layers are
        # included.
        base_model = tf.keras.applications.Xception(
            input_shape=img_shape,
            include_top=False,
            weights='imagenet'
            )

        # Fine-tune from this layer onwards
        # NOTE(review): if the base model has fewer than 150 layers, the slice
        # below covers all of them and the whole backbone is frozen during the
        # first training phase - confirm this is intended.
        fine_tune_at = 150

        # Freeze all the layers before the `fine_tune_at` layer
        for layer in base_model.layers[:fine_tune_at]:
            layer.trainable = False

        # Define the full model. Calling the base model with `training=False`
        # makes it run in inference mode, e.g. batch normalization parameters
        # are not updated, and dropout is not being used.
        # NOTE(review): `preprocess_input` is imported from mobilenet_v2 while
        # the backbone is Xception - confirm both apply the same scaling.
        inputs = tf.keras.Input(shape=img_shape)
        x = preprocess_input(inputs)

        # data augmentation (disabled for this experiment)
        #x = RandomRotation((-0.2, 0.2))(x)
        #x = RandomZoom((-0.2, 0.2), (-0.2, 0.2))(x)
        #x = RandomFlip("horizontal")(x)

        x = base_model(x, training=False)
        x = GlobalAveragePooling2D()(x)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        model = tf.keras.Model(inputs, outputs)

        # set a high base learning rate for initial training
        base_learning_rate = 1e-3
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
            loss=tf.keras.losses.BinaryCrossentropy(),
            metrics=['accuracy']
            )

        # Early stopping with a patience of 20 epochs; the best weights seen
        # are restored when training is stopped.
        callbacks = [tf.keras.callbacks.EarlyStopping(
                patience=20, restore_best_weights=True)]

        # First phase: fit the model. The resampled dataset repeats
        # indefinitely, so `steps_per_epoch` defines the length of an epoch.
        history = model.fit(resampled_ds,
                            epochs=initial_epochs,
                            steps_per_epoch=12,
                            validation_data=(x_va, y_va),
                            callbacks=callbacks)

        acc = history.history['accuracy']
        val_acc = history.history['val_accuracy']

        loss = history.history['loss']
        val_loss = history.history['val_loss']

        # Number of epochs actually run in the first phase; marks the start of
        # fine-tuning in the plots below.
        epochs_run = len(acc)

        # Unfreeze the base_model. Note that it keeps running in inference mode
        # since we passed `training=False` when calling it. This means that
        # the batchnorm layers will not update their batch statistics.
        # This prevents the batchnorm layers from undoing all the training
        # we've done so far.
        # NOTE(review): this presumably makes every backbone layer trainable,
        # overriding the per-layer freeze above - confirm.
        base_model.trainable = True

        # Recompile so that the change of trainable weights takes effect.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(1e-5),  # Low learning rate
            loss=tf.keras.losses.BinaryCrossentropy(),
            metrics=['accuracy'],
        )

        # Second phase: fine-tune with the same data and early stopping.
        history_fine = model.fit(
            resampled_ds,
            epochs=100,
            steps_per_epoch=12,
            validation_data=(x_va, y_va),
            callbacks=callbacks)

        acc += history_fine.history['accuracy']
        val_acc += history_fine.history['val_accuracy']

        loss += history_fine.history['loss']
        val_loss += history_fine.history['val_loss']

        # plot loss and accuracy and save figure
        fig = plt.figure(figsize=(12, 8))
        plt.subplot(2, 1, 1)
        plt.plot(acc, label='Training Accuracy')
        plt.plot(val_acc, label='Validation Accuracy')
        plt.ylim([0.8, 1])
        plt.plot([epochs_run-1,epochs_run-1],
                plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc='lower right')
        plt.title('Training and Validation Accuracy')

        plt.subplot(2, 1, 2)
        plt.plot(loss, label='Training Loss')
        plt.plot(val_loss, label='Validation Loss')
        plt.ylim([0, 1.0])
        plt.plot([epochs_run-1,epochs_run-1],
                plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc='upper right')
        plt.title('Training and Validation Loss')
        plt.xlabel('epoch')

        plt.savefig(
            fname=destination + '/' + folder_name + '/trial' +
            str(trial_num))

        # Release the figure once it is on disk; otherwise one figure per
        # trial stays alive in memory for the whole run.
        plt.close(fig)

        # evaluate model on test set and store results in DataFrames
        predictions = model.predict(test_ds['images'])
        labels = predictions.round()

        # One column per trial, indexed by file name; the outer merge aligns
        # the columns of different trials on the file names.
        dataframe = pd.DataFrame(
                predictions.flatten(),
                index=test_ds['filenames'],
                columns=[trial_num])

        individual_results = pd.merge(
            individual_results,
            dataframe,
            how='outer',
            left_index=True,
            right_index=True
            )

        # compute class-wise accuracy: share of correctly predicted test
        # samples among those whose true label is 0 and 1, respectively
        idx = np.where(test_ds['labels'] == 0)

        acc_0 = np.sum(
            test_ds['labels'][idx] == labels.flatten()[idx]) / len(test_ds['labels'][idx])

        idx = np.where(test_ds['labels'] == 1)

        acc_1 = np.sum(
            test_ds['labels'][idx] == labels.flatten()[idx]) / len(test_ds['labels'][idx])

        # Overall test accuracy (second value returned by `evaluate`) followed
        # by the accuracies for label 0 and label 1.
        dataframe = pd.DataFrame(
                [model.evaluate(test_ds['images'], test_ds['labels'])[1], acc_0, acc_1],
                index=['Accuracy', 'cc', 'neac'],
                columns=[trial_num])

        summary_results = pd.merge(
            summary_results,
            dataframe,
            how='outer',
            left_index=True,
            right_index=True)

# Persist the raw per-file predictions and the per-trial accuracy summary.
individual_results.to_excel(f'{destination}/individual_results.xlsx')
summary_results.to_excel(f'{destination}/summary_results.xlsx')

# Map every predicted probability p to its log-odds:
# -log(1/p - 1) == log(p / (1 - p)).
# NOTE(review): a probability of exactly 0 or 1 yields -inf / +inf here.
individual_scores = -np.log(1 / individual_results - 1)
individual_scores.to_excel(f'{destination}/individual_scores.xlsx')

# Row-wise means over the trial columns, for probabilities and scores alike,
# collected in a two-column table that shares the index of the predictions.
mean_probability = np.mean(individual_results, axis=1)
mean_score = np.mean(individual_scores, axis=1)
individual_means = pd.DataFrame(
    np.vstack((mean_probability, mean_score)).transpose(),
    index=individual_results.index,
    columns=['Mean probability', 'Mean score'])
individual_means.to_excel(f'{destination}/individual_means.xlsx')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 