# Binary classification of commutative diagrams

In [1]:
#import torch
import os
import shutil
import random
import math
from copy import deepcopy

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras import models
from keras.datasets import mnist
import tensorflow_addons as tfa # Needed for Yogi optimizer
tfa.register_all(custom_kernels=False) # Registers TFA objects in TF dictionaries
import matplotlib.pyplot as plt
%matplotlib inline

## 1. Data pipeline

### 1.1 Expand working directory with train, test and validation folders

In [2]:
## User setting: folder inside the working directory that holds the raw 'positive' and 'negative' sample folders
unsortedSamplesDirName = 'diagramSamplesMarch25'
##

cwd = os.getcwd()

# Raw (unsorted) samples; both class folders must already exist
unsortedSamplesDir = os.path.join(cwd, unsortedSamplesDirName)
assert all(className in os.listdir(unsortedSamplesDir) for className in ('positive', 'negative'))
unsortedPositiveSamplesDir = os.path.join(unsortedSamplesDir, 'positive')
unsortedNegativeSamplesDir = os.path.join(unsortedSamplesDir, 'negative')

# Sorted samples: sortedSamples/{train,test,val}/{positive,negative}
sortedSamplesDir = os.path.join(cwd, 'sortedSamples')

trainDir = os.path.join(sortedSamplesDir, 'train')
testDir = os.path.join(sortedSamplesDir, 'test')
valDir = os.path.join(sortedSamplesDir, 'val')

trainPositiveDir = os.path.join(trainDir, 'positive')
trainNegativeDir = os.path.join(trainDir, 'negative')
testPositiveDir = os.path.join(testDir, 'positive')
testNegativeDir = os.path.join(testDir, 'negative')
valPositiveDir = os.path.join(valDir, 'positive')
valNegativeDir = os.path.join(valDir, 'negative')

# Create the whole tree top-down; exist_ok makes the cell safe to re-run
for _requiredDir in (
        sortedSamplesDir,
        trainDir, testDir, valDir,
        trainPositiveDir, trainNegativeDir,
        testPositiveDir, testNegativeDir,
        valPositiveDir, valNegativeDir):
    os.makedirs(_requiredDir, exist_ok=True)

### 1.2 Copy images into folders
The code currently balances the two classes by randomly discarding samples from the larger class (positive or negative) until both classes have the same number of samples.

In [3]:
##
trainRatio = 0.6
testRatio = 0.2
valRatio = 0.2
# Compare with a tolerance: floating-point rounding can make an exact == fail for ratios that do sum to one
assert math.isclose(trainRatio + testRatio + valRatio, 1.)

positiveRatio = 0.5 # Desired ratio of positive samples in the sorted data

performCopy = False
##

if performCopy:
    positiveSamplesFilenameList = os.listdir(unsortedPositiveSamplesDir)
    negativeSamplesFilenameList = os.listdir(unsortedNegativeSamplesDir)
    random.shuffle(positiveSamplesFilenameList)
    random.shuffle(negativeSamplesFilenameList)

    # Balance the classes: the lists are already shuffled, so truncating the
    # longer one to the length of the shorter one discards a random subset.
    numSamplesPerClass = min(len(positiveSamplesFilenameList), len(negativeSamplesFilenameList))
    positiveSamplesFilenameList = positiveSamplesFilenameList[:numSamplesPerClass]
    negativeSamplesFilenameList = negativeSamplesFilenameList[:numSamplesPerClass]
    numPositiveSamples = len(positiveSamplesFilenameList)
    numNegativeSamples = len(negativeSamplesFilenameList)
    assert(numPositiveSamples == numNegativeSamples)

    numSamples = numPositiveSamples + numNegativeSamples

    numTrainSamples = math.floor(numSamples*trainRatio)
    numTestSamples = math.floor(numSamples*testRatio)
    numValSamples = math.floor(numSamples*valRatio)


    unsortedSamplesInfo = {'posDir':unsortedPositiveSamplesDir, 'negDir':unsortedNegativeSamplesDir, 'posFilenameList':positiveSamplesFilenameList,
                           'negFilenameList':negativeSamplesFilenameList}

    trainCopyInfo = {'num':numTrainSamples, 'posDir':trainPositiveDir, 'negDir':trainNegativeDir}
    testCopyInfo = {'num':numTestSamples, 'posDir':testPositiveDir, 'negDir':testNegativeDir}
    valCopyInfo = {'num':numValSamples, 'posDir':valPositiveDir, 'negDir':valNegativeDir}

    def copyImagesInPartition(unsortedSamplesInfo:dict, partitionCopyInfo:dict):
        """Copy one partition's share of positive and negative images into its folders.

        unsortedSamplesInfo: dict with keys 'posDir', 'negDir', 'posFilenameList' and
            'negFilenameList'. The filename lists are consumed (popped from), so
            successive partitions never receive the same file.
        partitionCopyInfo: dict with keys 'num' (total images for the partition),
            'posDir' and 'negDir' (destination folders).

        NOTE(review): the classes were truncated to equal size above, so a
        positiveRatio far from 0.5 can exhaust one filename list (IndexError on pop).
        """
        for _ in range(math.floor(partitionCopyInfo['num']*positiveRatio)):
            copyImage(unsortedSamplesInfo['posFilenameList'], unsortedSamplesInfo['posDir'], partitionCopyInfo['posDir'])
        for _ in range(math.floor(partitionCopyInfo['num']*(1. - positiveRatio))):
            copyImage(unsortedSamplesInfo['negFilenameList'], unsortedSamplesInfo['negDir'], partitionCopyInfo['negDir'])

    def copyImage(sampleFilenameList, srcDir, dstDir):
        """Pop the last filename from sampleFilenameList and copy that file from srcDir to dstDir."""
        filename = sampleFilenameList.pop()
        src = os.path.join(srcDir, filename)
        dst = os.path.join(dstDir, filename)
        try:
            shutil.copyfile(src, dst)
        except PermissionError: # Ignores straggler files such as notebook checkpoints
            pass

    # Refuse to copy into a partially filled tree. All six destination folders are
    # checked, including the validation folders.
    sampleDirectorySizes = [len(os.listdir(directory)) for directory in [
        trainPositiveDir, trainNegativeDir, testPositiveDir, testNegativeDir,
        valPositiveDir, valNegativeDir]]

    if all(size == 0 for size in sampleDirectorySizes):
        copyImagesInPartition(unsortedSamplesInfo, trainCopyInfo)
        copyImagesInPartition(unsortedSamplesInfo, testCopyInfo)
        copyImagesInPartition(unsortedSamplesInfo, valCopyInfo)
    else:
        raise RuntimeError("Sorted image directories are not empty.")

[*Optional*]: Test whether a sample handful of the images were copied to the correct folders

In [4]:
##
runImageCopyTest = False

comparisonSampleSize = 50
imageFiletype = 'png'
##

def matchesImageFiletype(sampleFilename : str, imageFiletype : str): # Needed to ignore straggler files such as notebook checkpoints
    """Return True when sampleFilename has the extension imageFiletype (case-insensitive).

    imageFiletype may be given with or without the leading dot ('png' or '.png').
    The extension is the end of the filename, so the suffix is compared.
    """
    expectedSuffix = '.' + imageFiletype.lower().lstrip('.')
    return sampleFilename.lower().endswith(expectedSuffix)

if runImageCopyTest and performCopy:
    # These must be redefined here since they were popped
    positiveSamplesFilenameList = os.listdir(unsortedPositiveSamplesDir)
    negativeSamplesFilenameList = os.listdir(unsortedNegativeSamplesDir)
    # Sets give constant-time membership tests in the checks below
    positiveSamplesFilenames = set(positiveSamplesFilenameList)
    negativeSamplesFilenames = set(negativeSamplesFilenameList)

    sortedTrainPositiveFilenameList = os.listdir(trainPositiveDir)
    sortedTrainNegativeFilenameList = os.listdir(trainNegativeDir)
    sortedTestPositiveFilenameList = os.listdir(testPositiveDir)
    sortedTestNegativeFilenameList = os.listdir(testNegativeDir)
    sortedValPositiveFilenameList = os.listdir(valPositiveDir)
    sortedValNegativeFilenameList = os.listdir(valNegativeDir)

    allPositiveFilenameLists = [sortedTrainPositiveFilenameList, sortedTestPositiveFilenameList, sortedValPositiveFilenameList]
    allNegativeFilenameLists = [sortedTrainNegativeFilenameList, sortedTestNegativeFilenameList, sortedValNegativeFilenameList]

    # Shuffle so that the first comparisonSampleSize entries form a random sample
    for fList in allPositiveFilenameLists + allNegativeFilenameLists:
        random.shuffle(fList)

    # Every sampled image in a positive (negative) folder must come from the
    # unsorted positive (negative) folder.
    for fList in allPositiveFilenameLists:
        assert(all(sampleFilename in positiveSamplesFilenames
                   for sampleFilename in fList[:comparisonSampleSize]
                   if matchesImageFiletype(sampleFilename, imageFiletype)))
    for fList in allNegativeFilenameLists:
        assert(all(sampleFilename in negativeSamplesFilenames
                   for sampleFilename in fList[:comparisonSampleSize]
                   if matchesImageFiletype(sampleFilename, imageFiletype)))
    print('Test was succesful!')

### 1.3 Data generators

The data generators themselves rescale the input pixel values to the [0, 1] range and convert the images to grayscale. Note that the EfficientNet baseline model expects three-channel (RGB) float pixel values in the [0, 255] range, so a separate set of data generators is made for it.

In [5]:
##
batchSize = 75
imageResolution = (150, 150) # Can be tweaked! But TESTME: This might require rescaling of some network dimensions
##

# Grayscale pipeline: pixel values rescaled to [0, 1], one channel.
# Each partition gets its own factory, and each iterator is built from the
# factory of its own partition.
trainDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
testDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

trainDataGenerator = trainDataGeneratorFactory.flow_from_directory( # Batch generator
    trainDir,
    target_size = imageResolution,
    batch_size = batchSize,
    color_mode = 'grayscale',
    class_mode = 'binary')
testDataGenerator = testDataGeneratorFactory.flow_from_directory(
    testDir,
    target_size = imageResolution,
    batch_size = batchSize,
    color_mode = 'grayscale',
    class_mode = 'binary')
valDataGenerator = valDataGeneratorFactory.flow_from_directory(
    valDir,
    target_size = imageResolution,
    batch_size = batchSize,
    color_mode = 'grayscale',
    class_mode = 'binary')


# EfficientNet pipeline: no rescaling and the default color mode, because the
# EfficientNet baseline takes unscaled three-channel input.
efficientNetTrainDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator()
efficientNetTestDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator()
efficientNetValDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator()

efficientNetTrainDataGenerator = efficientNetTrainDataGeneratorFactory.flow_from_directory( # Batch generator
    trainDir,
    target_size = imageResolution,
    batch_size = batchSize,
    class_mode = 'binary')
efficientNetTestDataGenerator = efficientNetTestDataGeneratorFactory.flow_from_directory(
    testDir,
    target_size = imageResolution,
    batch_size = batchSize,
    class_mode = 'binary')
efficientNetValDataGenerator = efficientNetValDataGeneratorFactory.flow_from_directory(
    valDir,
    target_size = imageResolution,
    batch_size = batchSize,
    class_mode = 'binary')

Found 1292 images belonging to 2 classes.
Found 430 images belonging to 2 classes.
Found 430 images belonging to 2 classes.
Found 1292 images belonging to 2 classes.
Found 430 images belonging to 2 classes.
Found 430 images belonging to 2 classes.


"\ncapsNetInputShape = (36, 36) # MULTIMNIST dimensions\n\ncapsNetTrainDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)\ncapsNetTestDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)\ncapsNetValDataGeneratorFactory = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)\n\ncapsNetTrainDataGenerator = capsNetTrainDataGeneratorFactory.flow_from_directory( # Batch generator\n    trainDir,\n    target_size = capsNetInputShape,\n    batch_size = batchSize,\n    color_mode = 'grayscale',\n    class_mode = 'binary')\ncapsNetTestDataGenerator = capsNetTestDataGeneratorFactory.flow_from_directory(\n    testDir,\n    target_size = capsNetInputShape,\n    batch_size = batchSize,\n    color_mode = 'grayscale',\n    class_mode = 'binary')\ncapsNetValDataGenerator = capsNetValDataGeneratorFactory.flow_from_directory(\n    valDir,\n    target_size = capsNetInputShape,\n    batch_size = batchSize,\n    color_mode = 'grayscale',

## 2 Baseline models implementation
A series of baseline models are implemented for comparative study.
- A simple vanilla CNN model with a standard architecture
- Pretrained EfficientNet as feature extraction
- Finetuned pretrained EfficientNet
- EfficientNet manually trained on MNIST

Note that enabling the finetuned EfficientNet also enables the feature-extraction model, because finetuning starts from the top classifier that was trained on the extracted features.

In [None]:
## Switches selecting which baseline models get built and trained in this run
compileBaselineCNN = False
compileEfficientNetFE = False
compileFinetunedEfficientNet = False
compileMNISTEfficientNet = True
##

# Finetuning reuses the classifier trained on the extracted features, so the
# feature-extraction stage must run whenever finetuning is requested.
if compileFinetunedEfficientNet:
    compileEfficientNetFE = True

#### 2.1.1 Building baseline CNN

In [6]:
# Plain convolutional baseline: four Conv2D + MaxPooling2D stages, then a
# dropout-regularised dense head ending in a single sigmoid unit.
baselineCNN = models.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(*imageResolution, 1)),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='BaselineCNN')
#baselineCNN.summary()

#### 2.1.2 EfficientNet bases

In [7]:
# Input shape used for the EfficientNet that is trained from scratch on MNIST
efficientNetMinInputShape = (32, 32, 1)
# MNISTInputShape = (28, 28, 1)

# ImageNet-pretrained convolutional base (no classification top), frozen so it
# can serve as a fixed feature extractor.
# Try with pooling='avg'/'max'?
efficientNetB0Base = keras.applications.efficientnet.EfficientNetB0(
    weights = 'imagenet',
    include_top = False,
    input_shape = (*imageResolution, 3))
efficientNetB0Base.trainable = False

# Randomly initialised base (weights=None), to be trained on MNIST
untrainedEfficientNetB0Base = keras.applications.efficientnet.EfficientNetB0(
    weights = None,
    include_top = False,
    input_shape = efficientNetMinInputShape)

#### 2.1.3 Classifier for pretrained EfficientNet

In [31]:
# Shape of one feature map produced by the pretrained base
# (presumably for the (150, 150, 3) input configured above; verify if imageResolution changes)
efficientNetBaseCanonicalOutputShape = (5, 5, 1280)
# np.prod replaces the deprecated alias np.product (removed in NumPy 2.0);
# int() hands Keras a plain Python int rather than a NumPy scalar.
efficientNetBaseCanonicalFlatOutputSize = int(np.prod(efficientNetBaseCanonicalOutputShape))
efficientNetBaseCanonicalFlatOutputSizeGrayscale = 1280 # Wait, this is the channel axis, right? Maybe EfficientNet was a bad choice...

# Dense head trained on the flattened features extracted by the pretrained base
efficientNetB0FEClassifier = models.Sequential(name='efficientNetB0FEClassifier')
efficientNetB0FEClassifier.add(layers.Dense(1024, activation='relu', input_dim=efficientNetBaseCanonicalFlatOutputSize))
efficientNetB0FEClassifier.add(layers.Dense(256, activation='relu'))
efficientNetB0FEClassifier.add(layers.Dropout(0.5))
efficientNetB0FEClassifier.add(layers.Dense(1, activation='sigmoid'))

#### 2.1.4 Classifiers for MNIST EfficientNet
Two classifiers are built. The "categorical classifier", for training on MNIST, with output length of 10, and the "binary classifier", for training and use on our binary dataset.

In [32]:
# Head used while training the base on MNIST: ten softmax outputs, one per digit
efficientNetB0MNISTCategoricalClassifier = models.Sequential(
    [
        layers.Dense(1024, activation='relu', input_dim=efficientNetBaseCanonicalFlatOutputSizeGrayscale),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax'),
    ],
    name='efficientNetB0MNISTCategoricalClassifier')

# Head used on the diagram dataset: same hidden layers, one sigmoid output
efficientNetB0MNISTBinaryClassifier = models.Sequential(
    [
        layers.Dense(1024, activation='relu', input_dim=efficientNetBaseCanonicalFlatOutputSizeGrayscale),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),
    ],
    name='efficientNetB0MNISTBinaryClassifier')

#### 2.2.1 Feature extraction function

In [33]:
def extractFeatures(model, generator, modelOutputShape:tuple, numSamples=None):
    """Run `model.predict` over batches from `generator` and collect features and labels.

    Parameters:
        model: object exposing `predict(inputBatch)`; each prediction must have
            shape `(len(batch), *modelOutputShape)`.
        generator: iterable yielding `(inputBatch, labelBatch)` pairs. It may be
            endless (the loop stops on its own once enough samples are collected)
            and must expose `.n` when `numSamples` is not given.
        modelOutputShape: shape of the features of one sample.
        numSamples: number of samples to collect; a falsy value (None or 0)
            means `generator.n`.

    Returns:
        `(features, labels)` with shapes `(k, *modelOutputShape)` and `(k,)`,
        where `k == numSamples` unless the generator ran out earlier.

    Batches are written using their actual length, so a final batch that is
    smaller than the others is kept rather than dropped, and the function does
    not depend on any global batch size.
    """
    if not numSamples:
        numSamples = generator.n
    features = np.zeros(shape=(numSamples, *modelOutputShape))
    labels = np.zeros(shape=(numSamples,))

    numCollected = 0
    for inputBatch, labelBatch in generator:
        # Never write past the requested number of samples
        numToTake = min(len(labelBatch), numSamples - numCollected)
        if numToTake <= 0:
            break
        featureBatch = model.predict(inputBatch)
        features[numCollected : numCollected + numToTake] = featureBatch[:numToTake]
        labels[numCollected : numCollected + numToTake] = labelBatch[:numToTake]
        numCollected += numToTake
        if numCollected >= numSamples: # Required for endless generators
            break
    # Trim the unused tail if a finite generator was exhausted early
    return features[:numCollected], labels[:numCollected]

#### 2.2.2 Pretrained EfficientNet feature extraction

In [34]:
if compileEfficientNetFE:
    # Push every partition through the frozen pretrained base, then flatten each
    # feature map to a vector so it can be fed to the dense classifier.
    efficientNetTrainFeatures, efficientNetTrainLabels = extractFeatures(
        efficientNetB0Base, efficientNetTrainDataGenerator, efficientNetBaseCanonicalOutputShape)
    efficientNetTestFeatures, efficientNetTestLabels = extractFeatures(
        efficientNetB0Base, efficientNetTestDataGenerator, efficientNetBaseCanonicalOutputShape)
    efficientNetValFeatures, efficientNetValLabels = extractFeatures(
        efficientNetB0Base, efficientNetValDataGenerator, efficientNetBaseCanonicalOutputShape)

    efficientNetTrainFeatures = efficientNetTrainFeatures.reshape(
        (efficientNetTrainFeatures.shape[0], efficientNetBaseCanonicalFlatOutputSize))
    efficientNetTestFeatures = efficientNetTestFeatures.reshape(
        (efficientNetTestFeatures.shape[0], efficientNetBaseCanonicalFlatOutputSize))
    efficientNetValFeatures = efficientNetValFeatures.reshape(
        (efficientNetValFeatures.shape[0], efficientNetBaseCanonicalFlatOutputSize))

    flattenedEfficientNetFeatureShape = efficientNetTrainFeatures.shape

### 2.3 Compilation of baseline models

In [35]:
##
# Each entry pairs a Keras metric with the display name used for it in plot
# titles and plot filenames; the two lists below keep this order.
_metricsWithNames = [
    (keras.metrics.BinaryAccuracy(), 'binary accuracy'),
    (keras.metrics.BinaryCrossentropy(), 'binary cross entropy'),
    (keras.metrics.AUC(), 'AUC'),
    (keras.metrics.Precision(), 'precision'),
    (keras.metrics.Recall(), 'recall'),
]
metrics = [metric for metric, _ in _metricsWithNames]
metricNames = [name for _, name in _metricsWithNames]

epochs = 100
##

#### 2.3.1 Baseline CNN

In [36]:
if compileBaselineCNN:
    # Train the plain CNN on the grayscale generators and keep the per-epoch history for plotting
    baselineCNN.compile(
        tfa.optimizers.Yogi(),
        'binary_crossentropy',
        metrics)
    baselineCNNHistory = baselineCNN.fit(
        x=trainDataGenerator,
        validation_data=valDataGenerator,
        epochs=epochs)
    baselineCNN.save('baselineCNN.h5')

#### 2.3.2 EfficientNet feature extraction classifier

In [37]:
if compileEfficientNetFE or compileFinetunedEfficientNet:
    # Train the dense head on the pre-extracted (flattened) EfficientNet features
    efficientNetB0FEClassifier.compile(
        tfa.optimizers.Yogi(),
        'binary_crossentropy',
        metrics)
    efficientNetB0FEClassifierHistory = efficientNetB0FEClassifier.fit(
        x=efficientNetTrainFeatures,
        y=efficientNetTrainLabels,
        validation_data=(efficientNetValFeatures, efficientNetValLabels),
        batch_size=batchSize,
        epochs=epochs)
    efficientNetB0FEClassifier.save('efficientNetB0FEClassifier.h5')

### 2.3.3 Finetuned EfficientNet

In [38]:
# Should we train this one less?
##
finetuningLearningRate = 1e-5
layersToUnfreeze = ['top_conv', 'block7a_project_conv', 'block7a_dwconv']
##

if compileFinetunedEfficientNet:
    # This is an alias, not a copy: unfreezing layers here also changes efficientNetB0Base
    trainableEfficientNetB0Base = efficientNetB0Base
    trainableEfficientNetB0Base.trainable = True
    # Only the listed top layers stay trainable; every other layer is frozen again
    for layer in trainableEfficientNetB0Base.layers:
        layer.trainable = layer.name in layersToUnfreeze

    finetunedEfficientNetB0 = models.Sequential(name='finetunedEfficientNetB0')
    finetunedEfficientNetB0.add(trainableEfficientNetB0Base)
    finetunedEfficientNetB0.add(layers.Flatten())
    finetunedEfficientNetB0.add(efficientNetB0FEClassifier) # Note that this is already trained
    #finetunedEfficientNetB0.summary()

    # learning_rate is the documented argument name; lr is a deprecated alias
    finetunedEfficientNetB0.compile(optimizer=tfa.optimizers.Yogi(learning_rate=finetuningLearningRate),
                                    loss='binary_crossentropy',
                                    metrics=metrics)
    # No batch_size here: the generator already yields batches of batchSize
    # images, and Keras does not accept batch_size for generator input.
    finetunedEfficientNetB0History = finetunedEfficientNetB0.fit(efficientNetTrainDataGenerator,
                                                                 epochs=epochs,
                                                                 validation_data=efficientNetValDataGenerator)
    finetunedEfficientNetB0.save('finetunedEfficientNetB0.h5')

### 2.3.4 MNIST EfficientNet

In [None]:
##
MNISTTrainEpochs = 100
##

if compileMNISTEfficientNet:
    # ---- Stage 1: train the EfficientNet base from scratch on MNIST (10 classes) ----
    MNISTEfficientNetB0Categorical = models.Sequential(name='MNISTEfficientNetB0Categorical')
    MNISTEfficientNetB0Categorical.add(untrainedEfficientNetB0Base)
    MNISTEfficientNetB0Categorical.add(layers.Flatten())
    MNISTEfficientNetB0Categorical.summary()
    MNISTEfficientNetB0Categorical.add(efficientNetB0MNISTCategoricalClassifier)

    (MNISTTrainImages, MNISTTrainLabels), (MNISTTestImages, MNISTTestLabels) = mnist.load_data()

    MNISTTrainImages = MNISTTrainImages.astype('float32') / 255
    MNISTTestImages = MNISTTestImages.astype('float32') / 255

    # Transform to conform to minimum input size by padding with 4 white pixels on each spatial axis (2 before and after)
    # NOTE(review): the border value is 1.0; confirm this matches the intended background of the MNIST digits
    MNISTTrainImages = np.pad(MNISTTrainImages, pad_width=((0, 0), (2, 2), (2, 2)), constant_values=(1., 1.))
    MNISTTestImages = np.pad(MNISTTestImages, pad_width=((0, 0), (2, 2), (2, 2)), constant_values=(1., 1.))

    # Add the channel axis explicitly so the images match efficientNetMinInputShape
    MNISTTrainImages = MNISTTrainImages[..., np.newaxis]
    MNISTTestImages = MNISTTestImages[..., np.newaxis]

    MNISTTrainLabels = keras.utils.to_categorical(MNISTTrainLabels)
    MNISTTestLabels = keras.utils.to_categorical(MNISTTestLabels)

    # The shared `metrics` list holds binary-classification metrics; a 10-way
    # softmax is tracked with categorical accuracy.
    MNISTEfficientNetB0Categorical.compile(optimizer=tfa.optimizers.Yogi(),
                               loss='categorical_crossentropy',
                               metrics=['categorical_accuracy'])

    MNISTEfficientNetB0Categorical.fit(MNISTTrainImages, MNISTTrainLabels, epochs=MNISTTrainEpochs, batch_size=batchSize)
    MNISTEfficientNetB0Categorical.save('MNISTEfficientNetB0Categorical.h5')


    # ---- Stage 2: reuse the MNIST-trained base for the binary diagram task ----
    MNISTEfficientNetB0Binary = models.Sequential(name='MNISTEfficientNetB0Binary')

    # Trainable stem that downsamples (150, 150, 3) diagram images to the
    # (32, 32, 1) input of the MNIST-trained base.
    MNISTEfficientNetB0Binary.add(layers.MaxPool2D(pool_size=(4, 4), strides=4, padding='same', input_shape=(*imageResolution, 3))) # (150, 150, 3) -> (38, 38, 3)
    MNISTEfficientNetB0Binary.add(layers.Conv2D(1, 5, padding='valid', activation = 'relu')) # -> (34, 34, 1)
    MNISTEfficientNetB0Binary.add(layers.Conv2D(1, 3, padding='valid', activation = 'relu')) # -> (32, 32, 1)
    # The sizes above are specific to imageResolution == (150, 150); fail loudly if that changes
    assert MNISTEfficientNetB0Binary.output_shape[1:] == efficientNetMinInputShape

    # This is an alias, not a copy: freezing it also freezes untrainedEfficientNetB0Base
    trainedEfficientNetB0Base = untrainedEfficientNetB0Base
    trainedEfficientNetB0Base.trainable = False

    MNISTEfficientNetB0Binary.add(trainedEfficientNetB0Base)
    MNISTEfficientNetB0Binary.add(layers.Flatten())
    MNISTEfficientNetB0Binary.add(efficientNetB0MNISTBinaryClassifier)
    #MNISTEfficientNetB0Binary.summary()

    # Phase 1: base frozen; only the stem and the binary classifier are trained.
    # The model takes images, so it is fitted on the three-channel image
    # generators (which already batch the data, hence no batch_size).
    # NOTE(review): these generators yield unscaled pixels while the base saw
    # [0, 1] MNIST images in stage 1 -- confirm this mismatch is acceptable.
    MNISTEfficientNetB0Binary.compile(optimizer=tfa.optimizers.Yogi(),
                                       loss='binary_crossentropy',
                                       metrics=metrics)
    MNISTEfficientNetB0BinaryFrozenBaseHistory = MNISTEfficientNetB0Binary.fit(efficientNetTrainDataGenerator,
                                                                       epochs=epochs,
                                                                       validation_data=efficientNetValDataGenerator)

    # Phase 2: unfreeze the base and finetune everything at a low learning rate.
    # The model must be recompiled for the trainable change to take effect.
    trainedEfficientNetB0Base.trainable = True
    MNISTEfficientNetB0Binary.compile(optimizer=tfa.optimizers.Yogi(learning_rate=finetuningLearningRate),
                                       loss='binary_crossentropy',
                                       metrics=metrics)
    MNISTEfficientNetB0BinaryClassifierTrainingHistory = MNISTEfficientNetB0Binary.fit(efficientNetTrainDataGenerator,
                                                                       epochs=epochs,
                                                                       validation_data=efficientNetValDataGenerator)
    MNISTEfficientNetB0Binary.save('MNISTEfficientNetB0Binary.h5')

Model: "MNISTEfficientNetB0Categorical"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb0 (Functional)  (None, 1, 1, 1280)        4048991   
_________________________________________________________________
flatten_13 (Flatten)         (None, 1280)              0         
Total params: 4,048,991
Trainable params: 4,006,972
Non-trainable params: 42,019
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100

#### 2.4.1 History plotting functions

In [None]:
def zipMetricsKeysWithNames(keys, metricNames):
    """Pair the training-metric keys of a history with their display names.

    `keys` is expected to list the loss first, then the training metrics, then
    the validation entries in the second half. The loss (first key) and the
    whole second half are skipped; the remaining keys must match `metricNames`
    in number.

    Returns a zip iterator of `(key, name)` pairs.
    """
    allKeys = list(keys)
    firstValidationIndex = len(allKeys) // 2
    trainingMetricKeys = allKeys[1:firstValidationIndex]
    assert len(trainingMetricKeys) == len(metricNames)
    return zip(trainingMetricKeys, metricNames)

def plotTrainingHistories(history, metricsKeysNamesZip, modelName:str):
    """Draw one training-vs-validation figure per metric.

    history: object whose `.history` dict maps each metric key, and its
        'val_'-prefixed counterpart, to a per-epoch list of values.
    metricsKeysNamesZip: iterable of `(history key, display name)` pairs.
    modelName: model name forwarded to each figure.
    """
    for metricKey, displayName in metricsKeysNamesZip:
        plotOneTrainingHistory(
            history.history[metricKey],
            history.history[f'val_{metricKey}'],
            displayName,
            modelName)

def plotOneTrainingHistory(metricHistory, metricValHistory, metricName, modelName):
    """Plot one metric's training and validation curves and save the figure.

    Training values are drawn as blue dots and validation values as a red line,
    against epochs numbered from 1. The figure is saved as
    '<ShortModelName><ShortMetricName>.jpeg' in the working directory.
    """
    epochNumbers = range(1, len(metricHistory) + 1)

    plt.figure()
    plt.plot(epochNumbers, metricHistory, 'bo', label=f'Training {metricName}')
    plt.plot(epochNumbers, metricValHistory, 'r', label=f'Validation {metricName}')
    plt.xlabel('Epoch')
    plt.ylabel(metricName)
    plt.title(f'Training and validation {metricName} for {modelName}')
    plt.legend()

    outputFilename = f'{shortenModelOrMetricName(modelName)}{shortenModelOrMetricName(metricName)}.jpeg'
    plt.savefig(outputFilename, transparent=False, bbox_inches='tight')

def shortenModelOrMetricName(modelOrMetricName:str):
    """Title-case the name and remove its spaces, e.g. 'binary accuracy' -> 'BinaryAccuracy'."""
    titleCasedWords = modelOrMetricName.title().split(' ')
    return ''.join(titleCasedWords)

### 2.5.1 History plots for baseline CNN

In [None]:
if compileBaselineCNN:
    # Pair each recorded training metric with its display name, then draw one figure per metric
    metricsKeysNames = zipMetricsKeysWithNames(
        keys=baselineCNNHistory.history.keys(),
        metricNames=metricNames)
    plotTrainingHistories(
        history=baselineCNNHistory,
        metricsKeysNamesZip=metricsKeysNames,
        modelName='Baseline CNN')
    plt.show()

### 2.5.2 History plots for EfficientNet feature extractor

In [None]:
if compileEfficientNetFE:
    # Pair each recorded training metric with its display name, then draw one figure per metric
    metricsKeysNames = zipMetricsKeysWithNames(
        keys=efficientNetB0FEClassifierHistory.history.keys(),
        metricNames=metricNames)
    plotTrainingHistories(
        history=efficientNetB0FEClassifierHistory,
        metricsKeysNamesZip=metricsKeysNames,
        modelName='EfficientNet feature extraction')
    plt.show()

### 2.5.3 History plots for finetuned EfficientNet

In [None]:
if compileFinetunedEfficientNet:
    # Pair each recorded training metric with its display name, then draw one figure per metric
    metricsKeysNames = zipMetricsKeysWithNames(
        keys=finetunedEfficientNetB0History.history.keys(),
        metricNames=metricNames)
    plotTrainingHistories(
        history=finetunedEfficientNetB0History,
        metricsKeysNamesZip=metricsKeysNames,
        modelName='Finetuned EfficientNet')
    plt.show()

### 2.5.4 MNIST-EfficientNet

In [None]:
if compileMNISTEfficientNet:
    # Use the History object returned by the last fit() of the binary model.
    # The metric keys live in its .history dict, not on the model itself.
    metricsKeysNames = zipMetricsKeysWithNames(MNISTEfficientNetB0BinaryClassifierTrainingHistory.history.keys(), metricNames)
    plotTrainingHistories(MNISTEfficientNetB0BinaryClassifierTrainingHistory, metricsKeysNames, 'MNIST EfficientNet')
    plt.show()

### 2.5.5 Function for generating aggregate multiplots from saved plot images

In [None]:
#
# These must equal the filename stems written by the history plots, which are
# the title-cased model/metric names with spaces removed. Title-casing
# lowercases inner capitals ('EfficientNet' -> 'Efficientnet', 'MNIST' -> 'Mnist'),
# and the mismatch matters on case-sensitive file systems.
shortModelNames = ['BaselineCnn', 'EfficientnetFeatureExtraction', 'FinetunedEfficientnet', 'MnistEfficientnet']
shortMetricNames = ['BinaryAccuracy', 'BinaryCrossEntropy', 'Precision', 'Recall']

generateMultiplot = True
#

def plotAggregateStats(shortModelNames, shortMetricNames, imageExtension='jpeg'):
    """Combine the saved per-metric plot images into one grid figure and save it.

    The grid has one row per model and one column per metric. The image for a
    cell is read from '<model><metric>.<imageExtension>' in the working
    directory, so every such file must already exist.

    shortModelNames / shortMetricNames: filename stems of the saved plots.
    imageExtension: extension of the saved plot images; the default matches the
        '.jpeg' files written by plotOneTrainingHistory.
    """
    numModels = len(shortModelNames)
    numMetrics = len(shortMetricNames)

    for rowIndex, model in enumerate(shortModelNames):
        for columnIndex, metric in enumerate(shortMetricNames):
            # Subplot indices are 1-based and run row by row
            plt.subplot(numModels, numMetrics, rowIndex*numMetrics + columnIndex + 1)
            graph = plt.imread(f'{model}{metric}.{imageExtension}')
            plt.imshow(graph)
    plt.suptitle('Training statistics for baseline models', y=1.05)
    plt.savefig(f'AggregateTrainStats{numModels}Models{numMetrics}Metrics.jpeg')

if generateMultiplot:
    plotAggregateStats(shortModelNames, shortMetricNames)
    plt.show()