# Problem 1: PatchCamelyon
*Course*: DS807 \
*Authors*: August E. Wennerwald, Kasper Lin Hannberg, Oliver Klejst, Søren Pico, Thomas Fischer



## Modules and data

In [113]:
# -- MODULES -- #

# Tensorflow
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50

from tensorflow.keras.applications import EfficientNetB0, EfficientNetB1, EfficientNetB2
from tensorflow.keras.layers import Input, Dense, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model


# Additional
from matplotlib import pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, accuracy_score
import random
from tqdm import tqdm
from re import VERBOSE

# -- DIRECTORY -- #
wd = './'

In [114]:
# NOTE: un-comment if run in google colab

# from google.colab import drive
# drive.mount('/content/drive')
# wd = '/content/drive/My Drive/DS807 (AML)/'
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Num GPUs Available:  1


In [23]:
# Convert data to (x,y)-format
def convert_sample(sample):
    image, label = sample['image'], sample['label']
    image = tf.image.convert_image_dtype(image, tf.float32)
    label = tf.one_hot(label, 2, dtype=tf.float32)
    return image, label

In [22]:
# -- LOAD DATA -- #

# AE, VAE
d1a, d2a, d3a = tfds.load('patch_camelyon', split=[f'train[50%:]',f'test[50%:]',f'validation[50%:]'],
                          data_dir=wd+'DATA/PCAM',
                          download=False,
                          shuffle_files=True)

# Classification
d1b, d2b, d3b = tfds.load('patch_camelyon', split=[f'train[:50%]',f'test[:50%]',f'validation[:50%]'],
                          data_dir=wd+'DATA/PCAM',
                          download=False,
                          shuffle_files=True)

# AE, VAE
train_images_a = d1a.map(lambda x: convert_sample(x)[0]).batch(128)
validation_images_a = d3a.map(lambda x: convert_sample(x)[0]).batch(128)
test_images_a = d2a.map(lambda x: convert_sample(x)[0]).batch(128)

# Classification
train_data_b = d1b.map(convert_sample).batch(64)
validation_data_b = d3b.map(convert_sample).batch(64)
test_data_b = d2b.map(convert_sample).batch(64)

In [21]:
# -- ADDITIONAL FUNCTIONS -- #

# Function for freezing parameters of input model
def freeze_model_parameters(model):
    for l in model.layers:
        l.trainable = False

# Function for generating plots of traning history
def plot_training_hist(hist, best_epoch = None):
  fig = plt.figure(figsize=plt.figaspect(0.1 * 2))

  ax = fig.add_subplot(1, 2, 1)
  ax.plot(hist.history['val_loss'], label='Validation loss')
  ax.plot(hist.history['loss'], label='Training loss')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  if best_epoch is not None:
    plt.axvline(x=best_epoch, c='black', linestyle='--', label='Best epoch')
  plt.legend()

  ax = fig.add_subplot(1, 2, 2)
  ax.plot(hist.history['val_accuracy'], label='Validation accuracy')
  ax.plot(hist.history['accuracy'], label='Training accuracy')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.legend()

## Baseline CNN models

In [None]:
# -- FUNCTION TO PLOT RESHAPE SAMPLE -- ##

# To show resized images - this example is with 32,32,1, same has been used for 6,6,1
# train_images_baseline = d1a.map(lambda x: convert_sample(x)[0]).batch(32)
# validation_images_baseline = d3a.map(lambda x: convert_sample(x)[0]).batch(32)
# test_images_baseline = d2a.map(lambda x: convert_sample(x)[0]).batch(32)

# Test reshape
def sample_reshape(idx = None, shape = [6,6], grayScale = True, normalize = False):
    fig = plt.figure(figsize=(10,10))

    batch = next(iter(test_images_a))
    if idx is None:
        idx = random.randint(0,len(batch))
    print(idx)

    original_img = batch[idx].numpy()
    fig.add_subplot(1,2,1)
    plt.imshow(original_img)
    plt.axis('off')

    reshaped_img = tf.image.resize(original_img, shape)

    if grayScale:
        reshaped_img = tf.image.rgb_to_grayscale(reshaped_img)

    if normalize:
        reshaped_img = reshaped_img / 255

    fig.add_subplot(1,2,2)
    plt.imshow(reshaped_img, cmap='gray')
    plt.axis('off')

    print(f'Size of reshaped image: {reshaped_img.shape}')

# sample_reshape(19, shape = [32,32])
# sample_reshape(19, shape = [6,6])

In [None]:
# -- 6x6 IMAGE CLASSIFICATION -- #

#Function to resize and grayscale images
def resize_six(image):
    image= tf.image.resize(image,[6,6])
    image = tf.image.rgb_to_grayscale(image)
    return image

#apply the function on the datasets
train_data_reshaped_six = train_data_b.map(lambda x, y: (resize_six(x), y))
validation_data_reshaped__six = validation_data_b.map(lambda x, y: (resize_six(x), y))
test_data_reshaped__six = test_data_b.map(lambda x, y: (resize_six(x), y))

#CNN model for 6x6 pixels
cnn_model_six = tf.keras.models.Sequential([
    Conv2D(6, (2, 2), padding='same', activation='relu', input_shape=(6,6,1)),
    BatchNormalization(),
    Conv2D(12, (2, 2), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(18, (2,2), padding='same', activation= 'relu'),
    Dropout(rate=0.3),
    Conv2D(36, (2,2), padding='same', activation= 'relu'),
    Conv2D(72, (3,3), padding='same', activation= 'relu'),
    Dropout(rate=0.2),
    Conv2D(96, (4,4), padding='same', activation= 'relu'),
    Flatten(),
    Dense(36, activation='relu'),
    BatchNormalization(),
    Dense(12, activation='relu'),
    Dropout(rate=0.2),
    Dense(6, activation='relu'),
    Dense(2, activation='softmax'),
], name="cnn_model_six")

#optimizer, applying a Stochastic Gradient Descent
sgd_opt = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

#one hot encoded from the convert_sample function so therefore categorical_crossentropy
cnn_model_six.compile(loss='categorical_crossentropy',
                      optimizer=sgd_opt,
                      metrics=['accuracy'])

#Early stopping callback
early_stop = EarlyStopping(monitor='val_loss',
                           patience=3,
                           restore_best_weights=True,
                           verbose=1)

#Get overview and parameters of model
# cnn_model_six.summary()

In [None]:
# Fitting the model, running it for 50 epochs with early_stop as callback
hist_six = cnn_model_six.fit(train_data_reshaped_six,
                 validation_data=validation_data_reshaped__six,
                 epochs=50,
                 callbacks=[early_stop])

# cnn_model_six.save_weights(wd+'ds807_problem1_weights/saved_weights_baseline_CNN/CNN_baseline_six_weights.h5')

In [None]:
#Now plotting accuracy and loss for training and validation set using function plots()
plot_training_hist(hist_six, best_epoch = 5)

#To evaluate the model on the test data
cnn_model_six.evaluate(test_data_reshaped__six)

In [None]:
# -- 32x32 IMAGE CLASSIFICATION -- #

#same procedure as before, this time rescaling to 32,32
def resize(image):
    image= tf.image.resize(image,[32,32])
    image = tf.image.rgb_to_grayscale(image)
    return image

#apply function on datasets
train_data_reshaped_thirtytwo = train_data_b.map(lambda x, y: (resize(x), y))
validation_data_reshaped__thirtytwo = validation_data_b.map(lambda x, y: (resize(x), y))
test_data_reshaped__thirtytwo = test_data_b.map(lambda x, y: (resize(x), y))

#CNN model for 32x32 pixels
cnn_model_thirtytwo = tf.keras.models.Sequential([
    Conv2D(32, (2, 2), padding='same', activation='relu', input_shape=(32,32,1)),
    BatchNormalization(),
    Conv2D(64, (2, 2), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(64, (2,2), padding='same', activation= 'relu'),
    Dropout(rate=0.3),
    Conv2D(32, (2,2), padding='same', activation= 'relu'),
    Flatten(),
    Dense(16, activation='relu'),
    BatchNormalization(),
    Dense(8, activation='relu'),
    Dropout(rate=0.2),
    Dense(2, activation='softmax'),
], name="cnn_model_thirtytwo")

#Creating optimizer, the Stochastic Gradient Descent
sgd_opt = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

#compiling model
cnn_model_thirtytwo.compile(loss='categorical_crossentropy',
                            optimizer=sgd_opt,
                            metrics=['accuracy'])

#Overview of model to get parameters
#cnn_model_thirtytwo.summary()

In [None]:
# Fitting the model, running it for 50 epochs with early_stop as callback
hist_thirtytwo = cnn_model_thirtytwo.fit(train_data_reshaped_thirtytwo,
                 validation_data=validation_data_reshaped__thirtytwo,
                 epochs=50,
                 callbacks=[early_stop])

# cnn_model_thirtytwo.save_weights(wd+'ds807_problem1_weights/saved_weights_baseline_CNN/CNN_baseline_thirtytwo_weights.h5')

In [None]:
#Now plotting accuracy and loss for training and validation set using function plots()
plot_training_hist(hist_thirtytwo, best_epoch=5)

#Evaluating model on test set
cnn_model_thirtytwo.evaluate(test_data_reshaped__thirtytwo)

## Autoender (AE)

### Building and training AE-model

In [None]:
# -- BUILD AND TRAIN AE-MODEL -- #

encoder_ae = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16,kernel_size=(3, 3),activation='relu',padding='same',strides=2,input_shape=(96, 96, 3)),
    tf.keras.layers.Conv2D(8, kernel_size=(3, 3), activation='relu', padding='same', strides=2),
],name='encoder_ae')

decoder_ae = tf.keras.models.Sequential([
    tf.keras.layers.Conv2DTranspose(8,3,strides=2,activation='relu',padding='same',input_shape=(24, 24, 8)),
    tf.keras.layers.Conv2DTranspose(16, 3, strides=2, activation='relu', padding='same'),
    tf.keras.layers.Conv2D(3, kernel_size=(3, 3), activation='sigmoid', padding='same'),
],name='decoder_ae')

autoencoder = tf.keras.models.Sequential([encoder_ae, decoder_ae], name='autoencoder')

autoencoder.compile(loss='mse', optimizer='adam')

ae_early_stop = EarlyStopping(monitor='val_loss',
                           patience=3,
                           restore_best_weights=True,
                           verbose=1)

In [None]:
# Create new data for training autoender
ae_train, ae_test, ae_val = tfds.load('patch_camelyon', split=[f'train[:50%]',f'test[:50%]',f'validation[:50%]'],
                          data_dir=wd+'DATA/PCAM',
                          download=False,
                          shuffle_files=True)


ae_train = ae_train.map(convert_sample).batch(32)
ae_test = ae_test.map(convert_sample).batch(32)
ae_val = ae_val.map(convert_sample).batch(32)

In [None]:
autoencoder.fit(ae_train.map(lambda x, y: (x,x)),
                validation_data=ae_val.map(lambda x, y: (x,x)),
                epochs=30,
                callbacks=[ae_early_stop])

#encoder_ae.save_weights(wd+'ds807_problem1_weights/saved_weights_AE/ae_encoder_weights.h5')
#decoder_ae.save_weights(wd+'ds807_problem1_weights/saved_weights_AE/ae_decoder_weights.h5')

In [None]:
# -- SAMPLE AE RECONSTRUCTIONS -- #

# Load autoencoder weights
encoder_ae.load_weights(wd+'saved_weights_AE/ae_encoder_weights.h5')
decoder_ae.load_weights(wd+'saved_weights_AE/ae_decoder_weights.h5')
autoencoder = tf.keras.models.Sequential([encoder_ae, decoder_ae], name='autoencoder')

# Function for plotting sample AE reconstructions
def plot_ae_reconstructions(n, model):

    class_label_dict = {0: 'benign', 1: 'malignant'}
    plt.figure(figsize=(20,4))
    batch = next(iter(ae_test))

    for i in range(n):
        original_img = batch[0][i].numpy()
        class_label_int = tf.argmax(batch[1][i]).numpy()
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(original_img)
        plt.title(f'original \n {class_label_int} ({class_label_dict[class_label_int]})')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        original_img_reshaped = original_img.reshape((1,)+original_img.shape)
        reconstructed_img = model.predict(original_img_reshaped)
        reconstructed_img_reshaped = np.reshape(reconstructed_img, (96,96,3))
        mse = round(tf.keras.losses.MeanSquaredError()(original_img, reconstructed_img_reshaped).numpy(),4)
        mse = str(mse)
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(reconstructed_img_reshaped)
        plt.title("reconstructed")
        plt.title(f'reconstructions \n MSE = {mse}')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.subplots_adjust(hspace=0.5, wspace=0.2)
    plt.show()

plot_ae_reconstructions(10, autoencoder)

### AE-based classification

In [None]:
# -- AE-BASED CLASSIFICATION MODEL -- #

# Load and freeze encoder parameters
encoder_ae.load_weights(wd+'saved_weights_AE/ae_encoder_weights.h5')
freeze_model_parameters(encoder_ae)

#CNN model:
ae_classifier = tf.keras.models.Sequential([
    encoder_ae,
    Conv2D(16, (2,2), padding='same', activation='relu'),
    Conv2D(32, (2,2), padding='same', activation='relu'),
    Dropout(rate=0.2),
    Conv2D(32, (2,2), padding='same', activation='relu'),
    Conv2D(16, (2,2), padding='same', activation='relu'),
    Dropout(rate=0.2),
    Conv2D(8, (2,2), padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(32, activation='relu'),
    Dropout(rate=0.5),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax'),
], name = 'ae_classifier')

#Optimizer, using the Stochastic Gradient Descent
sgd_opt = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

#one hot encoded so therefore categorical_crossentropy
ae_classifier.compile(loss='categorical_crossentropy', optimizer=sgd_opt, metrics=['accuracy'])

#Early stopping as callback
early_stop_ae_classification = EarlyStopping(monitor='val_loss',
                                             patience=3,
                                             restore_best_weights=True,
                                             verbose = 1)

# Summary of model
ae_classifier.summary()

In [None]:
#Fitting model:
ae_classification_hist = ae_classifier.fit(train_data_b,
                   validation_data=validation_data_b,
                   epochs=50,
                   callbacks =[early_stop_ae_classification])


# ae_classifier.save_weights(wd+'ds807_problem1_weights/saved_weights_AE/ae_classifier_weights.h5')

In [None]:
# Test ae-classifier and plot training
# ae_classifier.evaluate(test_data_b)
plot_training_hist(ae_classification_hist)

## Variational autoencoder (VAE)

### Building and training VAE-models

In [38]:
# -- GENERAL SETUP OF VAE MODELS -- #

# VAE class
class VAE(tf.keras.Model):
    def __init__(self, latent_dim, encoder, decoder):
        super(VAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = encoder
        self.decoder = decoder

    def encode(self, x):
        params = self.encoder(x)
        return tf.split(params, num_or_size_splits=2, axis=1) # mean, logvar

    def decode(self, z):
        return self.decoder(z)

    def reparameterize(self, mean, logvar):
        eps = tf.random.normal(shape=mean.shape)
        return eps * tf.exp(logvar * 0.5) + mean

    @tf.function
    def sample(self, eps=None):
        if eps is None:
            eps = tf.random.normal(shape=(100, self.latent_dim))
        return tf.sigmoid(self.decode(eps))

# Loss function
def log_normal_pdf(sample, mean, logvar, raxis=1):
    log2pi = tf.math.log(2. * np.pi)
    vals = -.5 * ((sample - mean) ** 2. * tf.exp(-logvar) + logvar + log2pi)

    return tf.reduce_sum(vals, axis=raxis)

def compute_loss(model, x):
    mean, logvar = model.encode(x)
    z = model.reparameterize(mean, logvar)
    x_logit = model.decode(z)
    cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=x)
    logpx_z = -tf.reduce_sum(cross_ent, axis=[1, 2, 3])
    logpz = log_normal_pdf(z, 0., 0.)
    logqz_x = log_normal_pdf(z, mean, logvar)

    return -tf.reduce_mean(logpx_z + logpz - logqz_x)

# Training step
@tf.function
def train_step(model, x, optimizer):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

# Reconstruct and plot test/validation images
def generate_and_show_images(model, epoch, test_sample):
    mean, logvar = model.encode(test_sample)
    z = model.reparameterize(mean, logvar)
    predictions = model.sample(z)
    fig = plt.figure(figsize=(4, 4))

    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i + 1)
        plt.imshow(predictions[i])
        plt.axis('off')

    plt.show()

# Setup optimizer
optimizer = tf.keras.optimizers.legacy.Adam(1e-4)   # Use when run on M1/M2
#optimizer = tf.keras.optimizers.Adam(1e-4)          # Use otherwise

# Sample for reconstrunction
test_sample = next(iter(validation_images_a.take(1)))[:16]


In [39]:
# -- VAE_16 MODEL -- #

vae16_encoder = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, kernel_size= 3, strides = 1, padding='same', activation='relu', input_shape=(96, 96, 3)),
    tf.keras.layers.Conv2D(128, kernel_size=3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(256, kernel_size=3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(2 * 16), # latent dim = 16
])

vae16_decoder = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=24*24*32, activation='relu', input_shape=(16,)),
    tf.keras.layers.Reshape(target_shape= (24, 24, 32)),
    tf.keras.layers.Conv2DTranspose(256, kernel_size = 3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(128, kernel_size = 3, strides= 1, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(64, kernel_size = 3, strides= 1, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(32, kernel_size = 3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(3,3, strides= 1, padding="same", activation="sigmoid")
])

#vae16_encoder.summary()
#vae16_decoder.summary()

vae16 = VAE(16, vae16_encoder, vae16_decoder)


In [None]:
# -- TRAINING OF VAE_16 MODEL -- #
for epoch in range(30):
    for train_x in train_images_a:
        train_step(vae16, train_x, optimizer)

    loss = tf.keras.metrics.Mean()
    for test_x in test_images_a:
        loss(compute_loss(vae16, test_x))
    variational_lower_bound = -loss.result()

    print(f'Epoch: {epoch+1}, Test set variational lower bound: {variational_lower_bound}')
    generate_and_show_images(vae16, epoch, test_sample)

In [None]:
# -- SAVE VAE_16 EN+DECODER WEIGHTS -- #
#vae16_encoder.save_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_16_encoder_weights.h5')
#vae16_decoder.save_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_16_decoder_weights.h5')

In [40]:
# -- VAE_32 MODEL -- #

vae32_encoder = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, kernel_size= 3, strides = 1, padding='same', activation='relu', input_shape=(96, 96, 3)),
    tf.keras.layers.Conv2D(128, kernel_size=3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(256, kernel_size=3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(2 * 32), # latent dim = 32
])

vae32_decoder = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=24*24*32, activation='relu', input_shape=(32,)),
    tf.keras.layers.Reshape(target_shape= (24, 24, 32)),
    tf.keras.layers.Conv2DTranspose(256, kernel_size = 3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(128, kernel_size = 3, strides= 1, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(64, kernel_size = 3, strides= 1, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(32, kernel_size = 3, strides= 2, padding='same', activation='relu'),
    tf.keras.layers.Conv2DTranspose(3,3, strides= 1, padding="same", activation="sigmoid")
])

#vae32_encoder.summary()
#vae32_decoder.summary()

vae32 = VAE(32, vae32_encoder, vae32_decoder)

In [None]:
# -- TRAINING OF VAE_32 MODEL -- #
for epoch in range(40):
    for train_x in train_images_a:
        train_step(vae32, train_x, optimizer)

    loss = tf.keras.metrics.Mean()
    for test_x in test_images_a:
        loss(compute_loss(vae32, test_x))
    variational_lower_bound = -loss.result()

    print(f'Epoch: {epoch+1}, Test set variational lower bound: {variational_lower_bound}')
    generate_and_show_images(vae32, epoch, test_sample)

In [None]:
# -- SAVE VAE_32 EN+DECODER WEIGHTS -- #
#vae16_encoder.save_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_32_encoder_weights.h5')
#vae16_decoder.save_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_32_decoder_weights.h5')

In [None]:
# -- FUNCTIONS FOR REPORT PLOTS -- #

# Plot sample reconstruction
def sample_VAE_reconstruction(model, idx = None):
    fig = plt.figure(figsize=(10,10))

    batch = next(iter(test_images_a))
    if idx is None:
        idx = random.randint(0,len(batch))
    print(idx)

    original_img = batch[idx].numpy()
    fig.add_subplot(1,2,1)
    plt.imshow(original_img)
    plt.axis('off')

    original_img_reshaped = original_img.reshape((1,)+original_img.shape)

    mean, log_var = model.encode(original_img_reshaped)
    z = model.reparameterize(mean, log_var)
    reconstructed_img = model.sample(z)
    reconstructed_img_reshaped = np.reshape(reconstructed_img, (96,96,3))
    fig.add_subplot(1,2,2)
    plt.imshow(reconstructed_img_reshaped)
    plt.axis('off')


# Plot sample VAE_16 reconstruction
# vae16_encoder.load_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_16_encoder_weights.h5')
# vae16_decoder.load_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_16_decoder_weights.h5')
# vae16_pre_trained = VAE(16, vae16_encoder, vae16_decoder)
# sample_VAE_reconstruction(vae16_pre_trained)

### VAE-based classification

In [41]:
# -- SET UP VAE-BASED CLASSIFIERS -- #

# Load and freeze encoder parameters
vae16_encoder.load_weights(wd+'saved_weights_VAE/VAE_16_encoder_weights.h5')
vae32_encoder.load_weights(wd+'saved_weights_VAE/VAE_32_encoder_weights.h5')

freeze_model_parameters(vae16_encoder)
freeze_model_parameters(vae32_encoder)

# Build classifiers

# VAE_16 classifier
vae16_classifier = tf.keras.models.Sequential([
    vae16_encoder,
    tf.keras.layers.Reshape((32,1)),
    tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=2, activation='softmax')
])

# VAE_32 classifier
vae32_classifier = tf.keras.models.Sequential([
    vae32_encoder,
    tf.keras.layers.Reshape((64,1)),
    tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=2, activation='softmax')
])

#vae16_classifier.summary()
#vae32_classifier.summary()

In [43]:
# -- SET UP TRAINING CALLBACK AND COMPILE CLASSIFIERS -- #

early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                           patience=3,
                                                           restore_best_weights=True,
                                                           verbose=1)

vae16_classifier.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

vae32_classifier.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# -- TRAIN VAE_16 CLASSIFIER -- #

vae_16_tr_hist = vae16_classifier.fit(train_data_b,
                                    validation_data=validation_data_b,
                                    epochs=30,
                                    callbacks=[early_stopping_callback])

#vae16_classifier.save_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_16_classifier_weights.h5')

In [None]:
# -- TRAIN VAE_32 CLASSIFIER -- #

vae_32_tr_hist = vae32_classifier.fit(train_data_b,
                                    validation_data=validation_data_b,
                                    epochs=30,
                                    callbacks=[early_stopping_callback])

#vae32_classifier.save_weights(wd+'ds807_problem1_weights/saved_weights_VAE/VAE_32_classifier_weights.h5')

In [None]:
# -- EVALUATE CLASSIFIER AND PLOT TRAINING -- #

#vae16_classifier.evaluate(test_data_b)
#vae32_classifier.evaluate(test_data_b)

#plot_training_hist(vae_32_tr_hist, best_epoch=4)
plot_training_hist(vae_32_tr_hist, best_epoch=3)

## Transfer learning



### EfficientNet

In [120]:
# -- SETUP EFFICIENTNET0 -- #

# Set basemodel using EfficientNet0
base_model = EfficientNetB1(
    include_top =False,
    weights= "imagenet",
    input_shape=(96,96,3)
)

# Freeze base model:
base_model.trainable = False

# summary of base model
# base_model.summary()

In [121]:
# Create new model on top:
PCam_EffNet1 = tf.keras.models.Sequential()
PCam_EffNet1.add(base_model)

# Add layers for binary classification
PCam_EffNet1.add(tf.keras.layers.Flatten()),
PCam_EffNet1.add(tf.keras.layers.Dense(200, activation='relu')),
PCam_EffNet1.add(tf.keras.layers.Dense(100, activation='relu')),
PCam_EffNet1.add(tf.keras.layers.Dense(50, activation='relu')),
PCam_EffNet1.add(tf.keras.layers.Dense(2, activation='softmax'))

# Summary to get parameters
# PCam_basemodel.summary()

# Compiling model
PCam_EffNet1.compile(optimizer='adam',
                       loss= 'categorical_crossentropy',
                       metrics=['accuracy'])

# Implement early stopping
eff_net_early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                          patience = 3,
                                                          restore_best_weights=True,
                                                          verbose=1)

In [122]:
PCam_EffNet1.summary()

Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb1 (Functional  (None, 3, 3, 1280)        6575239   
 )                                                               
                                                                 
 flatten_25 (Flatten)        (None, 11520)             0         
                                                                 
 dense_64 (Dense)            (None, 200)               2304200   
                                                                 
 dense_65 (Dense)            (None, 100)               20100     
                                                                 
 dense_66 (Dense)            (None, 50)                5050      
                                                                 
 dense_67 (Dense)            (None, 2)                 102       
                                                     

In [None]:
# Fit EfficientNet0
eff_net_hist = PCam_EffNet1.fit(train_data_b,
                                  validation_data=validation_data_b,
                                  epochs= 50,
                                  callbacks= [eff_net_early_stopping])


In [112]:
#PCam_EffNet1.save_weights(wd+'ds807_problem1_weights/EffNet_weights/EffNet2_weights.h5')

In [123]:
#PCam_EffNet1.load_weights(wd+'ds807_problem1_weights/EffNet_weights/EffNet1_weights.h5')

In [124]:
# -- EVALUATE EffNet0 -- #
PCam_EffNet1.evaluate(test_data_b)



[0.5990039110183716, 0.71502685546875]

### ResNet50

In [58]:
# -- SET UP RESNET50 -- #

# Load w/o standard input-output layers
pretrained_ResNet50 = ResNet50(weights='imagenet',
                       include_top = False,
                       input_shape=(96,96,3))

In [None]:
# -- APPROACH 1 -- #
# Freeze entire ResNet50 such that only fully connected part is trainable

ResNet50_1 = pretrained_ResNet50
freeze_model_parameters(ResNet50_1)

# ResNet50_1.summary()

PCam_ResNet50_1 = tf.keras.models.Sequential()
PCam_ResNet50_1.add(ResNet50_1)

# Add layers for binary classification
PCam_ResNet50_1.add(tf.keras.layers.Flatten())
PCam_ResNet50_1.add(tf.keras.layers.Dense(250, activation='relu')),
PCam_ResNet50_1.add(tf.keras.layers.Dense(2, activation='softmax'))

# PCam_ResNet50_1.summary()

# Compile
PCam_ResNet50_1.compile(optimizer='adam',
                      loss = 'categorical_crossentropy',
                      metrics=['accuracy'])

In [126]:
# -- SET UP TRAINING CALLBACK -- #
ResNet50_early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                                    patience=10,
                                                                    restore_best_weights=True,
                                                                    verbose=1)

In [None]:
# -- TRAIN PCam_ResNet50_1 -- #
PCam_ResNet50_1_tr_hist = PCam_ResNet50_1.fit(train_data_b,
                                              validation_data=validation_data_b,
                                              epochs=50,
                                              callbacks=[ResNet50_early_stopping_callback])

#PCam_ResNet50_1.save_weights(wd+'ds807_problem1_weights/ResNet50_weights/PCam_ResNet50_1_weights.h5')

In [None]:
# -- EVALUATE PCam_ResNet50_1 AND PLOT TRAINING -- #
PCam_ResNet50_1.evaluate(test_data_b)
plot_training_hist(PCam_ResNet50_1_tr_hist, best_epoch=9)

In [None]:
# -- APPROACH 2 -- #

ResNet50_2 = pretrained_ResNet50

# Freeze all but last convolutional block of ResNet50
for layer in ResNet50_2.layers[:143]:
  layer.trainable = False

# ResNet50_2.summary()

PCam_ResNet50_2 = tf.keras.models.Sequential()
PCam_ResNet50_2.add(ResNet50_2)

# Add layers for binary classification
PCam_ResNet50_2.add(tf.keras.layers.Flatten())
PCam_ResNet50_2.add(tf.keras.layers.Dense(250, activation='relu')),
PCam_ResNet50_2.add(tf.keras.layers.Dense(2, activation='softmax'))

PCam_ResNet50_2.summary()

# Compile
PCam_ResNet50_2.compile(optimizer='adam',
                      loss = 'categorical_crossentropy',
                      metrics=['accuracy'])

In [None]:
# -- TRAIN PCam_ResNet50_2 -- #
PCam_ResNet50_2_tr_hist = PCam_ResNet50_2.fit(train_data_b,
                                              validation_data=validation_data_b,
                                              epochs=50,
                                              callbacks=[ResNet50_early_stopping_callback])

# PCam_ResNet50_2.save_weights(wd+'ds807_problem1_weights/ResNet50_weights/PCam_ResNet50_2_weights.h5')

In [None]:
# -- EVALUATE PCam_ResNet50_2 AND PLOT TRAINING -- #
PCam_ResNet50_2.evaluate(test_data_b)
plot_training_hist(PCam_ResNet50_2_tr_hist, best_epoch=2)