# Connect to google drive

In [None]:
# Mount Google Drive at /gdrive (requires the Colab runtime) and move into the
# homework folder, so the relative file names used by later cells
# (e.g. 'public_data.npz') resolve inside that folder.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive/My Drive/[2023-2024] AN2DL/Homework 1

# Import libraries and set parameters

In [None]:
# Reproducibility and log-noise configuration.
# A single seed value is shared by every random number generator seeded in
# this notebook.
seed = 80

import os
# These variables are written before TensorFlow and matplotlib are imported
# in the following cells.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'PYTHONHASHSEED': str(seed),
    'MPLCONFIGDIR': f"{os.getcwd()}/configs/",
})

import warnings
# Silence FutureWarning first, then every Warning subclass.
for ignored_category in (FutureWarning, Warning):
    warnings.simplefilter(action='ignore', category=ignored_category)

import numpy as np
import logging
import random

# Seed NumPy's global generator and Python's `random` module.
np.random.seed(seed)
random.seed(seed)

In [None]:
# Import tensorflow
# `tfk` and `tfkl` are the short aliases used throughout the notebook for
# tf.keras and tf.keras.layers.
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
# Lower TensorFlow's own verbosity (autograph, Python logger, compat.v1 logger)
# so that only errors are reported. `logging` comes from the previous cell.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow with the notebook-wide `seed` defined in the previous cell.
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
# Record the TensorFlow version used for this run.
print(tf.__version__)

In [None]:
# Import other libraries
#library for computer vision
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns

# Data upload and preprocessing

In [None]:
# Conditional check for unzipping
# Set `unzip` to True to extract public_data.zip into the current directory;
# with the default False the shell command below is skipped.
unzip = False

if unzip:
    !unzip public_data.zip


In [None]:
# Load the dataset archive from the current working directory.
# NOTE(security): allow_pickle=True makes np.load unpickle Python objects,
# which can execute arbitrary code. Only use it on a trusted archive.
data = np.load('public_data.npz', allow_pickle=True)

# Raw (not yet normalized) images and their string labels
images_not_normalized = data['data']
labels_strings = data['labels']

# Integer encoding of the two classes
label_map = {"healthy": 0, "unhealthy": 1}

# Fail loudly on an unexpected label: `label_map.get` would otherwise map it
# to None and silently corrupt the label array.
unknown_labels = set(np.unique(labels_strings)) - set(label_map)
if unknown_labels:
    raise ValueError(f"Unexpected label values in dataset: {sorted(unknown_labels)}")

labels = np.vectorize(label_map.get)(labels_strings)
print(images_not_normalized.shape)
print(labels.shape)

In [None]:
# Normalize images
# Divide every pixel by 255 and store the result as float32, in one
# vectorized step instead of a per-image Python loop.
# NOTE(review): assumes the raw pixel values are in 0-255 (later cells
# multiply by 255 again before feeding the network) - confirm.
images = (images_not_normalized / 255).astype(np.float32)

# Print a compact summary instead of dumping the pixels of a whole image.
print(images.shape, images.dtype, images.min(), images.max())

In [None]:
# Dataset cleaning.
# Positions of the outlier samples, found by manual inspection of the dataset.
indices = np.array([
 58, 95, 137, 138, 171, 207, 338,  412, 434, 486, 506, 529, 571, 599, 622, 658, 692, 701, 723, 725, 753, 779, 783, 827, 840, 880, 898, 901, 961, 971, 974, 989,
 1028, 1044, 1064, 1065, 1101, 1149, 1172, 1190, 1191, 1265, 1268, 1280, 1333, 1384, 1443, 1466, 1483, 1528, 1541, 1554, 1594, 1609, 1630, 1651, 1690, 1697, 1752, 1757, 1759,
 1806, 1828, 1866, 1903, 1938, 1939, 1977, 1981, 1988, 2022, 2081, 2090, 2150, 2191, 2192, 2198, 2261, 2311, 2328, 2348, 2380, 2426, 2435, 2451, 2453, 2487, 2496, 2515, 2564, 2581,
 2593, 2596, 2663, 2665, 2676, 2727, 2734, 2736, 2755, 2779, 2796, 2800, 2830, 2831, 2839, 2864, 2866, 2889, 2913, 2929, 2937, 3033, 3049, 3055, 3086, 3105, 3108, 3144, 3155, 3286,
 3376, 3410, 3436, 3451, 3488, 3490, 3572, 3583, 3666, 3688, 3700, 3740, 3770, 3800, 3801, 3802, 3806, 3811, 3821, 3835, 3862, 3885, 3896, 3899, 3904, 3927, 3931, 3946, 3950, 3964,
 3988, 3989, 4049, 4055, 4097, 4100, 4118, 4144, 4150, 4282, 4310, 4314, 4316, 4368, 4411, 4475, 4476, 4503, 4507, 4557, 4605, 4618, 4694, 4719, 4735, 4740, 4766, 4779, 4837, 4848,
 4857, 4860, 4883, 4897, 4903, 4907, 4927, 5048, 5080, 5082, 5121, 5143, 5165, 5171,
])
print(indices.shape)

# Drop the listed rows from the images and from the labels so the two arrays
# stay aligned.
# NOTE: this overwrites `images`/`labels`, so the cell must run exactly once
# per kernel session.
images = np.delete(images, indices, axis=0)
labels = np.delete(labels, indices, axis=0)
print(images.shape)

# Class imbalance

Counting the samples per class reveals an imbalance between the negative class (healthy, label 0) and the positive class (unhealthy, label 1).

In [None]:
# Class-wise sample counts: label 0 versus every other label.
count0 = int(np.count_nonzero(labels == 0))
count1 = int(labels.shape[0] - count0)

print(count0)
print(count1)

# Class 1 is much less represented!

# First attempt - Dataset balancing through targeted data augmentation

In [None]:
# Recount the two classes on the cleaned labels: label 0 versus every other
# label. `count0` and `count1` are left as plain Python ints.
count0 = int(np.count_nonzero(labels == 0))
count1 = int(labels.shape[0] - count0)

print(count0)
print(count1)

# Class 1 is much less represented!

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define data augmentation settings.
# The transformations were chosen so that the network does not learn spurious
# features. For example, blurring is deliberately left out: applied to class 1
# only, it would teach the network "blurry image -> class 1" and would not
# generalize.
data_augmentation = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Number of extra class-1 samples needed to match class 0. It is computed here
# from `labels` instead of reading (and mutating) count0/count1 from another
# cell, so re-running this cell always produces the same balanced dataset.
n_class0 = int(np.count_nonzero(labels == 0))
n_class1 = int(labels.shape[0] - n_class0)
n_to_generate = max(0, n_class0 - n_class1)

# Augment only class 1: each class-1 sample gets at most one augmented copy,
# inserted right before the original, until the deficit is filled.
# NOTE(review): the augmented copies are created before the train/validation
# split, so an image and its augmented twin can end up on opposite sides of
# the split and inflate validation accuracy - consider augmenting after
# splitting.
images_augmented = []
labels_augmented = []

for image, label in zip(images, labels):
    if label == 1 and n_to_generate > 0:
        images_augmented.append(data_augmentation.random_transform(image))
        labels_augmented.append(1)
        n_to_generate -= 1
    images_augmented.append(image)
    labels_augmented.append(label)

images_augmented = np.array(images_augmented)
labels_augmented = np.array(labels_augmented)

# images_augmented / labels_augmented now hold the original samples plus the
# augmented class-1 samples.
print(images_augmented.shape)
print(labels_augmented.shape)


In [None]:
# Class counts after the targeted augmentation: label 0 versus every other
# label.
count0 = int(np.count_nonzero(labels_augmented == 0))
count1 = int(labels_augmented.shape[0] - count0)

print(count0)
print(count1)

# Now they are equal!

# Fix labels, split samples

In [None]:
# Insert a new axis at position 1 (a 1-D label vector becomes a column).
labels_augmented = labels_augmented[:, np.newaxis]
print(labels_augmented.shape)

In [None]:
 #one hot encoding
# The number of classes is taken from the distinct label values present.
labels_augmented = tfk.utils.to_categorical(
    labels_augmented,
    num_classes=len(np.unique(labels_augmented)),
)

In [None]:
# Stratified 75/25 train/validation split of the balanced dataset.
# Stratification uses the class index recovered from the one-hot labels.
class_index = np.argmax(labels_augmented, axis=1)
X_train_augmented, X_val_augmented, y_train_augmented, y_val_augmented = train_test_split(
    images_augmented,
    labels_augmented,
    test_size=.25,
    random_state=seed,
    stratify=class_index,
)

In [None]:
# Report the shapes of the training and validation arrays.
train_report = f"X_train shape: {X_train_augmented.shape}, y_train shape: {y_train_augmented.shape}"
val_report = f"X_val shape: {X_val_augmented.shape}, y_val shape: {y_val_augmented.shape}"
print(train_report, val_report, sep="\n")

In [None]:
# Per-sample input and output shapes (leading batch axis dropped), plus the
# batch size and epoch count reported below.
input_augmented_shape, output_augmented_shape = X_train_augmented.shape[1:], y_train_augmented.shape[1:]
batch_size, epochs = 32, 100

# Summary line
print(f"Input Shape: {input_augmented_shape}, Output Shape: {output_augmented_shape}, Batch Size: {batch_size}, Epochs: {epochs}")

# Train EffNetV2S

In [None]:
# ImageNet-pretrained EfficientNetV2-S used as a frozen feature extractor:
# no fully connected top, global average pooling on the output, and the
# built-in input preprocessing kept inside the model.
effnetv2s_model = tf.keras.applications.EfficientNetV2S(
    weights="imagenet",
    include_top=False,
    include_preprocessing=True,
    pooling='avg',
    input_shape=input_augmented_shape,
)
# Freeze the backbone for the transfer-learning phase.
effnetv2s_model.trainable = False

In [None]:
# Augmentation layers applied in front of the backbone.
augmentation_layers = [
    tfkl.RandomFlip("vertical"),
    tfkl.RandomFlip("horizontal"),
    tfkl.RandomRotation(0.5),
    tfkl.RandomZoom(0.1),
]
preprocessing = tf.keras.Sequential(augmentation_layers, name='preprocessing')

# Input -> augmentation -> frozen EfficientNetV2-S features
inputs = tfk.Input(shape=input_augmented_shape)
x = effnetv2s_model(preprocessing(inputs))

# Classification head: two Dense+ReLU blocks (256 then 128 units), each
# followed by batch normalization.
for hidden_units in (256, 128):
    x = tfkl.Dense(hidden_units, activation='relu')(x)
    x = tfkl.BatchNormalization()(x)

# Two-unit softmax classifier
outputs = tfkl.Dense(2, activation='softmax')(x)

# Assemble the model and compile it with categorical cross-entropy and AdamW.
model = tfk.Model(inputs=inputs, outputs=outputs, name='model')
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.AdamW(1e-4, weight_decay=5e-4),
    metrics=['accuracy'],
)

# Display model summary
model.summary()

In [None]:
# Both callbacks monitor validation accuracy (higher is better).
# Stop after 25 epochs without improvement and restore the best weights.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max', patience=25, restore_best_weights=True
)
# Multiply the learning rate by 0.1 after 20 stagnant epochs, never going
# below 1e-5.
reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy", mode='max', factor=0.1, patience=20, min_lr=1e-5
)
callbacks = [early_stopping, reduce_lr]

In [None]:
# Train the classification head on the balanced dataset.
# The images were normalized to [0, 1] earlier, while the backbone was built
# with include_preprocessing=True and expects inputs in the range 0-255, so
# they are scaled back here.
# The previous call wrapped the inputs in `preprocess_input`, which is never
# defined or imported in this notebook (NameError). It is dropped because the
# model already performs its own preprocessing, matching the SMOTE section.
effnetv2s_history = model.fit(
    x = X_train_augmented*255.0,
    y = y_train_augmented,
    batch_size = 32,
    epochs = 1000,  # upper bound; EarlyStopping in `callbacks` ends training
    validation_data = (X_val_augmented*255.0, y_val_augmented), # Same scaling for the validation set
    callbacks = callbacks
).history

In [None]:
# Plot the training history: one figure per metric, with the training curve
# dashed and faint and the validation curve solid.
curve_color = '#ff7f0e'
for train_key, val_key, figure_title in (
    ('loss', 'val_loss', 'Categorical Crossentropy'),
    ('accuracy', 'val_accuracy', 'Accuracy'),
):
    plt.figure(figsize=(15,5))
    plt.plot(effnetv2s_history[train_key], alpha=.3, color=curve_color, linestyle='--')
    plt.plot(effnetv2s_history[val_key], label='EffnetV2S', alpha=.8, color=curve_color)
    plt.legend(loc='upper left')
    plt.title(figure_title)
    plt.grid(alpha=.3)

plt.show()

In [None]:
# Save the transfer-learning model (frozen backbone + trained head) so the
# fine-tuning step below can reload it.
model.save('EffNetTargetedAugmentation_TL')

In [None]:
# Reload the saved transfer-learning model as the starting point for fine-tuning.
ft_modelS = tfk.models.load_model('EffNetTargetedAugmentation_TL')

In [None]:
# Unfreeze the EfficientNetV2-S backbone but keep its batch-normalization
# layers frozen. Those layers are identified by the substring 'bn' in the
# layer name.
backbone = ft_modelS.get_layer('efficientnetv2-s')
backbone.trainable = True
for position, backbone_layer in enumerate(backbone.layers):
    is_batch_norm = 'bn' in backbone_layer.name
    if is_batch_norm:
        backbone_layer.trainable = False
    print(position, backbone_layer.name, backbone_layer.trainable)

In [None]:
# Freeze the first N backbone layers; only the layers after them remain
# candidates for fine-tuning.
N = 200
backbone = ft_modelS.get_layer('efficientnetv2-s')
for frozen_layer in backbone.layers[:N]:
    frozen_layer.trainable = False

# List the final trainable flag of every backbone layer, then the model summary.
for position, backbone_layer in enumerate(backbone.layers):
    print(position, backbone_layer.name, backbone_layer.trainable)
ft_modelS.summary()

In [None]:
# Recompile for fine-tuning with a lower learning rate and weight decay than
# the transfer-learning phase (1e-5 / 5e-5 instead of 1e-4 / 5e-4).
fine_tune_loss = tfk.losses.CategoricalCrossentropy()
fine_tune_optimizer = tfk.optimizers.AdamW(1e-5, weight_decay=5e-5)
ft_modelS.compile(
    loss=fine_tune_loss,
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

In [None]:
# Fine-tune on the balanced (targeted-augmentation) split.
# The previous call used X_train1 / y_train1, which are never defined, and
# X_val / y_val, which are only created later in the SMOTE section. This
# section's own arrays are X_train_augmented / y_train_augmented and
# X_val_augmented / y_val_augmented.
# Images are multiplied by 255 because they were normalized to [0, 1] earlier
# and the EfficientNetV2-S backbone was built with include_preprocessing=True.
ft_modelS_history = ft_modelS.fit(
    x = X_train_augmented*255,
    y = y_train_augmented,
    batch_size = 32,
    epochs = 1000,  # upper bound; EarlyStopping ends training
    validation_data = (X_val_augmented*255, y_val_augmented),
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=25, restore_best_weights=True),
                 tfk.callbacks.ReduceLROnPlateau(monitor="val_accuracy", factor=0.1, patience=20, min_lr=1e-6, mode='max')]
).history

In [None]:
# Save the fine-tuned model of the targeted-augmentation attempt.
ft_modelS.save('EffNetTargetedAugmentation_FT')

# Second attempt - SMOTE

We proceeded in this way:


1. Split the data into training and validation;
2. Use SMOTE as a class balancer;
3. Double check that the training set is now balanced.

In [None]:
# Class counts on the cleaned, not yet balanced labels: label 0 versus every
# other label.
count0 = int(np.count_nonzero(labels == 0))
count1 = int(labels.shape[0] - count0)

print(count0)
print(count1)

# Class 1 is much less represented!

In [None]:
# Stratified 75/25 split of the cleaned dataset, done before any balancing.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=.25,
    random_state=seed,
    stratify=labels,
)
print(len(X_train), len(X_val))

In [None]:
 #One hot encoding
# The number of classes is taken from the distinct validation label values.
y_val = tfk.utils.to_categorical(y_val, num_classes=len(np.unique(y_val)))

In [None]:
from imblearn.over_sampling import SMOTE

# Balance the training set only; the validation set is left untouched.
# NOTE(review): random_state=42 differs from the notebook-wide `seed` - confirm
# whether that is intentional.
sm = SMOTE(random_state=42)

# SMOTE works on 2-D (samples, features) data, so flatten each image, resample,
# then restore the original per-image shape. The shape is read from the array
# instead of being hard-coded as 96 * 96 * 3.
image_shape = X_train.shape[1:]
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_train_flat, y_train = sm.fit_resample(X_train_flat, y_train)
X_train = X_train_flat.reshape(-1, *image_shape)

# Double check the balance: count of class 0 versus the rest.
count0 = int(np.count_nonzero(y_train == 0))
print(count0, y_train.shape[0]-count0)

In [None]:
 #One hot encoding
# The number of classes is taken from the distinct training label values.
y_train = tfk.utils.to_categorical(y_train, num_classes=len(np.unique(y_train)))
print(y_train.shape)

Inspect the new images produced

In [None]:
# Show a 10x10 grid of training images, starting at a fixed offset.
imgs_to_show=100
startToShowFrom=3000

fig, axes = plt.subplots(10, 10, figsize=(30, 20))

# Flatten the grid of axes so each panel can be addressed by a single index.
axes = axes.ravel()

for offset in range(imgs_to_show):
    sample_index = offset + startToShowFrom
    panel = axes[offset]
    panel.imshow(X_train[sample_index])
    # The title shows the one-hot label of the sample.
    panel.set_title(f'i: {y_train[sample_index]}')
    panel.axis('off')

# Tighten the layout and render the grid.
plt.tight_layout()
plt.show()

In [None]:
# Per-sample input and output shapes (leading batch axis dropped), plus the
# batch size and epoch count reported below.
input_shape, output_shape = X_train.shape[1:], y_train.shape[1:]
batch_size, epochs = 32, 100

# Summary line
print(f"Input Shape: {input_shape}, Output Shape: {output_shape}, Batch Size: {batch_size}, Epochs: {epochs}")

# Train EffNetV2S

In [None]:
# ImageNet-pretrained EfficientNetV2-S for 96x96 RGB inputs: no fully
# connected top, global average pooling on the output, and the built-in
# input preprocessing kept inside the model.
backbone_options = dict(
    weights="imagenet",
    include_top=False,
    include_preprocessing=True,
    input_tensor=None,
    input_shape=(96, 96, 3),
    pooling="avg",
    classifier_activation="softmax",
)
effnet_s = tf.keras.applications.EfficientNetV2S(**backbone_options)
# Draw the backbone graph with tensor shapes as the cell output.
tfk.utils.plot_model(effnet_s, show_shapes=True)

In [None]:
# Freeze the backbone for the transfer-learning phase and re-seed TensorFlow.
effnet_s.trainable = False
tf.random.set_seed(seed)

# Augmentation layers applied in front of the backbone.
preprocessing = tf.keras.Sequential([
    tfkl.RandomFlip("vertical"),
    tfkl.RandomFlip("horizontal"),
    tfkl.RandomRotation(0.5),
    tfkl.RandomZoom(0.3)
], name='preprocessing')

inputs = tfk.Input(shape=(96, 96, 3))
# Keep the layer and its output under different names: the previous code
# rebound `preprocessing` to the output tensor, hiding the layer object.
augmented = preprocessing(inputs)
x = effnet_s(augmented)

# Classification head: Dense -> ReLU -> BatchNorm, twice (256 then 128 units),
# with seeded He-uniform initialization.
x = tfkl.Dense(units=256, kernel_initializer=tfk.initializers.HeUniform(seed=seed), name='HiddenDense1')(x)
x = tfkl.Activation('relu', name='HiddenActivation1')(x)
x = tfkl.BatchNormalization()(x)
x = tfkl.Dense(units=128, kernel_initializer=tfk.initializers.HeUniform(seed=seed), name='HiddenDense2')(x)
x = tfkl.Activation('relu', name='HiddenActivation2')(x)
x = tfkl.BatchNormalization()(x)

# Two-unit softmax classifier
outputs = tfkl.Dense(2, activation='softmax')(x)

eff_modelS = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Categorical cross-entropy with AdamW
eff_modelS.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.AdamW(1e-4, weight_decay=5e-4),
    metrics=['accuracy'],
)
eff_modelS.summary()

In [None]:
# Train the classification head on the SMOTE-balanced training set.
# Both callbacks monitor validation accuracy (higher is better).
smote_early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max', patience=25, restore_best_weights=True
)
smote_reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy", factor=0.1, patience=20, min_lr=1e-5, mode='max'
)

# The images are multiplied by 255 because they were normalized to [0, 1]
# earlier and EfficientNet expects inputs in the range 0-255.
eff_S_history = eff_modelS.fit(
    x=X_train*255,
    y=y_train,
    validation_data=(X_val*255, y_val),
    batch_size=32,
    epochs=1000,
    callbacks=[smote_early_stopping, smote_reduce_lr],
).history

In [None]:
# Save the transfer-learning model of the SMOTE attempt so the fine-tuning
# step below can reload it.
eff_modelS.save('EffnetSmall_SMOTE_TL')

In [None]:
# Reload the saved SMOTE transfer-learning model as the starting point for
# fine-tuning. This rebinds `ft_modelS`, the name also used in the first attempt.
ft_modelS = tfk.models.load_model('EffnetSmall_SMOTE_TL')

In [None]:
# Unfreeze the EfficientNetV2-S backbone but keep its batch-normalization
# layers frozen. Those layers are identified by the substring 'bn' in the
# layer name.
backbone = ft_modelS.get_layer('efficientnetv2-s')
backbone.trainable = True
for position, backbone_layer in enumerate(backbone.layers):
    is_batch_norm = 'bn' in backbone_layer.name
    if is_batch_norm:
        backbone_layer.trainable = False
    print(position, backbone_layer.name, backbone_layer.trainable)

In [None]:
# Freeze the first N backbone layers; only the layers after them remain
# candidates for fine-tuning.
N = 200
backbone = ft_modelS.get_layer('efficientnetv2-s')
for frozen_layer in backbone.layers[:N]:
    frozen_layer.trainable = False

# List the final trainable flag of every backbone layer, then the model summary.
for position, backbone_layer in enumerate(backbone.layers):
    print(position, backbone_layer.name, backbone_layer.trainable)
ft_modelS.summary()

In [None]:
# Recompile for fine-tuning with a lower learning rate and weight decay than
# the transfer-learning phase (1e-5 / 5e-5 instead of 1e-4 / 5e-4).
fine_tune_loss = tfk.losses.CategoricalCrossentropy()
fine_tune_optimizer = tfk.optimizers.AdamW(1e-5, weight_decay=5e-5)
ft_modelS.compile(
    loss=fine_tune_loss,
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

In [None]:
# Fine-tune on the SMOTE-balanced training set.
# The previous call used X_train1 / y_train1, which are never defined in this
# notebook. The SMOTE-balanced arrays produced above are X_train / y_train.
# Images are multiplied by 255 because they were normalized to [0, 1] earlier
# and the EfficientNetV2-S backbone was built with include_preprocessing=True.
ftSmall_history = ft_modelS.fit(
    x = X_train*255,
    y = y_train,
    batch_size = 32,
    epochs = 1000,  # upper bound; EarlyStopping ends training
    validation_data = (X_val*255, y_val),
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=25, restore_best_weights=True),
                 tfk.callbacks.ReduceLROnPlateau(monitor="val_accuracy", factor=0.1, patience=20, min_lr=1e-6, mode='max')]
).history

In [None]:
# Save the fine-tuned model of the SMOTE attempt.
ft_modelS.save('EffNetSmall_SMOTE_FT')