#  Setup
1. Jupyter Environment Setup
2. Dataset Split in ImageDataGenerator format

In [None]:
colab = True # set this to True and activate the next cell when running on Google Colab

%tensorflow_version 2.x
from google.colab import drive
drive.mount('/content/drive')
!cp "/content/drive/My Drive/artificial-neural-networks-and-deep-learning-2020.zip" .
!mkdir Results
!cp "/content/drive/My Drive/tlX.h5" ./Results
!unzip -q artificial-neural-networks-and-deep-learning-2020.zip

In [None]:
# Cell output set up for Jupyter
from pathlib import Path
from IPython.core.interactiveshell import InteractiveShell
# "all": display the value of every top-level expression of a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import json
import random

SEED = 1234
random.seed(SEED)

# Number of pictures moved out of the training set to build the validation set
# (despite the "test" in the name, these pictures end up in the validation folder).
test_pictures_n = 1684 # 70/30 ratio for training and validation
target_file_name = "train_gt.json"
dataset_name = "MaskDataset"

# Label value found in the ground-truth JSON -> class sub-folder name
label_dirs = {0: "NO_PERSON", 1: "ALL_THE_PEOPLE", 2: "SOMEONE"}

# Setting up directory structure: <dataset>/<split>/<class>/
dataset_root = Path().joinpath(dataset_name)
for split_name in ("training", "validation"):
    for class_dir in label_dirs.values():
        dataset_root.joinpath(split_name, class_dir).mkdir(parents=True, exist_ok=True)

# Only the JSON parsing needs the file handle; close it before moving files around
with open(str(dataset_root.joinpath(target_file_name))) as f:
    data = json.load(f)

pictures = list(data.keys())
random.shuffle(pictures)
validation_pictures = pictures[0:test_pictures_n]
# Set for O(1) membership tests (the list lookup was O(n) per picture)
validation_lookup = set(validation_pictures)

# Files are moved from the flat training directory to the corresponding class folder,
# either under training or under validation.
# The listing is materialized first because files are renamed while iterating.
for path in list(dataset_root.joinpath("training").glob("*.jpg")):
    label = data[path.name]
    # Validate the label BEFORE touching the file so a bad entry never leaves
    # a picture half-moved.
    if label not in label_dirs:
        raise ValueError("Unrecognized label in " + target_file_name + " allowed values are 0, 1, 2 found: " + str(label))
    split_name = "validation" if path.name in validation_lookup else "training"
    # Single rename straight to the final <split>/<class>/ destination
    path.rename(str(dataset_root.joinpath(split_name, label_dirs[label], path.name)))
            

# Imports and Random seed setting

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, AveragePooling2D, DepthwiseConv2D, LeakyReLU
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.constraints import max_norm
import scipy as sp
from PIL import Image
from datetime import datetime
tf.random.set_seed(SEED)

# Dataset setup: augmentation and batch size

In [None]:
# Batch Size
bs = 32

# Target Image Shape (max 358 x 256)
img_h = 256
img_w = 256

# Augmentation configuration used for the training images only
train_data_gen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=10,
            # A scalar zoom_range z draws the zoom factors from [1 - z, 1 + z].
            # The previous value of 1 meant [0, 2]: factors close to 0 blow a few
            # pixels up to the whole frame and produce unusable training samples.
            # 0.1 keeps the zoom in line with the 10% shifts configured below.
            zoom_range=0.1,
            width_shift_range=0.1,
            height_shift_range=0.1,
            channel_shift_range=30,
            horizontal_flip=True)

# Validation images are only rescaled, never augmented
valid_data_gen = ImageDataGenerator(rescale=1./255)

dataset_dir = Path().joinpath(dataset_name)

num_classes = 3
# The order of this list fixes the class indices used by the generators
classes = ["NO_PERSON",
          "ALL_THE_PEOPLE",
          "SOMEONE"]

# Options shared by the training and the validation directory iterators
flow_options = dict(batch_size=bs,
                    classes=classes,
                    class_mode="categorical",
                    shuffle=True,
                    target_size=(img_h, img_w),
                    seed=SEED)


def _as_repeating_dataset(generator):
    """Wrap a Keras directory iterator into a repeating tf.data.Dataset of (images, one-hot labels)."""
    dataset = tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, img_h, img_w, 3], [None, num_classes]))
    return dataset.repeat()


training_dir = dataset_dir.joinpath("training")
train_gen = train_data_gen.flow_from_directory(str(training_dir), **flow_options)

validation_dir = dataset_dir.joinpath("validation")
valid_gen = valid_data_gen.flow_from_directory(str(validation_dir), **flow_options)

train_dataset = _as_repeating_dataset(train_gen)
valid_dataset = _as_repeating_dataset(valid_gen)

Found 5164 images belonging to 3 classes.
Found 450 images belonging to 3 classes.


# Class Weighting
In the given dataset the classes are not represented by an equal number of samples; class weighting compensates for this imbalance.

In [None]:
from collections import Counter

# Number of training images per class index
itemCt = Counter(train_gen.classes)
# The most populated class gets weight 1.0; rarer classes get proportionally larger weights
maxCt = float(max(itemCt.values()))
class_weight = dict((clsID, maxCt / numImg) for clsID, numImg in itemCt.items())

## LeNet+: LeNet with 1 convolutional layer more and ReLu
LeNet is a simple model originally conceived for handwritten digit classification. The problem at hand is far harder, thus a slightly more complex network has been chosen as a starting point. ReLU was not used in the original LeNet either, but as of today it proves to be a very solid choice as an activation function.

# LeNet+: three convolution / ReLU / max-pooling stages followed by a small dense classifier
model = Sequential([
    # First Convolution
    Conv2D(32, (3, 3), input_shape=(img_h, img_w, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Second Convolution
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Third Convolution
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier: the 3D feature maps are flattened to 1D feature vectors first
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(units=num_classes, activation="softmax"),
])

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
epochs = 50

# LeNetb: LeNet+ with Batch Normalization
TODO: retrain this without dropout; it may be the answer to the validation instability (although it should still not work incredibly well).

# LeNetb: same layout as LeNet+, with Batch Normalization placed before every ReLU
model = Sequential([
    # First Convolution (dropout is applied right after the convolution in this stage)
    Conv2D(32, (3, 3), input_shape=(img_h, img_w, 3)),
    Dropout(0.5),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Second Convolution
    Conv2D(32, (3, 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Third Convolution
    Conv2D(64, (3, 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier: the 3D feature maps are flattened to 1D feature vectors first
    Flatten(),
    Dense(64),
    Dropout(0.5),
    BatchNormalization(),
    Activation('relu'),
    Dense(units=num_classes, activation="softmax"),
])

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
epochs = 50

# Xception Loading

In [None]:
# Xception requires normalized inputs (this is already performed by ImageDataGenerator).
# NOTE(review): the generators rescale to [0, 1]; Keras' own Xception preprocessing
# scales to [-1, 1] - confirm that the [0, 1] range is intended.
xception_input_shape = (img_h, img_w, 3)
base_model = tf.keras.applications.Xception(
    include_top=False,  # drop the ImageNet classifier, keep only the convolutional backbone
    weights='imagenet',
    input_shape=xception_input_shape)

# tlX: transfer learning with Xception

# Freeze the pre-trained backbone: only the new classification head is trained
base_model.trainable = False

inputs = tf.keras.Input(shape=(img_h, img_w, 3))
# training=False prevents the Batch Normalization layers of the backbone from being
# updated (important for the later fine tuning)
backbone_features = base_model(inputs, training=False)
pooled_features = GlobalAveragePooling2D()(backbone_features)
regularized_features = Dropout(0.2)(pooled_features)
outputs = Dense(units=num_classes, activation="softmax")(regularized_features)

model = tf.keras.Model(inputs, outputs)
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
epochs = 20

# ftX: fine tuning with Xception

# Re-load the saved transfer-learning model; its location depends on the environment
tlx_model_path = "/content/Results/tlX.h5" if colab else Path().joinpath("Results", "tlX.h5")
model = tf.keras.models.load_model(tlx_model_path)

# ChennaiNet: improving tlX
Using tlX and ftX as a starting point, the same training procedure is applied, but the pooling is changed and a flattening layer and a dense layer with 0.5 Dropout are added, taking inspiration from https://arxiv.org/pdf/2009.08369.pdf.

# Freeze the pre-trained backbone: only the new classification head is trained
base_model.trainable = False
inputs = tf.keras.Input(shape=(img_h, img_w, 3))
# training=False prevents the Batch Normalization layers of the backbone from being
# updated (important for the later fine tuning)
x = base_model(inputs, training=False)
x = AveragePooling2D(pool_size=(5, 5))(x)
x = Flatten()(x)
x = Dense(128, kernel_constraint=max_norm(4.))(x)
# Every layer must be CALLED on the running tensor. Previously Activation, Dropout
# and an extra softmax Dense were only instantiated, which replaced `x` with a Layer
# object and made the output layer below fail.
x = Activation("relu")(x)
x = Dropout(0.5)(x)

outputs = Dense(units=num_classes, activation="softmax")(x)
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
epochs = 20

# Re-load the saved ChennaiNet model; its location depends on the environment
chennai_model_path = "/content/Results/chennai_net.h5" if colab else Path().joinpath("Results", "chennai_net.h5")
model = tf.keras.models.load_model(chennai_model_path)

# KadapaNet: more parameters and back to global pooling

In [None]:
# Freeze the pre-trained backbone: only the new classification head is trained
base_model.trainable = False

inputs = tf.keras.Input(shape=(img_h, img_w, 3))
# training=False prevents the Batch Normalization layers of the backbone from being
# updated (important for the later fine tuning)
backbone_features = base_model(inputs, training=False)
pooled_features = GlobalAveragePooling2D()(backbone_features)
# Single wide fully connected layer on top of the globally pooled features
hidden = Dense(2048, activation="relu")(pooled_features)
outputs = Dense(units=num_classes, activation="softmax")(hidden)

epochs = 40
model = tf.keras.Model(inputs, outputs)
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

# Re-load the saved KadapaNet model; its location depends on the environment
kadapa_model_path = "/content/Results/kadapa_net.h5" if colab else Path().joinpath("Results", "kadapa_net.h5")
model = tf.keras.models.load_model(kadapa_model_path)

# LebakaNet: one more dense layer
This is the classifier structure used in the Xception paper (https://arxiv.org/abs/1610.02357). Dropout layers are not present; TODO: understand whether adding them is needed.

In [None]:
# Freeze the pre-trained backbone: only the new classification head is trained
base_model.trainable = False

inputs = tf.keras.Input(shape=(img_h, img_w, 3))
# training=False prevents the Batch Normalization layers of the backbone from being
# updated (important for the later fine tuning)
backbone_features = base_model(inputs, training=False)
pooled_features = GlobalAveragePooling2D()(backbone_features)
# Two wide fully connected layers; no Dropout is applied between them for now
# (a Dropout(rate)(...) call could be inserted after each one if it proves necessary)
hidden_1 = Dense(2048, activation="relu")(pooled_features)
hidden_2 = Dense(2048, activation="relu")(hidden_1)
outputs = Dense(units=num_classes, activation="softmax")(hidden_2)

epochs = 40
model = tf.keras.Model(inputs, outputs)
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

# Set re-loaded transfer model to trainable for fine tuning

# Locate the pre-trained backbone nested inside the current model. The public
# tf.keras.Model class is used for the check: the tf.python.* module path is private
# and is not reachable in every TensorFlow release.
nested_models = [layer for layer in model.layers if isinstance(layer, tf.keras.Model)]
if not nested_models:
    # Fail loudly instead of unfreezing a stale `base_model` that is not part of `model`
    raise ValueError("No nested base model found in model.layers: nothing to fine tune")
# As before, the last nested model wins when several are present
base_model = nested_models[-1]

# Unfreeze the backbone; the model must be re-compiled for the change to take effect
base_model.trainable = True
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),  # Low learning rate
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
epochs = 10

## Callbacks

In [None]:
# Every run gets its own time-stamped folder: Callbacks/<model_name>_<timestamp>/
callbacks_dir = Path() / "Callbacks"
now = datetime.now().strftime("%b%d_%H-%M-%S")
model_name = "CNN"
callback_dir = callbacks_dir / (model_name + '_' + str(now))
ckpt_dir = callback_dir / "ckpts"      # model checkpoints
tb_dir = callback_dir / "tb_logs"      # Tensorboard logs
for run_directory in (callbacks_dir, callback_dir, ckpt_dir, tb_dir):
    run_directory.mkdir(parents=True, exist_ok=True)

# Model checkpoint: only the weights are stored
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=str(ckpt_dir / "cp.ckpt"),
    save_weights_only=True)

# Visualize Learning on Tensorboard
tb_callback = tf.keras.callbacks.TensorBoard(
    log_dir=str(tb_dir),
    histogram_freq=1,
    profile_batch=0)

callbacks = [ckpt_callback, tb_callback]

# Early Stopping on the validation loss
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss')
    callbacks.append(es_callback)

# Learning Rate Annealing: halve the learning rate when validation accuracy stalls
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=0)
lr_annhealing = True
if lr_annhealing:
    callbacks.append(learning_rate_reduction)

# Tensorboard
After running the cell below open [this link](http://localhost:6009) to view Tensorboard on a full browser page

In [None]:
# Load the Tensorboard notebook extension and serve the logs stored under ./Callbacks on port 6009
%load_ext tensorboard
%tensorboard --logdir ./Callbacks --port 6009

# Model Fit

In [None]:
# Train the currently selected model. Both datasets repeat forever, so the number
# of batches per epoch is taken from the length of the underlying directory iterators.
model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    steps_per_epoch=len(train_gen),
    validation_steps=len(valid_gen),
    class_weight=class_weight,
    callbacks=callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
 73/323 [=====>........................] - ETA: 2:41 - loss: 0.8846 - accuracy: 0.5651

In [None]:
# Make sure the output folder exists: outside Colab nothing creates it before this cell,
# and saving would fail after a full training run.
results_dir = Path().joinpath("Results")
results_dir.mkdir(parents=True, exist_ok=True)
# The path is converted with str() like every other path handed to Keras in this notebook
model.save(str(results_dir.joinpath("model" + datetime.now().strftime('%b%d_%H-%M-%S') + ".h5")))

# Prediction

## Test Time Augmentation

In [None]:
# When True, the prediction cell uses tta_predict instead of a single model.predict call
tta = False

def flip_lr(images):
    """Return `images` mirrored along axis 2 (reversed element order on that axis)."""
    return np.asarray(images)[:, :, ::-1]

def shift(images, shift, axis):
    """Circularly shift `images` by `shift` positions along `axis`.

    Elements pushed past one end of the axis re-enter at the opposite end.
    """
    rolled = np.roll(images, shift=shift, axis=axis)
    return rolled

def rotate(images, angle):
    """Rotate `images` by `angle` degrees in the plane spanned by axes 1 and 2.

    The output keeps the input shape (reshape=False); positions that fall
    outside the input are filled by replicating the nearest edge value.
    """
    # Import the submodule explicitly: `import scipy as sp` alone does not guarantee
    # that `sp.ndimage` is loaded on every SciPy version.
    from scipy import ndimage

    return ndimage.rotate(
        images, angle, axes=(1, 2),
        reshape=False, mode='nearest')

def combine_predictions(predictions):
    """Average `predictions` over axis 0 and return the argmax over the last axis."""
    averaged = np.asarray(predictions).mean(axis=0)
    return averaged.argmax(axis=-1)

def tta_predict(m, x_test, use_rotation=False):
    """Predict class indices for `x_test` with test time augmentation.

    The model predicts the original batch plus several transformed copies
    (3-position shifts in both directions along axes 1 and 2, and a flip along
    axis 2); the predictions are averaged and the argmax is returned.

    Args:
        m: object exposing `predict(batch)`.
        x_test: batch of images handed to `m.predict`.
        use_rotation: also include the -10 and +10 degree rotations. Off by
            default: the rotated predictions used to be computed and then left
            out of the average, which only wasted two forward passes.

    Returns:
        The result of `combine_predictions` on the stacked predictions.
    """
    augmented_batches = [
        x_test,
        shift(x_test, -3, axis=1),
        shift(x_test, 3, axis=1),
        shift(x_test, -3, axis=2),
        shift(x_test, 3, axis=2),
        flip_lr(x_test),
    ]
    if use_rotation:
        augmented_batches.append(rotate(x_test, -10))
        augmented_batches.append(rotate(x_test, 10))

    predictions = [m.predict(batch) for batch in augmented_batches]
    out = combine_predictions(np.stack(predictions))
    return out

In [None]:
# Predicted class index for every test picture, keyed by file name
results = {}
# Sorted so that the iteration (and therefore the exported file) order is reproducible
for path in sorted(Path().joinpath(dataset_name, "test").glob("*.jpg")):
    # The context manager closes the file handle as soon as the pixels are converted
    with Image.open(str(path)) as raw_image:
        image = raw_image.convert("RGB")
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
    image = image.resize((img_w, img_h), Image.LANCZOS)
    image = np.array(image)
    image = np.expand_dims(image, 0)  # add the batch dimension
    image = np.float32(image) / 255.0  # same rescaling as the data generators
    if not tta:
        results[path.name] = model.predict(image).argmax(axis=-1)[0]
    else:
        results[path.name] = tta_predict(model, image)[0]

In [None]:
# Export the predictions as a time-stamped "Id,Category" CSV inside the Results folder
results_dir = Path().joinpath("Results")
results_dir.mkdir(parents=True, exist_ok=True)
csv_fname = "results_" + datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'
with open(results_dir.joinpath(csv_fname), "w") as f:
    csv_lines = ["Id,Category\n"]
    for picture_name, category in results.items():
        csv_lines.append(picture_name + ',' + str(category) + '\n')
    f.writelines(csv_lines)