# Competition 1 - Image Classification
Competition 1 is about image classification. We have to use TensorFlow 2.0 (with Keras) to classify images into 20 different classes, starting from a very small dataset. We tried different approaches; here you will find the last one, which allowed us to reach 99% accuracy (the output of this notebook is not the same as the one we obtained in the challenge, because it was re-run with lighter settings for computational reasons).
***
The first thing to do is import all the necessary libraries; tensorflow of course, but also numpy, image, os and all the libraries we need.

In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import model_from_json
from tensorflow.keras.applications import InceptionResNetV2

from collections import Counter
from PIL import Image
import numpy as np

from os.path import isfile, isdir, join
from os import listdir

from datetime import datetime
from random import randrange
from tqdm import tqdm
import random, shutil, os, re, math

Now we look for GPUs: if at least one is available, TensorFlow will run in GPU mode; otherwise it will use the CPU.

In [2]:
# List the GPUs visible to TensorFlow; when the list is empty nothing below runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    # Report how many physical and logical GPUs were found
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

## Parameters
Here we will set the parameters.
The IMAGE_DIMENSION should be 224 or 256 for **good performance** (the cell below uses 75 only to keep this demo run light).
The seed makes the experiments reproducible.
The validation split determines the fraction of the training data that is moved to the validation set. The default here is 15%.
The batch size should be 64, 128 or 256. Try different values!
The loss MUST BE a categorical cross-entropy because of the nature of our problem (multi-class classification with one-hot targets).
Also try different optimizers with different learning rates!


In [3]:
# Set the image dimension the model will use
IMAGE_DIMENSION = 75

# Set the seed for reproducible experiments.
# The validation split is drawn with the stdlib `random` module, so seeding
# TensorFlow alone is not enough: seed `random` and NumPy as well.
SEED = 1
tf.random.set_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

# Get current working directory
cwd = os.getcwd()

# Position of the directories
dataset_dir = os.path.join(cwd, 'Dataset')
training_dir = os.path.join(dataset_dir, 'training')
validation_dir = os.path.join(dataset_dir, 'validation')
test_dir = os.path.join(dataset_dir, 'test')

# Class names; the position in this list is the class index used by the
# generators (they receive `classes=classes`) and written to the result CSV.
classes = ['owl',
           'galaxy',
           'lightning',
           'wine-bottle',
           't-shirt',
           'waterfall',
           'sword',
           'school-bus',
           'calculator',
           'sheet-music',
           'airplanes',
           'lightbulb',
           'skyscraper',
           'mountain-bike',
           'fireworks',
           'computer-monitor',
           'bear',
           'grand-piano',
           'kangaroo',
           'laptop']

# Hyperparameter definition
img_h = IMAGE_DIMENSION
img_w = IMAGE_DIMENSION
num_classes = len(classes)

validation_split = 0.15  # Fraction of each class moved to the validation set
bs = 64  # Batch size

loss = tf.keras.losses.CategoricalCrossentropy()
#lr = 8e-5
optimizer = tf.keras.optimizers.Adam()  # Adam with its default learning rate
metrics = ['accuracy']

The cells that follow create the actual dataset that is fed to the model. They perform the training/validation split and can be adapted to build a custom dataset (for example, duplicating images in the training set to compensate for class imbalance, or to apply more augmentation to the data).

In [4]:
# Clean previous experiments (if needed).
# This cell runs before the dataset helper functions are defined, so it relies
# only on the standard library: the folder is removed (best effort) and
# recreated empty, which leaves it in the same state as emptying it.
if os.path.exists('classification_experiments'):
    shutil.rmtree('classification_experiments', ignore_errors=True)
    os.makedirs('classification_experiments', exist_ok=True)

In [5]:
# This function clears all the elements in a folder
def clearFolder(folder):
    """Delete every entry inside ``folder`` while keeping the folder itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. Deletion is best effort: an entry that cannot be removed is
    reported with ``print`` and the remaining entries are still processed.

    Args:
        folder: Path of an existing directory to empty.

    Raises:
        OSError: If ``folder`` itself cannot be listed.
    """
    for entry_name in os.listdir(folder):
        entry_path = os.path.join(folder, entry_name)
        try:
            # Links are checked first: os.path.isdir() follows symlinks and
            # shutil.rmtree() refuses to run on a symbolic link, so a link to
            # a directory (or a broken link) would otherwise be left behind.
            if os.path.islink(entry_path) or os.path.isfile(entry_path):
                os.unlink(entry_path)
            elif os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        except OSError as e:
            print(e)

# In kaggle we have the dataset saved in a position that makes it unchangeable.
# This function creates a copy of the dataset and permits also to have images duplicates that,
# together with data augmentation, can improve performance.
def createActuallyDatasetFromInitOne(pathToInitDataset, pathToActuallyDataset):
    """Copy the original dataset into a fresh, writable working folder.

    The destination is emptied if it already exists, and the sub-folders
    ``training``, ``validation`` and ``test`` are created in it. Files are
    then copied keeping their path relative to the source folder:

    * everything under ``<source>/test``;
    * everything under ``<source>/training/<class>/`` (files lying directly
      in ``<source>/training`` belong to no class and are skipped).

    Any other content of the source folder is ignored.

    Args:
        pathToInitDataset: Folder holding the original dataset.
        pathToActuallyDataset: Folder in which the working copy is created.
    """
    if os.path.exists(pathToActuallyDataset):
        clearFolder(pathToActuallyDataset)
    for split in ('training', 'validation', 'test'):
        os.makedirs(os.path.join(pathToActuallyDataset, split), exist_ok=True)

    for dirname, _, filenames in os.walk(pathToInitDataset):
        print("Working on the directory " + dirname)
        # Route on the first component of the relative path rather than on a
        # substring of the full path, so that a class folder such as
        # "contest" is not mistaken for the test split.
        relative_dir = os.path.relpath(dirname, pathToInitDataset)
        split = relative_dir.split(os.sep)[0]
        if split == 'test':
            wanted = True
        elif split == 'training':
            wanted = relative_dir != 'training'
        else:
            wanted = False
        if not wanted or not filenames:
            continue

        destination_dir = os.path.join(pathToActuallyDataset, relative_dir)
        os.makedirs(destination_dir, exist_ok=True)
        for filename in filenames:
            shutil.copyfile(os.path.join(dirname, filename),
                            os.path.join(destination_dir, filename))
                    

# This function instead creates a validation subset (none was available in the original dataset).
def extractValidationData(classes, datasetPath, validation_split):
    """Rename the training images and move a random share to the validation set.

    For every class, the files of ``<datasetPath>/training/<class>`` are
    renamed to ``<class><i>.jpg`` (``i`` counting from 0), then
    ``ceil(validation_split * n)`` of them, picked at random without
    repetition, are moved to ``<datasetPath>/validation/<class>`` (created
    if missing).

    Args:
        classes: Class names; each must be a sub-folder of the training folder.
        datasetPath: Root folder of the working dataset.
        validation_split: Fraction (between 0 and 1) of each class to move.

    Raises:
        ValueError: If ``validation_split`` is outside ``[0, 1]``.
    """
    if not 0 <= validation_split <= 1:
        raise ValueError("validation_split must be between 0 and 1, got "
                         + str(validation_split))

    training_root = join(datasetPath, 'training')
    validation_root = join(datasetPath, 'validation')

    for category in classes:
        category_dir = join(training_root, category)
        file_names = [f for f in listdir(category_dir) if isfile(join(category_dir, f))]

        # Rename through the parent folder so that a new name can never
        # overwrite a sibling that has not been renamed yet. Handling one
        # class at a time means the class is known here and never has to be
        # parsed back out of the file name.
        staged_paths = []
        for i, file_name in enumerate(file_names):
            staged_path = join(training_root, category + str(i) + '.jpg')
            os.rename(join(category_dir, file_name), staged_path)
            staged_paths.append(staged_path)
        for staged_path in staged_paths:
            shutil.move(staged_path, category_dir)

        category_validation_dir = join(validation_root, category)
        os.makedirs(category_validation_dir, exist_ok=True)

        # Distinct random indexes; the files were just numbered 0..n-1.
        validation_examples = math.ceil(validation_split * len(file_names))
        for index in random.sample(range(len(file_names)), validation_examples):
            shutil.move(join(category_dir, category + str(index) + '.jpg'),
                        category_validation_dir)

In [6]:
# Copy the original 'Classification_Dataset' folder into the working 'Dataset' folder
# (relative path, i.e. resolved against the current working directory).
createActuallyDatasetFromInitOne(os.path.join(cwd, 'Classification_Dataset'), 'Dataset')
# Move a `validation_split` share of every class from Dataset/training to Dataset/validation.
extractValidationData(classes, 'Dataset', validation_split)

Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\test
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\training
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\training\airplanes
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\training\bear
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\training\calculator
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\training\computer-monitor
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\training\fireworks
Working on the directory C:\Users\stive\Desktop\anndl-github\Competition 1\Classification_Dataset\traini

In [7]:
# Set of functions used to "prepare" the training images.
# We tried to perform a "smart" crop of them in order to have better performance.
def prepare_image(image, thumbnail_size):
    """Crop an image to a square and shrink it to fit ``thumbnail_size``.

    Wide or tall images are squared by ``square_wide_image`` /
    ``square_tall_image``; the result is converted to RGB if needed and then
    reduced with ``Image.thumbnail``.

    Args:
        image: PIL image to process.
        thumbnail_size: ``(width, height)`` bounds for the thumbnail.

    Returns:
        ``image`` itself, untouched, when both of its sides are smaller than
        the bounds; otherwise the squared RGB thumbnail. Note that
        ``thumbnail`` works in place, so an input that is already square and
        RGB is itself resized.
    """
    x, y = image.size
    th_x, th_y = thumbnail_size

    # the image is smaller than the minimum thumbnail dimensions
    if x < th_x and y < th_y:
        return image

    if x > y:  # wide image
        im = square_wide_image(image)
    elif x < y:  # tall image
        im = square_tall_image(image)
    else:  # it's already square
        im = image
    if im.mode != "RGB":
        im = im.convert('RGB')
    # Image.ANTIALIAS was an alias of the Lanczos filter and was removed in
    # Pillow 10; Image.LANCZOS is the same filter under its supported name.
    im.thumbnail(thumbnail_size, Image.LANCZOS)
    return im

def image_entropy(img):
    """Return the entropy (in bits) of the histogram of ``img``.

    ``img`` only needs a ``histogram()`` method returning a sequence of
    counts; empty bins do not contribute to the result.
    """
    bins = img.histogram()
    total = sum(bins)
    probabilities = (float(count) / total for count in bins)
    return -sum(p * math.log(p, 2) for p in probabilities if p != 0)

def square_wide_image(img):
    """Crop a wide image down to a square, keeping the most informative part.

    While the image is wider than it is tall, the left and right edge slices
    (at most 10 px wide) are compared and the one with the lower entropy is
    cut off.

    Args:
        img: PIL image to crop.

    Returns:
        The cropped image, whose width is no larger than its height.
    """
    x, y = img.size
    while x > y:
        # slice 10px at a time until square
        slice_width = min(x - y, 10)
        right = img.crop((x - slice_width, 0, x, y))
        left = img.crop((0, 0, slice_width, y))

        # remove the slice with the least entropy
        if image_entropy(left) < image_entropy(right):
            img = img.crop((slice_width, 0, x, y))  # crop the left side
        else:
            img = img.crop((0, 0, x - slice_width, y))  # crop the right side
        x, y = img.size
    # Return only once the loop is finished: returning from inside the loop
    # removed a single slice and left the image non-square.
    return img

def square_tall_image(img):
    """Crop a tall image down to a square.

    While the height exceeds the width, the top and bottom edge strips (at
    most 10 px high) are compared by entropy: the bottom strip is removed when
    its entropy is strictly lower than the top's, otherwise the top strip is.
    """
    width, height = img.size
    while height > width:
        # never cut more than what is needed to reach a square
        strip = min(height - width, 10)
        bottom_entropy = image_entropy(img.crop((0, height - strip, width, height)))
        top_entropy = image_entropy(img.crop((0, 0, width, strip)))

        if bottom_entropy < top_entropy:
            box = (0, 0, width, height - strip)  # drop the bottom strip
        else:
            box = (0, strip, width, height)  # drop the top strip
        img = img.crop(box)
        width, height = img.size
    return img

def getResizedSquaredImageWithPadding(filename, input_size):
    """Load an image and fit it, centred, on a white square canvas.

    The image is opened as RGB and scaled so that its longer side equals
    ``input_size`` (aspect ratio kept), then pasted in the middle of a white
    ``input_size`` x ``input_size`` RGB image, which is returned.
    """
    picture = Image.open(filename).convert('RGB')
    scale = input_size / max(picture.width, picture.height)
    scaled_w = int(picture.width * scale)
    scaled_h = int(picture.height * scale)
    picture = picture.resize((scaled_w, scaled_h))

    canvas = Image.new('RGB', (input_size, input_size), (255, 255, 255))
    canvas_w, canvas_h = canvas.size
    # top-left corner that centres the scaled picture on the canvas
    canvas.paste(picture, ((canvas_w - scaled_w) // 2, (canvas_h - scaled_h) // 2))
    return canvas

## Data Augmentation
Here we leave the possibility of applying data augmentation. Thanks to the ImageDataGenerator we can perform data augmentation very easily: we only need to set the desired parameters in the constructor and the job is done.
Check in the documentation exactly what each parameter does, because it is important to apply augmentation only when it makes sense (for instance, we probably don't want to perform vertical flips on helicopter images...).

In [8]:
# Switch for augmenting the training images
apply_data_augmentation = False

if not apply_data_augmentation:
    # No augmentation: only bring pixel values into [0, 1]
    train_data_gen = ImageDataGenerator(rescale=1. / 255)
else:
    # Geometric and colour perturbations, plus the same rescaling
    train_data_gen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=180,
        shear_range=20,
        zoom_range=0.6,
        width_shift_range=100,
        height_shift_range=100,
        horizontal_flip=True,
        vertical_flip=True,
        brightness_range=(0.5, 1.0),
        channel_shift_range=40.0,
        fill_mode='nearest',
        cval=0)

# Validation images are never augmented, only rescaled
valid_data_gen = ImageDataGenerator(rescale=1. / 255)
#test_data_gen = ImageDataGenerator(rescale=1. / 255)

With the flow_from_directory method, we can create our final generators!
***
We also compute the class weights, based on the number of samples per each class.

In [9]:
# Training generator: reads Dataset/training, one sub-folder per entry of `classes`
train_gen = train_data_gen.flow_from_directory(training_dir,
                                               batch_size=bs,
                                               target_size=(IMAGE_DIMENSION, IMAGE_DIMENSION),
                                               classes=classes,
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED,
                                               interpolation='bicubic')  # targets are directly converted into one-hot vectors



# Class weights: the most populated class gets weight 1.0, every other class
# gets (largest class size / its own size), so rarer classes weigh more.
counter = Counter(train_gen.classes)
max_val = float(max(counter.values()))
class_weights = {class_id : max_val/num_images for class_id, num_images in counter.items()}

print("\nThe following are the class weights based on the number of samples:\n" + str(class_weights) + "\n")

# Validation generator: same settings, reading Dataset/validation
valid_gen = valid_data_gen.flow_from_directory(validation_dir,
                                               batch_size=bs,
                                               target_size=(IMAGE_DIMENSION, IMAGE_DIMENSION),
                                               classes=classes,
                                               class_mode='categorical',
                                               shuffle=True,
                                               interpolation='bicubic',
                                               seed=SEED)

# Wrap the generators into tf.data datasets yielding (images, one-hot labels)
# batches of shape ([None, img_h, img_w, 3], [None, num_classes]).
train_dataset = tf.data.Dataset.from_generator(lambda: train_gen,
                                               output_types=(tf.float32, tf.float32),
                                               output_shapes=([None, img_h, img_w, 3], [None, num_classes]))

train_dataset = train_dataset.repeat()

# NOTE(review): the training cell passes `valid_gen`, not `valid_dataset`, as validation data.
valid_dataset = tf.data.Dataset.from_generator(lambda: valid_gen, 
                                               output_types=(tf.float32, tf.float32),
                                               output_shapes=([None, img_h, img_w, 3], [None, num_classes]))

valid_dataset = valid_dataset.repeat()

Found 1313 images belonging to 20 classes.

The following are the class weights based on the number of samples:
{0: 1.0625, 1: 1.8085106382978724, 2: 1.0, 3: 1.328125, 4: 1.0, 5: 1.4406779661016949, 6: 1.3076923076923077, 7: 1.3709677419354838, 8: 1.3492063492063493, 9: 1.7, 10: 1.0, 11: 1.5178571428571428, 12: 1.4406779661016949, 13: 1.7708333333333333, 14: 1.3492063492063493, 15: 1.0, 16: 1.3076923076923077, 17: 1.4406779661016949, 18: 1.7708333333333333, 19: 1.0}

Found 241 images belonging to 20 classes.


## Transfer Learning
What do you do if you have very few samples in your dataset but the classes are not very unusual (here, for instance, the classes are "owl", "airplanes" and so on)? You use transfer learning!

Here we used InceptionResNetV2, after many tests with VGG, plain ResNet and many others.
***
We also fine-tune the last layers of the network: only the first 600 layers are frozen (trainable set to False).

In [10]:
# Pre-trained convolutional base: ImageNet weights, no classification head,
# input size matching the images produced by the generators.
conv_base = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(IMAGE_DIMENSION,IMAGE_DIMENSION,3))

# Freeze the first 600 layers ...
for layer in conv_base.layers[:600]:
    layer.trainable = False
# ... and leave every layer after them trainable (fine-tuning)
for layer in conv_base.layers[600:]:
    layer.trainable = True

print(conv_base.summary())

Model: "inception_resnet_v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 75, 75, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 37, 37, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 37, 37, 32)   96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 37, 37, 32)   0           batch_normalization[0][0]        
________________________________________________________________________________

__________________________________________________________________________________________________
conv2d_155 (Conv2D)             (None, 3, 3, 192)    215040      activation_154[0][0]             
__________________________________________________________________________________________________
batch_normalization_152 (BatchN (None, 3, 3, 192)    576         conv2d_152[0][0]                 
__________________________________________________________________________________________________
batch_normalization_155 (BatchN (None, 3, 3, 192)    576         conv2d_155[0][0]                 
__________________________________________________________________________________________________
activation_152 (Activation)     (None, 3, 3, 192)    0           batch_normalization_152[0][0]    
__________________________________________________________________________________________________
activation_155 (Activation)     (None, 3, 3, 192)    0           batch_normalization_155[0][0]    
__________

## Model
Here is the model we used. First you can see the class ConvBlock, used in previous tests; we left it here in case you need it.
Then comes our model definition, which we describe in more detail below.
***
- The model starts with a 10% dropout on the input.
- Then comes the InceptionResNetV2 base, followed by a global average pooling.
- After that we find our "model tail": 3 fully connected layers with a decreasing number of neurons, each followed by a dropout layer, again to avoid overfitting.
- Finally we find our "softmax" layer, with a number of neurons equal to the number of classes we want to predict.

In [11]:
class ConvBlock(tf.keras.Model):
    """Convolution -> ReLU -> max-pooling block (kept from earlier experiments)."""

    def __init__(self, num_filters, kernel, padding, maxPoolSize):
        """Create the three layers.

        Args:
            num_filters: Number of filters of the convolution.
            kernel: Side of the square convolution kernel.
            padding: Padding mode forwarded to ``Conv2D``.
            maxPoolSize: Side of the square max-pooling window.
        """
        super().__init__()
        kernel_shape = (kernel, kernel)
        pool_shape = (maxPoolSize, maxPoolSize)
        self.conv2d = tf.keras.layers.Conv2D(filters=num_filters,
                                             kernel_size=kernel_shape,
                                             strides=(1, 1),
                                             padding=padding)
        # ReLU is kept as a separate layer (it could also be set directly in Conv2D)
        self.activation = tf.keras.layers.ReLU()
        self.pooling = tf.keras.layers.MaxPool2D(pool_size=pool_shape)

    def call(self, inputs):
        """Apply convolution, activation and pooling, in this order."""
        features = self.conv2d(inputs)
        features = self.activation(features)
        return self.pooling(features)

class CNNClassifierTransfer(tf.keras.Model):
    """Transfer-learning classifier: pre-trained base plus a dense head.

    ``feature_extractor`` applies a 10% dropout to the input, runs the given
    convolutional base and averages its feature maps; ``classifier`` is a
    stack of 1024/512/256-unit ReLU layers, each followed by dropout, ending
    in a softmax layer with one unit per class.
    """

    def __init__(self, num_classes, conv_base):
        """Build both sub-networks.

        Args:
            num_classes: Number of units of the final softmax layer.
            conv_base: Convolutional model used as feature extractor.
        """
        super().__init__()
        layers = tf.keras.layers

        self.feature_extractor = tf.keras.Sequential([
            layers.Dropout(0.1, seed=SEED),
            conv_base,
            layers.GlobalAveragePooling2D(),
        ])

        self.classifier = tf.keras.Sequential([
            layers.Dense(units=1024, activation='relu'),
            layers.Dropout(0.2, seed=SEED),
            layers.Dense(units=512, activation='relu'),
            layers.Dropout(0.3, seed=SEED),
            layers.Dense(units=256, activation='relu'),
            layers.Dropout(0.2, seed=SEED),
            layers.Dense(units=num_classes, activation='softmax'),
        ])

    def call(self, inputs):
        """Return the class probabilities computed for ``inputs``."""
        return self.classifier(self.feature_extractor(inputs))


We create the model, we build it and we compile it, specifying optimizer, loss function and metric.

In [12]:
# Instantiate the classifier on top of the pre-trained convolutional base
model = CNNClassifierTransfer(num_classes=num_classes, conv_base=conv_base)
# Build with the (batch, height, width, channels) input shape before asking for a summary
model.build(input_shape=(None, img_h, img_w, 3))
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
# The recorded output ends with "None": that is the return value of summary() being printed
print(model.summary())

Model: "cnn_classifier_transfer"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      multiple                  54336736  
_________________________________________________________________
sequential_1 (Sequential)    multiple                  2235156   
Total params: 56,571,892
Trainable params: 28,820,244
Non-trainable params: 27,751,648
_________________________________________________________________
None


If we want to restore the weights of a previous run, we can set LOAD_FROM_FILE to True to load the file "my_model_weights.h5" (we save it at the end of the training).

In [13]:
# Set to True to start from the weights saved by a previous run
LOAD_FROM_FILE = False

try:
    # Load only when requested and when the weights file is actually there
    if LOAD_FROM_FILE and os.path.exists('my_model_weights.h5'):
        model.load_weights('my_model_weights.h5')
except Exception as e:
    print(e)

Here we can set ES_TB to True to enable early stopping (if the validation loss does not improve for a few epochs, we stop the training) and to save checkpoints that can later be inspected, e.g. together with TensorBoard. Everything here is set up to reduce the CPU work.

In [14]:
# Early Stopping & TensorBoard
# --------------
ES_TB = False

# Defined unconditionally so that the name exists (as an empty list) even
# when ES_TB is False.
callbacks = []

if ES_TB:
    cwd = os.getcwd()

    # One folder per run: classification_experiments/CNN_<timestamp>
    exps_dir = os.path.join(cwd, 'classification_experiments')
    os.makedirs(exps_dir, exist_ok=True)

    now = datetime.now().strftime('%b%d_%H-%M-%S')

    model_name = 'CNN'

    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
    os.makedirs(exp_dir, exist_ok=True)

    # Model checkpoint
    # ----------------
    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    os.makedirs(ckpt_dir, exist_ok=True)

    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'),
                                                       save_weights_only=True)  # False to save the model directly
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard
    # ---------------------------------
    tb_dir = os.path.join(exp_dir, 'tb_logs')
    os.makedirs(tb_dir, exist_ok=True)

    # By default shows losses and metrics for both training and validation
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                                 profile_batch=0,
                                                 histogram_freq=1)  # if 1 shows weights histograms
    # The TensorBoard callback is created but deliberately left out of the list
    #callbacks.append(tb_callback)

    # Stop when the validation loss has not improved for 3 epochs
    early_stop = True
    if early_stop:
        es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
        callbacks.append(es_callback)

## Training the model and computing the final predictions
Finally we can call the model.fit function! Here we can set the number of epochs we want to run (again for performance reasons, here we use only 1 epoch).
After the training we save our weights (in case we want to use them in the future).

We now have a trained model; to use it, we loop over all the test images and predict the class of each one. All the results are saved in a CSV file, which we had to submit to the Kaggle competition.

In [15]:
# Set to False to skip training (e.g. when only restored weights are used)
FIT_MODEL = True
if FIT_MODEL:
    try:
        model.fit(x=train_dataset,
                  epochs=1,  #### set repeat in training dataset
                  steps_per_epoch=len(train_gen),
                  validation_data=valid_gen,
                  validation_steps=len(valid_gen),
                  class_weight=class_weights,
                  # Hand over the checkpoint / early-stopping callbacks when a
                  # previous cell defined them; otherwise this is None, which
                  # Keras treats as "no callbacks".
                  callbacks=globals().get('callbacks'))
    except Exception as e:
        # Best effort: report the problem and continue with the notebook
        print(e)

    # Keep the weights so that a later run can restore them
    model.save_weights('my_model_weights.h5')

  ...
    to  
  ['...']
Train for 21 steps, validate for 4 steps


In [16]:
def computeFinalCSVFile(datasetPath, IMAGE_DIMENSION, model, TEST_TYPE):
    """Predict every image of ``<datasetPath>/test`` and write the result CSV.

    The file ``results_<timestamp>__<TEST_TYPE>__.csv`` is created in the
    current directory with the header ``Id,Category`` and one line per test
    image: the file name and the index of the most probable class.

    Args:
        datasetPath: Root folder of the dataset (must contain ``test``).
        IMAGE_DIMENSION: Side, in pixels, of the square images fed to the model.
        model: Trained model exposing ``predict``.
        TEST_TYPE: Pre-processing applied to each image:
            0 - resize keeping the aspect ratio and pad to a white square;
            1 - plain resize to the target size;
            2 - entropy-based square crop, then resize.

    Raises:
        ValueError: If ``TEST_TYPE`` is not 0, 1 or 2.
    """
    # Fail early: an unknown type used to surface only as an undefined `img`
    # once the first image was processed.
    if TEST_TYPE not in (0, 1, 2):
        raise ValueError("Unknown TEST_TYPE " + str(TEST_TYPE) + ": expected 0, 1 or 2")

    print("-----------------")
    print("STARTING TO SAVE PREDICTS CSV FILE with TEST_TYPE " + str(TEST_TYPE))
    print("-----------------")

    # Only the files lying directly in the test folder are considered
    image_filenames = next(os.walk(datasetPath + '/test'))[2]

    csv_fname = 'results_'
    csv_fname += datetime.now().strftime('%b%d_%H-%M-%S') + '__' + str(TEST_TYPE) + '__.csv'

    target_size = (IMAGE_DIMENSION, IMAGE_DIMENSION)
    with open(os.path.join('./', csv_fname), 'w') as f:
        f.write('Id,Category\n')
        for image_name in image_filenames:
            image_path = datasetPath + '/test/' + image_name
            # Integers are compared with ==, not with the identity operator
            if TEST_TYPE == 0:
                img = getResizedSquaredImageWithPadding(image_path, IMAGE_DIMENSION)
            else:
                img = Image.open(image_path).convert('RGB')
                if TEST_TYPE == 2:
                    img = prepare_image(img, target_size)
                img = img.resize(target_size)

            # Same 1/255 scaling as the training generators, plus a batch axis
            x = np.array(img).astype('float32') / 255
            x = np.expand_dims(x, 0)
            pred = np.argmax(model.predict(x))

            f.write(image_name + ',' + str(pred) + '\n')
            
# Write the submission CSV using TEST_TYPE 1 (plain resize of every test image);
# any error is printed instead of stopping the notebook.
try:           
    computeFinalCSVFile('Dataset', IMAGE_DIMENSION, model, 1)
except Exception as e:
    print(e)

-----------------
STARTING TO SAVE PREDICTS CSV FILE with TEST_TYPE 1
-----------------
