# Bengali.AI Competition - ResNet-38 Training (Ensemble)

### Team MuchLearningSuchWow

This notebook contains code for training the ResNet-38 network we used in our ensemble. It is connected to Weights and Biases in order to keep track of progress and performance.

## Imports

In [None]:
import os

from tqdm.auto import tqdm
import cv2
import pandas as pd
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras.callbacks import ReduceLROnPlateau
import psutil

from keras.layers import Conv2D, BatchNormalization, Activation, Add, MaxPool2D, Dense, \
    Dropout, GlobalAveragePooling2D, Concatenate, Input, Flatten, AveragePooling2D, Add
from keras import Model
from keras.regularizers import l2

import gc
import wandb
from wandb.keras import WandbCallback

## Filenames

In [None]:
# CSV holding the training labels; read into a DataFrame with pandas.
train_filename = 'input/bengaliai-cv19/train.csv'
# Relative path of the trained model file; it is joined onto the
# Weights & Biases run directory when the model is saved.
model_filename = 'output/model_resnet38.hdf5'

In [None]:
# Create the local output directory. `exist_ok=True` makes this a single,
# race-free call that is safe to re-run, instead of check-then-create.
os.makedirs('output', exist_ok=True)

## Loading Dataframes

In [None]:
# Load the training labels and discard the 'grapheme' column, which is not
# used anywhere in the training code below.
train_df_ = pd.read_csv(train_filename).drop(columns=['grapheme'])

## Weights and Biases

In [None]:
# Start a Weights & Biases run in the 'bengali' project; `run.config` is used
# below to store the hyperparameters of this training run.
run = wandb.init(project='bengali')

In [None]:
config = run.config

# Block-group specification stored with the run. Each entry is
# (width, output_width, count); cardinality is 4 for every group.
# NOTE(review): `config.blocks` is not read by the code visible here.
_block_specs = [
    (64, 128, 2),
    (128, 256, 2),
    (256, 512, 2),
    (512, 1024, 3),
    (1024, 1024, 3),
]
config.blocks = [
    {
        'width': width,
        'output_width': output_width,
        'cardinality': 4,
        'count': count
    }
    for width, output_width, count in _block_specs
]

# Scalar hyperparameters, assigned in the same order as before.
for _option, _setting in (
        ('iChannels', 32),
        ('epochs', 180),
        ('max_lr', 0.0016),
        ('min_lr', 0.0004),
        ('n_cycles', 8),
        ('dropout', 0.4),
        ('batch_size', 240),
        ('validation_split', 0.08),
        ('resnet_depth', 38),
):
    setattr(config, _option, _setting)

# One "epoch" covers a quarter of the batches in the training split
# (200840 is the hard-coded total number of samples).
_training_samples = int(200840 * (1 - config.validation_split))
config.steps_per_epoch = _training_samples // config.batch_size // 4

## Building ResNet-38 Model

In [None]:
def resnet_layer(inputs, num_filters=16, kernel_size=3, strides=1,
                 activation='relu', batch_normalization=True, conv_first=True):
    """Apply one Conv2D / BatchNormalization / Activation unit to a tensor.

    The convolution uses 'same' padding, He-normal initialisation and an
    L2 kernel regulariser of 1e-4.

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): number of Conv2D filters
        kernel_size (int): side length of the square Conv2D kernel
        strides (int): stride of the Conv2D in both spatial dimensions
        activation (string): activation name, or None to skip the activation
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): True for conv-bn-activation ordering,
            False for bn-activation-conv ordering

    # Returns
        x (tensor): tensor as input to the next layer
    """
    convolution = Conv2D(num_filters,
                         kernel_size=kernel_size,
                         strides=strides,
                         padding='same',
                         kernel_initializer='he_normal',
                         kernel_regularizer=l2(1e-4))

    def normalize_and_activate(tensor):
        # Optional BN followed by optional activation, shared by both orderings.
        if batch_normalization:
            tensor = BatchNormalization()(tensor)
        if activation is not None:
            tensor = Activation(activation)(tensor)
        return tensor

    if conv_first:
        return normalize_and_activate(convolution(inputs))
    return convolution(normalize_and_activate(inputs))

In [None]:
def build_resnet(depth, input_shape=(64, 64, 1)):
    """Build the bottleneck ResNet with root / vowel / consonant heads.

    The network starts with a 3 x 3 conv-BN-ReLU layer with 16 filters,
    followed by three stages of ``(depth - 2) / 9`` bottleneck residual units
    each. Every unit is a (1 x 1)-(3 x 3)-(1 x 1) stack of BN-ReLU-Conv2D
    layers (``conv_first=False``); the very first unit skips BN-ReLU on its
    first convolution.
    The first shortcut connection of each stage is a 1 x 1 Conv2D projection.
    The second and later shortcut connections are identity.
    Stages 1 and 2 halve the feature map size in their first unit with
    strides=2 and double the number of output filters; stage 0 keeps the
    spatial size and goes from 16 to 64 filters.

    Feature map sizes for the default 64x64 input (spatial size, channels):
    conv1  : 64x64,  16
    stage 0: 64x64,  64
    stage 1: 32x32, 128
    stage 2: 16x16, 256

    After a final BN-ReLU the map is average-pooled with pool_size=8,
    flattened and passed through Dense(2048) -> BN -> Dense(1024), which feeds
    three softmax heads.

    # Arguments
        depth (int): network depth; must satisfy (depth - 2) % 9 == 0
            (e.g. 38 gives 4 residual units per stage)
        input_shape (tuple): shape of the input image tensor

    # Returns
        model (Model): Keras model with outputs 'dense_a' (168 classes),
            'dense_b' (11 classes) and 'dense_c' (7 classes), in that order

    # Raises
        ValueError: if depth is not of the form 9n+2
    """
    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
    # Start model definition.
    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9)

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    # Instantiate the stack of residual units
    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage
                    # The stem already applied BN-ReLU, so skip it here.
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2    # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                # Project the shortcut so its shape matches the residual branch
                # (channel count changes, and spatial size too when strides=2).
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = Add()([x, y])

        # The next stage's bottleneck width is this stage's output width.
        num_filters_in = num_filters_out

    # Add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # NOTE(review): pool_size is fixed at 8; with the default 64x64 input the
    # final 16x16x256 map becomes 2x2x256, i.e. 1024 features after Flatten.
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)

    # Shared fully-connected trunk feeding all three heads.
    y = Dense(2048, activation="relu")(y)
    y = BatchNormalization()(y)
    dense = Dense(1024, activation="relu", kernel_initializer='he_normal')(y)

    # One softmax head per target; the layer names are the output names used
    # for the target dicts and for the per-output metric names.
    head_root = Dense(168, activation='softmax', kernel_initializer='he_normal', name='dense_a')(dense)
    head_vowel = Dense(11, activation='softmax', kernel_initializer='he_normal', name='dense_b')(dense)
    head_consonant = Dense(7, activation='softmax', kernel_initializer='he_normal', name='dense_c')(dense)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=[head_root, head_vowel, head_consonant])
    return model

In [None]:
model = build_resnet(depth=config.resnet_depth)

# The grapheme-root head ('dense_a') counts twice as much in the total loss
# as the vowel ('dense_b') and consonant ('dense_c') heads.
_loss_weights = {'dense_a': 2, 'dense_b': 1, 'dense_c': 1}

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    loss_weights=_loss_weights,
    metrics=['accuracy', keras.metrics.Recall()],
)

## Training

In [None]:
# Number of validation batches evaluated per epoch: a quarter of the batches
# in the validation split (200840 is the hard-coded total number of samples).
validation_steps = int(200840*config.validation_split)//config.batch_size//4

In [None]:
class MultiOutputDataGenerator(keras.preprocessing.image.ImageDataGenerator):
    """ImageDataGenerator whose ``flow`` accepts a dict of per-output targets.

    The per-output target arrays are packed side by side into one 2-D array
    so the parent class can shuffle and batch them together with the images;
    each yielded batch is then split back into a dict keyed by output name.
    """

    def flow(self,
             x,
             y=None,
             batch_size=32,
             shuffle=True,
             sample_weight=None,
             seed=None,
             save_to_dir=None,
             save_prefix='',
             save_format='png',
             subset=None):
        """Yield augmented batches with one target array per model output.

        # Arguments
            x: image data, passed unchanged to ``ImageDataGenerator.flow``
            y (dict): maps output names to 2-D target arrays that all have
                the same number of rows
            batch_size, shuffle, sample_weight, seed, save_to_dir,
            save_prefix, save_format, subset: forwarded unchanged to
                ``ImageDataGenerator.flow`` (previously everything except
                ``batch_size`` and ``shuffle`` was silently ignored)

        # Yields
            ``(batch_x, targets)`` where ``targets`` maps each output name to
            its slice of the batch, or ``(batch_x, targets, batch_weights)``
            when ``sample_weight`` is given.

        # Raises
            ValueError: if ``y`` is None or empty (raised on first iteration,
                since this method is a generator)
        """
        if y is None or len(y) == 0:
            raise ValueError('y must be a non-empty dict mapping output names '
                             'to 2-D target arrays')

        output_names = list(y)
        # Column boundaries of every output inside the packed target matrix.
        column_bounds = np.cumsum([0] + [y[name].shape[1] for name in output_names])
        packed_targets = np.concatenate([y[name] for name in output_names], axis=1)

        batches = super().flow(x, packed_targets,
                               batch_size=batch_size,
                               shuffle=shuffle,
                               sample_weight=sample_weight,
                               seed=seed,
                               save_to_dir=save_to_dir,
                               save_prefix=save_prefix,
                               save_format=save_format,
                               subset=subset)
        for batch in batches:
            batch_x, batch_y = batch[0], batch[1]
            target_dict = {
                name: batch_y[:, start:stop]
                for name, start, stop in zip(output_names,
                                             column_bounds[:-1],
                                             column_bounds[1:])
            }
            if sample_weight is None:
                yield batch_x, target_dict
            else:
                # The parent appends the per-sample weights as a third element.
                yield batch_x, target_dict, batch[2]

In [None]:
# Settings shared by the three schedulers: after 4 epochs without improvement
# of the monitored metric, multiply the learning rate by 0.25 (floor 1e-10).
_plateau_settings = dict(patience=4, verbose=1, factor=0.25, min_lr=1e-10)

# One scheduler per output head, each watching that head's accuracy metric.
learning_rate_reduction_root = ReduceLROnPlateau(
    monitor='dense_a_accuracy', **_plateau_settings)
learning_rate_reduction_vowel = ReduceLROnPlateau(
    monitor='dense_b_accuracy', **_plateau_settings)
learning_rate_reduction_consonant = ReduceLROnPlateau(
    monitor='dense_c_accuracy', **_plateau_settings)

In [None]:
# Augmentation / preprocessing options for the image generator.
_datagen_options = {
    # No dataset-wide or per-sample normalisation, and no whitening.
    'featurewise_center': False,
    'samplewise_center': False,
    'featurewise_std_normalization': False,
    'samplewise_std_normalization': False,
    'zca_whitening': False,
    # Geometric augmentation: small rotations (degrees), zooms and shifts
    # (shifts are fractions of the image width / height).
    'rotation_range': 8,
    'zoom_range': 0.15,
    'width_shift_range': 0.15,
    'height_shift_range': 0.15,
    # Flips are disabled.
    'horizontal_flip': False,
    'vertical_flip': False,
    # Scale pixel values by 1/255.
    'rescale': 1.0/255.0,
    # Fraction of samples reserved for the 'validation' subset.
    'validation_split': config.validation_split,
}
datagen = MultiOutputDataGenerator(**_datagen_options)

# Turn image ids into file names. The extension is appended only where it is
# missing, so re-running this cell does not produce '<id>.png.png' names.
_image_ids = train_df_['image_id'].astype(str)
train_df_['image_id'] = _image_ids.where(_image_ids.str.endswith('.png'),
                                         _image_ids + '.png')

# One-hot label tables indexed by file name, one per output head. The index
# is built once and shared by all three tables.
_labels_by_image = train_df_.set_index('image_id')
Y_train_root = pd.get_dummies(_labels_by_image['grapheme_root'], dtype=np.float32)
Y_train_vowel = pd.get_dummies(_labels_by_image['vowel_diacritic'], dtype=np.float32)
Y_train_consonant = pd.get_dummies(_labels_by_image['consonant_diacritic'], dtype=np.float32)

def generator_wrapper(gen: MultiOutputDataGenerator, df: pd.DataFrame, subset: str, batch_size: int):
    """Yield ``(images, targets)`` batches for the three-headed model.

    Images are read as 64x64 grayscale files from the 'data' directory. The
    'image_id' column is used both as the file name and, via
    ``class_mode='raw'``, as the returned "label", so that each batch's file
    names can be used to look up the one-hot rows of all three targets.

    # Arguments
        gen: image data generator providing ``flow_from_dataframe``
        df: dataframe with an 'image_id' column of file names
        subset: subset name forwarded to ``flow_from_dataframe``
        batch_size: number of samples per batch

    # Yields
        tuple of the image batch and a dict with keys 'dense_a', 'dense_b'
        and 'dense_c' holding the one-hot targets for that batch
    """
    batches = gen.flow_from_dataframe(df,
                                      color_mode='grayscale',
                                      directory='data',
                                      x_col='image_id',
                                      y_col='image_id',
                                      class_mode='raw',
                                      target_size=(64, 64),
                                      subset=subset,
                                      batch_size=batch_size,
                                      shuffle=True)
    for images, image_ids in batches:
        label_tables = (
            ('dense_a', Y_train_root),
            ('dense_b', Y_train_vowel),
            ('dense_c', Y_train_consonant),
        )
        yield images, {
            head: table.loc[image_ids].values
            for head, table in label_tables
        }

# Both generators draw from the same dataframe and the same `datagen`; the
# subset name selects the training or validation part of the split.
# NOTE(review): because validation shares `datagen`, validation batches
# presumably receive the same random augmentation as training batches —
# confirm that this is intended.
_training_generator = generator_wrapper(datagen, train_df_, 'training', config.batch_size)
validation_generator = generator_wrapper(datagen, train_df_, 'validation', config.batch_size)

# Per-head learning-rate schedulers plus Weights & Biases logging.
_training_callbacks = [
    learning_rate_reduction_root,
    learning_rate_reduction_vowel,
    learning_rate_reduction_consonant,
    WandbCallback(data_type='image'),
]

model.fit_generator(
    _training_generator,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=config.epochs,
    steps_per_epoch=config.steps_per_epoch,
    callbacks=_training_callbacks,
)

## Saving Model

In [None]:
# `model_filename` contains a sub-directory ('output/...') that is resolved
# relative to the W&B run directory, where nothing has created it yet.
# Create it first so the save does not fail after the whole training run.
_model_path = os.path.join(wandb.run.dir, model_filename)
os.makedirs(os.path.dirname(_model_path), exist_ok=True)
model.save(_model_path)