### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.applications.efficientnet as efn
    
# Print the TensorFlow version this notebook is running with
print(f'tf version: {tf.__version__}')

tf version: 2.3.1


### Defining some variables

In [2]:
# Lookup tables indexed by the EfficientNet variant number:
# EFNETS[i] is the EfficientNetBi constructor and
# IMSIZES[i] is the input image side (in pixels) used for EfficientNetBi.
EFNETS = (efn.EfficientNetB0, efn.EfficientNetB1, efn.EfficientNetB2, efn.EfficientNetB3,
          efn.EfficientNetB4, efn.EfficientNetB5, efn.EfficientNetB6, efn.EfficientNetB7)
IMSIZES = (224, 240, 260, 300, 380, 456, 528, 600)

EFNET_NO = 1 # EfficientNetB1 performed the best
BATCH_SIZE = 64
IMAGE_SIZE = IMSIZES[EFNET_NO]

N_CLASSES = 9 # we've got 9 emotions to predict
SEED = 17 # only used by train_test_split below, where the validation set is carved out
IMAGES_DIR = '/kaggle/input/skillbox-emotions/'
# IMAGES_DIR already ends with '/', so append the folder name without another
# separator (a leading '/' here would yield '.../skillbox-emotions//test_kaggle').
TEST_DIR = IMAGES_DIR + 'test_kaggle'

# .iloc[:, 1:] drops the first CSV column (presumably a saved index -- confirm)
train_df = pd.read_csv('/kaggle/input/skillbox-computer-vision-project/train.csv').iloc[:, 1:]
sub = pd.read_csv('/kaggle/input/skillbox-computer-vision-project/sample_submission.csv')

# Hold out 10% of the labelled data for validation only; the real test set is
# scored through Kaggle.
# NOTE(review): the split is not stratified by 'emotion' -- consider
# stratify=train_df['emotion'] if the classes are imbalanced.
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=SEED)

### Creating data generators
Here we create the data generators. The training generator adds a few augmentations, which helped improve the model's ability to generalize.

In [3]:
# Random augmentations are applied to the training stream only;
# validation and test images use generators with default settings.
train_augmentations = dict(
    horizontal_flip=True,
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
)
train_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(**train_augmentations)
val_data_gen = tf.keras.preprocessing.image.ImageDataGenerator()
test_data_gen = tf.keras.preprocessing.image.ImageDataGenerator()

In [4]:
# Settings shared by all three streams
flow_kwargs = dict(
    x_col='image_path',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)

# Labelled, shuffled stream used for training
train_data = train_data_gen.flow_from_dataframe(
    train_df, directory=IMAGES_DIR, y_col='emotion', class_mode='sparse',
    shuffle=True, **flow_kwargs)

# Labelled stream in fixed order used for validation
val_data = val_data_gen.flow_from_dataframe(
    val_df, directory=IMAGES_DIR, y_col='emotion', class_mode='sparse',
    shuffle=False, **flow_kwargs)

# Unlabelled stream in fixed order, so predictions line up with the rows of `sub`
test_data = test_data_gen.flow_from_dataframe(
    sub, directory=TEST_DIR, y_col=None, class_mode=None,
    shuffle=False, **flow_kwargs)

Found 45041 validated image filenames belonging to 9 classes.
Found 5005 validated image filenames belonging to 9 classes.
Found 5000 validated image filenames.


In [5]:
# Invert class_indices so that a predicted value can be looked up to get its key
# (used to decode predictions)
MAPPING = {index: label for label, index in train_data.class_indices.items()}

### Defining the model
I wrote a subclass of tf.keras.Sequential that just adds a TTA (test-time augmentation) prediction method, which averages the predictions on the base image and its horizontally flipped copy.

In [6]:
class MySequential(tf.keras.Sequential):
    """`tf.keras.Sequential` with an extra flip-based test-time augmentation (TTA) predictor."""

    def predict_tta(self, data_generator):
        """Predict with horizontal-flip TTA.

        For every image, the model output on the image and on its left-right
        mirrored copy are averaged. Prediction runs on whole batches rather
        than one image at a time.

        Args:
            data_generator: iterable of image batches that exposes the total
                number of images as ``.n``. Batches are assumed to be
                channels-last arrays shaped (batch, height, width, channels).
                Batches given as ``(images, labels, ...)`` tuples are accepted;
                only the images are used.

        Returns:
            np.ndarray with one averaged prediction row per image, in the order
            the generator yielded them. An empty array if there are no images.
        """
        images_count = data_generator.n # to be able to stop the loop
        if images_count == 0:
            return np.array([])

        batch_preds = []
        processed = 0 # number of images predicted so far
        for batch in data_generator:
            # generators built with labels yield (images, labels) tuples
            if isinstance(batch, (tuple, list)):
                batch = batch[0]

            pred = self.predict(batch)
            # axis 2 is the width axis of a (batch, height, width, channels) array
            pred_f = self.predict(np.flip(batch, axis=2))
            batch_preds.append((pred + pred_f) / 2)

            # for process monitoring: report each time another 1000 images are done
            before = processed
            processed += len(batch)
            if processed // 1000 > before // 1000:
                print(f'{processed}/{images_count} images processed')

            # now we check whether it's time to stop the loop
            # otherwise we will iterate the generator forever
            if processed >= images_count:
                print('Done')
                break

        if not batch_preds:
            return np.array([])
        return np.concatenate(batch_preds, axis=0)[:images_count]

In [7]:
# Backbone without its classification head -> global average pooling -> softmax layer
backbone = EFNETS[EFNET_NO](
    include_top=False,
    weights='imagenet', # defining the model with imagenet weights makes the training process faster
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
)
model = MySequential([
    backbone,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(N_CLASSES, activation='softmax'),
])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics='accuracy',
)
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb1_notop.h5
Model: "my_sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb1 (Functional)  (None, 8, 8, 1280)        6575239   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 9)                 11529     
Total params: 6,586,768
Trainable params: 6,524,713
Non-trainable params: 62,055
_________________________________________________________________


In [8]:
# Both callbacks monitor validation accuracy, where larger is better
monitor_kwargs = dict(monitor='val_accuracy', mode='max')

# Save to 'model.h5' only when the monitored value improves
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='model.h5', save_best_only=True, verbose=1, **monitor_kwargs)
# Reduce the learning rate after 3 epochs without improvement, never below 3e-7
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    patience=3, min_lr=3e-7, **monitor_kwargs)

In [9]:
EPOCHS = 20
# Train on the augmented stream, validating on the held-out split after each epoch
history = model.fit(
    train_data,
    epochs=EPOCHS,
    validation_data=val_data,
    callbacks=[checkpoint, lr_reducer],
)

Epoch 1/20
Epoch 00001: val_accuracy improved from -inf to 0.49151, saving model to model.h5
Epoch 2/20
Epoch 00002: val_accuracy improved from 0.49151 to 0.49670, saving model to model.h5
Epoch 3/20
Epoch 00003: val_accuracy did not improve from 0.49670
Epoch 4/20
Epoch 00004: val_accuracy improved from 0.49670 to 0.52547, saving model to model.h5
Epoch 5/20
Epoch 00005: val_accuracy improved from 0.52547 to 0.52967, saving model to model.h5
Epoch 6/20
Epoch 00006: val_accuracy improved from 0.52967 to 0.53067, saving model to model.h5
Epoch 7/20
Epoch 00007: val_accuracy improved from 0.53067 to 0.53666, saving model to model.h5
Epoch 8/20
Epoch 00008: val_accuracy did not improve from 0.53666
Epoch 9/20
Epoch 00009: val_accuracy did not improve from 0.53666
Epoch 10/20
Epoch 00010: val_accuracy did not improve from 0.53666
Epoch 11/20
Epoch 00011: val_accuracy improved from 0.53666 to 0.55105, saving model to model.h5
Epoch 12/20
Epoch 00012: val_accuracy did not improve from 0.5510

In [10]:
# Restore the weights from 'model.h5', the file the ModelCheckpoint callback
# writes whenever val_accuracy improves, instead of keeping the last-epoch weights
model.load_weights('model.h5')

### Making predictions and submitting results

In [11]:
# Flip-averaged (TTA) predictions for the test stream
tta_preds = model.predict_tta(test_data)

1000/5000 images processed
2000/5000 images processed
3000/5000 images processed
4000/5000 images processed
5000/5000 images processed
Done


In [12]:
def make_submission(preds, submission_df=sub, out_file='submission.csv', class_names=MAPPING):
    """Decode class scores into labels and write a submission CSV.

    Args:
        preds: array-like of per-class scores, one row per sample, with the
            classes on the last axis.
        submission_df: template DataFrame with one row per sample. It is left
            unmodified; the labels are written to a copy.
        out_file: path of the CSV file to write (the index is not written).
        class_names: mapping from class index to label.

    Returns:
        A new DataFrame: `submission_df` with its 'emotion' column set to the
        decoded labels (the same content that was written to `out_file`).
    """
    labels = [class_names[i] for i in np.asarray(preds).argmax(axis=-1)]
    # .assign returns a new frame, so the shared default `sub` is not mutated
    submission = submission_df.assign(emotion=labels)
    submission.to_csv(out_file, index=False)
    return submission

In [13]:
# Decode the TTA predictions and write them to 'tta_preds.csv'
make_submission(preds=tta_preds, out_file='tta_preds.csv')

This submission scored 0.5736 categorization accuracy on the private leaderboard.