In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.applications.efficientnet as efn
import scipy as sp
import os
from os.path import isfile, join

# EfficientNet constructors ordered by variant, so EFNETS[i] builds EfficientNetB<i>
# for i in 0..7.
EFNETS = tuple(getattr(efn, f'EfficientNetB{variant}') for variant in range(8))

print(f'tf version: {tf.__version__}')

tf version: 2.3.1


In [2]:
# --- Configuration -----------------------------------------------------------
N_CLASSES = 9   # size of the classifier's softmax output
SEED = 17       # single seed shared by every source of randomness below
IMAGES_DIR = '/kaggle/input/skillbox-emotions/'
# IMAGES_DIR already ends with a separator; join() avoids the doubled '/' that
# plain string concatenation produced.
TEST_DIR = join(IMAGES_DIR, 'test_kaggle')

# Seed the global NumPy and TensorFlow generators so that data shuffling,
# augmentation draws and weight initialisation start from a known state.
# NOTE: some GPU kernels are non-deterministic, so bit-exact repeatability is
# still not guaranteed.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Data --------------------------------------------------------------------
# The first CSV column is dropped (presumably a saved index -- verify against
# the file).
train_df = pd.read_csv('/kaggle/input/skillbox-computer-vision-project/train.csv').iloc[:, 1:]
sub = pd.read_csv('/kaggle/input/skillbox-computer-vision-project/sample_submission.csv')
# Regular files found directly inside the test directory (sub-directories are skipped).
images_test_filenames = [f for f in os.listdir(TEST_DIR) if isfile(join(TEST_DIR, f))]

# Hold out 10% of the labelled rows for validation; random_state makes the
# split repeatable.
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=SEED)

In [3]:
# Training images are randomly augmented; validation and test images are passed
# through unchanged. No rescaling is configured on any of the generators.
train_data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,               # random rotation, up to 10 degrees
    width_shift_range=0.15,          # random horizontal shift, fraction of width
    height_shift_range=0.15,         # random vertical shift, fraction of height
    brightness_range=[0.75, 1.25],   # random brightness factor range
    horizontal_flip=True,            # random left/right mirroring
)
val_data_gen = tf.keras.preprocessing.image.ImageDataGenerator()
test_data_gen = tf.keras.preprocessing.image.ImageDataGenerator()

In [4]:
EFNET_NO = 4     # which EfficientNet variant to use (index into EFNETS / IMSIZES)
BATCH_SIZE = 16
# Square input resolution used for each variant B0..B7, indexed by EFNET_NO.
IMSIZES = (224, 240, 260, 300, 380, 456, 528, 600)
IMAGE_SIZE = IMSIZES[EFNET_NO]

# Training iterator: shuffled and augmented. `seed` is passed so the shuffle
# order and the random transformations are repeatable between runs instead of
# depending on whatever state the global RNG happens to be in.
train_data = train_data_gen.flow_from_dataframe(
    train_df,
    directory=IMAGES_DIR,
    x_col='image_path',
    y_col='emotion',
    class_mode='sparse',   # integer class ids, matching the sparse cross-entropy loss
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED,
)
# Validation iterator: fixed order, no augmentation.
val_data = val_data_gen.flow_from_dataframe(
    val_df,
    directory=IMAGES_DIR,
    x_col='image_path',
    y_col='emotion',
    class_mode='sparse',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
# Test iterator: images only (no labels). shuffle=False keeps predictions in
# the same row order as `sub`.
test_data = test_data_gen.flow_from_dataframe(
    sub,
    directory=TEST_DIR,
    x_col='image_path',
    y_col=None,
    class_mode=None,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

Found 45041 validated image filenames belonging to 9 classes.
Found 5005 validated image filenames belonging to 9 classes.
Found 5000 validated image filenames.


In [5]:
# Classifier: EfficientNet backbone (ImageNet weights, original top removed)
# followed by global average pooling and a softmax layer over N_CLASSES.
model = tf.keras.Sequential([
    EFNETS[EFNET_NO](
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        weights='imagenet',
        include_top=False),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(N_CLASSES, activation='softmax')
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # Sparse variant because the data iterators yield integer class ids.
    loss='sparse_categorical_crossentropy',
    # `metrics` is documented as a list; a bare string is not part of the
    # documented contract. The logged names ('accuracy' / 'val_accuracy')
    # are unchanged.
    metrics=['accuracy'])
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb4 (Functional)  (None, 12, 12, 1792)      17673823  
_________________________________________________________________
global_average_pooling2d (Gl (None, 1792)              0         
_________________________________________________________________
dense (Dense)                (None, 9)                 16137     
Total params: 17,689,960
Trainable params: 17,564,753
Non-trainable params: 125,207
_________________________________________________________________


In [6]:
# Write 'model.h5' only when validation accuracy reaches a new maximum.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Lower the learning rate once validation accuracy has not improved for
# 3 epochs, never going below 3e-7.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    mode='max',
    patience=3,
    min_lr=3e-7,
)

In [7]:
# Train for 7 epochs, evaluating on the validation iterator after each epoch
# with checkpointing and learning-rate reduction active.
training_callbacks = [checkpoint, lr_reducer]
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=7,
    callbacks=training_callbacks,
)

Epoch 1/7
Epoch 00001: val_accuracy improved from -inf to 0.45155, saving model to model.h5
Epoch 2/7
Epoch 00002: val_accuracy improved from 0.45155 to 0.48112, saving model to model.h5
Epoch 3/7
Epoch 00003: val_accuracy improved from 0.48112 to 0.48971, saving model to model.h5
Epoch 4/7
Epoch 00004: val_accuracy improved from 0.48971 to 0.52128, saving model to model.h5
Epoch 5/7
Epoch 00005: val_accuracy improved from 0.52128 to 0.53447, saving model to model.h5
Epoch 6/7
Epoch 00006: val_accuracy did not improve from 0.53447
Epoch 7/7
Epoch 00007: val_accuracy did not improve from 0.53447


In [8]:
# Reload the weights stored in 'model.h5' -- the file the ModelCheckpoint
# callback writes only when val_accuracy improves -- so the following cells
# use the checkpointed weights rather than those from the final epoch.
model.load_weights('model.h5')

In [9]:
# Invert the iterator's {label: class index} table into {class index: label}
# so predicted indices can be turned back into labels.
mapping = {class_index: label for label, class_index in train_data.class_indices.items()}

In [10]:
# `Sequential.predict_classes` is deprecated and absent from later TensorFlow
# releases; for a softmax output the arg-max over the last axis of `predict`
# yields the same class indices.
test_class_ids = np.argmax(model.predict(test_data), axis=-1)
# Translate class indices back to the label values seen during training.
test_preds = [mapping[class_id] for class_id in test_class_ids]
# test_data was built from `sub` with shuffle=False, so predictions line up
# with the submission rows.
sub['emotion'] = test_preds
sub.to_csv('preds.csv', index=False)