<font size="5">Code for predicting yoga poses in the [Kaggle competition](https://www.kaggle.com/competitions/ukraine-ml-bootcamp-2023).</font>

<font size="3">Prepare dataset to be used by tensorflow image generator</font>

In [2]:
import csv
import os
import shutil

import pandas as pd
import tensorflow as tf

# Kaggle input locations; later cells read the images and the label CSV from these paths.
base_dir = '/kaggle/input/ml-bootcamp-competition-2023'
train_dir = os.path.join(base_dir, 'images', 'train_images')
test_dir = os.path.join(base_dir, 'images', 'test_images')
train_csv = os.path.join(base_dir, 'train.csv')

# Copy every training image into /tmp/train_images/<label>/ so that
# flow_from_directory can take the class from the sub-directory name.
# newline='' is how the csv module expects files to be opened for csv.reader.
with open(train_csv, 'r', newline='') as file:
    csvreader = csv.reader(file)
    next(csvreader, None)  # skip the header row; tolerate a completely empty file
    for row in csvreader:
        image_name, label = row[0], row[1]
        class_dir = os.path.join('/tmp/train_images', label)
        target_file = os.path.join(class_dir, image_name)
        os.makedirs(class_dir, exist_ok=True)
        # Files that are already in place are skipped, so re-running the cell is cheap.
        if not os.path.isfile(target_file):
            shutil.copyfile(os.path.join(train_dir, image_name), target_file)

<font size="3">Add f1 score metric as it's the evaluation metric for this competition.</font>

In [4]:
from tensorflow.keras import backend as K

def custom_f1_score(y_true, y_pred):
    """Return an F1 score computed over all elements of the given tensors.

    True positives, false positives and false negatives are obtained by
    combining the two tensors element-wise, clipping to [0, 1], rounding and
    summing. Precision, recall and F1 follow from those counts, with
    ``K.epsilon()`` added to each denominator to avoid division by zero.

    Assumes ``y_true`` holds 0/1 targets and ``y_pred`` holds values in
    [0, 1] (e.g. softmax output) -- TODO confirm against the callers.
    """
    def _rounded_sum(tensor):
        # Clip into [0, 1], round to the nearest integer, then add everything up.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    eps = K.epsilon()

    true_pos = _rounded_sum(y_true * y_pred)
    false_pos = _rounded_sum(y_pred - y_true)
    false_neg = _rounded_sum(y_true - y_pred)

    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)

    return 2 * ((precision * recall) / (precision + recall + eps))

<font size="3">Train a model with a simple CNN. It gave a private score of around 0.44.</font>

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

TRAINING_DIR = '/tmp/train_images'
img_size = (150, 150)

# Alternative way to build the training/validation datasets (kept for reference):
# train_dataset = tf.keras.utils.image_dataset_from_directory(TRAINING_DIR,
#                                                             batch_size=30,
#                                                             image_size=(224,224),
#                                                             validation_split=0.2,
#                                                             subset='training',
#                                                             seed=42)
# validation_dataset = tf.keras.utils.image_dataset_from_directory(TRAINING_DIR,
#                                                                  batch_size=30,
#                                                                  image_size=(224,224),
#                                                                  validation_split=0.2,
#                                                                  subset='validation',
#                                                                  seed=42)

# Image augmentation didn't provide a better result (kept for reference):
# training_datagen = ImageDataGenerator(rescale = 1./255,
#     rotation_range=20,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     shear_range=0.2,
#     zoom_range=0.2,
#     fill_mode='nearest')

# Both generators share the same rescaling and 80/20 split configuration.
datagen_options = dict(rescale=1./255, validation_split=0.2)
training_datagen = ImageDataGenerator(**datagen_options)
val_datagen = ImageDataGenerator(**datagen_options)

train_generator = training_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=img_size,
    batch_size=30,
    class_mode='categorical',
    subset='training',
)
validation_generator = val_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=img_size,
    batch_size=8,
    class_mode='categorical',
    subset='validation',
)

# Three Conv2D + MaxPooling2D stages (32, 64, 128 filters) followed by a dense head.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_size + (3,))))
model.add(tf.keras.layers.MaxPooling2D(3, 3))
for filters in (64, 128):
    model.add(tf.keras.layers.Conv2D(filters, (3, 3), activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D(3, 3))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(200, activation='relu'))
model.add(tf.keras.layers.Dense(6, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=[custom_f1_score],
)
model.fit(train_generator, validation_data=validation_generator, epochs=15)

<font size="3">Start using transfer learning (ResNet50). It gave a private score of around 0.55.</font>

In [13]:
from sklearn.model_selection import train_test_split

TRAINING_DIR = os.path.join(base_dir, 'images', 'train_images')
img_size = (224, 224)

# flow_from_dataframe with class_mode='categorical' needs string labels.
df = pd.read_csv(train_csv)
df['class_6'] = df['class_6'].astype(str)

# One generator serves both subsets; it applies the ResNet50 preprocessing
# and reserves 20% of the rows for validation.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    validation_split=0.2,
)

# Arguments common to the training and validation iterators.
shared_flow_args = dict(
    dataframe=df,
    directory=TRAINING_DIR,
    x_col='image_id',
    y_col='class_6',
    target_size=img_size,
    color_mode='rgb',
    class_mode='categorical',
    shuffle=True,
    seed=42,
)
train_images = train_generator.flow_from_dataframe(
    batch_size=32, subset='training', **shared_flow_args
)
val_images = train_generator.flow_from_dataframe(
    batch_size=8, subset='validation', **shared_flow_args
)

# Frozen ImageNet backbone with global average pooling as the feature extractor.
pretrained_model = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(img_size + (3,)),
)
pretrained_model.trainable = False

# Small dense head: 64 -> 50 -> 6-way softmax.
inputs = pretrained_model.input
x = pretrained_model.output
for units in (64, 50):
    x = tf.keras.layers.Dense(units, activation='relu')(x)
outputs = tf.keras.layers.Dense(6, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)
learning_rate = 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy', custom_f1_score],
)
callbacks = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)
model.fit(
    train_images,
    validation_data=val_images,
    epochs=15,
    callbacks=[callbacks],
)

Found 1888 validated image filenames belonging to 6 classes.
Found 472 validated image filenames belonging to 6 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7e913426eda0>

<font size="3">Use an EfficientNet model (it scored better than the ResNet50 model). Unfreezing and training some of its layers also gives a better result.
Unfreezing more layers gives a better score (dropout was increased together with the number of unfrozen layers to get a better result: 20 unfrozen layers - 0.2 dropout, 40 - 0.4).
The EfficientNetB0 architecture showed the best result with the current top architecture. Other architectures (EfficientNetB3/5/etc.) gave worse results. It gave a private score of 0.81.</font>

In [17]:
from tensorflow.keras.applications import EfficientNetB0

TRAINING_DIR = os.path.join(base_dir, 'images', 'train_images')
img_size = (224, 224)

def unfreeze_model(model, num_layers=40, learning_rate=1e-4):
    """Make the last ``num_layers`` layers of ``model`` trainable and recompile it.

    BatchNormalization layers among them keep their current ``trainable``
    setting. The model is compiled with Adam, categorical cross-entropy and
    the accuracy / custom F1 metrics.

    Args:
        model: Keras model to modify in place.
        num_layers: how many trailing layers to consider; 0 unfreezes nothing.
        learning_rate: learning rate for the Adam optimizer.
    """
    # model.layers[-0:] would select every layer, so handle 0 explicitly.
    layers_to_unfreeze = model.layers[-num_layers:] if num_layers > 0 else []
    for layer in layers_to_unfreeze:
        if not isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = True

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy", custom_f1_score]
    )

# flow_from_dataframe with class_mode='categorical' needs string labels.
df = pd.read_csv(train_csv)
df['class_6'] = df['class_6'].astype(str)

# No rescaling or preprocessing_function: Keras' EfficientNet models take raw
# 0-255 pixel values (input preprocessing is part of the model).
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2
)
train_images = train_generator.flow_from_dataframe(
    dataframe=df,
    directory=TRAINING_DIR,
    x_col='image_id',
    y_col='class_6',
    target_size=img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)
val_images = train_generator.flow_from_dataframe(
    dataframe=df,
    directory=TRAINING_DIR,
    x_col='image_id',
    y_col='class_6',
    target_size=img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=8,
    shuffle=True,
    seed=42,
    subset='validation'
)

pretrained_model = EfficientNetB0(
    input_shape=(img_size + (3,)),
    include_top=False,
    weights='imagenet'
)
# Freeze the whole backbone first; unfreeze_model() re-enables the last layers.
pretrained_model.trainable = False

inputs = pretrained_model.input
x = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(pretrained_model.output)
# x = tf.keras.layers.BatchNormalization()(x)
top_dropout_rate = 0.4
x = tf.keras.layers.Dropout(top_dropout_rate, name="top_dropout")(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
outputs = tf.keras.layers.Dense(6, activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)
unfreeze_model(model)
# Stop on the *validation* F1 score: the training metric keeps improving while
# the model overfits, so it is not a useful stopping signal when validation
# data is available. Keep the weights of the best validation epoch.
callbacks = tf.keras.callbacks.EarlyStopping(
    monitor="val_custom_f1_score", patience=4, mode="max", restore_best_weights=True
)
model.fit(train_images, validation_data=val_images, epochs=30, callbacks=[callbacks])

Found 1888 validated image filenames belonging to 6 classes.
Found 472 validated image filenames belonging to 6 classes.
Epoch 1/30


2023-09-10 07:11:43.835949: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_2/block2b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


<keras.callbacks.History at 0x7e91116741f0>

<font size="3">Train the model on the complete dataset. The private score increased to 0.8256.</font>

In [19]:
from tensorflow.keras.applications import EfficientNetB0

TRAINING_DIR = os.path.join(base_dir, 'images', 'train_images')
img_size = (224, 224)

def unfreeze_model(model, num_layers=40, learning_rate=1e-4):
    """Make the last ``num_layers`` layers of ``model`` trainable and recompile it.

    BatchNormalization layers among them keep their current ``trainable``
    setting. The model is compiled with Adam, categorical cross-entropy and
    the accuracy / custom F1 metrics.

    Args:
        model: Keras model to modify in place.
        num_layers: how many trailing layers to consider; 0 unfreezes nothing.
        learning_rate: learning rate for the Adam optimizer.
    """
    # model.layers[-0:] would select every layer, so handle 0 explicitly.
    layers_to_unfreeze = model.layers[-num_layers:] if num_layers > 0 else []
    for layer in layers_to_unfreeze:
        if not isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = True

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy", custom_f1_score]
    )

# flow_from_dataframe with class_mode='categorical' needs string labels.
df = pd.read_csv(train_csv)
df['class_6'] = df['class_6'].astype(str)

# No validation_split here: every labelled image is used for training.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator()
train_images = train_generator.flow_from_dataframe(
    dataframe=df,
    directory=TRAINING_DIR,
    x_col='image_id',
    y_col='class_6',
    target_size=img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42
)

pretrained_model = EfficientNetB0(
    input_shape=(img_size + (3,)),
    include_top=False,
    weights='imagenet'
)
# Freeze the whole backbone first; unfreeze_model() re-enables the last layers.
pretrained_model.trainable = False

inputs = pretrained_model.input
x = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(pretrained_model.output)
# x = tf.keras.layers.BatchNormalization()(x)
top_dropout_rate = 0.4
x = tf.keras.layers.Dropout(top_dropout_rate, name="top_dropout")(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
outputs = tf.keras.layers.Dense(6, activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)
unfreeze_model(model)
# There is no validation data in this run, so the training F1 score is the
# only metric available for early stopping.
callbacks = tf.keras.callbacks.EarlyStopping(monitor="custom_f1_score", patience=4, mode="max")
model.fit(train_images, epochs=30, callbacks=[callbacks])

Found 2360 validated image filenames belonging to 6 classes.
Epoch 1/30


2023-09-10 07:58:08.406447: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_3/block2b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30


<keras.callbacks.History at 0x7e91109368c0>

<font size="3">Predict the test images</font>

In [20]:
import numpy as np

from tensorflow.keras.utils import load_img, img_to_array
from IPython.display import HTML

# The network outputs an index into the generator's class ordering; translate it
# back to the label from train.csv instead of assuming index == label.
index_to_label = {index: label for label, index in train_images.class_indices.items()}

row_list = [["image_id", "class_6"]]
# sorted() makes the row order of the submission reproducible across runs.
for image in sorted(os.listdir(test_dir)):
    img = load_img(os.path.join(test_dir, image), target_size=img_size)
    # Add the batch dimension expected by model.predict: (1, height, width, channels).
    x = np.expand_dims(img_to_array(img), axis=0)
    # verbose=0 keeps Keras from printing a progress bar for every single image.
    classes = model.predict(x, verbose=0)
    yoga_class = index_to_label[int(np.argmax(classes))]
    row_list.append([image, yoga_class])

submission_file = os.path.join('/kaggle/working', 'submission.csv')
with open(submission_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(row_list)

