In [None]:
import gc
import os
import pylab
import zipfile
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.applications as efn
from keras.preprocessing.image import ImageDataGenerator, load_img

## Dataset

### Load dataset

In [None]:
# Hold-out fraction, shuffle seed, batch size and target image resolution.
split = 0.2
seed = 19260817
batch_size = 16
img_size = (224, 224)

# Options shared by the training and validation loaders. Both calls receive
# the same `seed` and `validation_split`; only `subset` differs.
_loader_options = dict(
    labels="inferred",
    label_mode="int",
    color_mode="rgb",
    batch_size=batch_size,
    image_size=img_size,
    shuffle=True,
    seed=seed,
    validation_split=split,
)

train_ds = keras.utils.image_dataset_from_directory(
    'dataset/train',
    subset='training',
    **_loader_options,
)
val_ds = keras.utils.image_dataset_from_directory(
    'dataset/train',
    subset='validation',
    **_loader_options,
)

### Visualize dataset

In [None]:
# Class index -> display name used in the plot titles.
# NOTE(review): assumes the inferred class order is big/normal/small —
# confirm against train_ds.class_names.
label_names = {0: 'big', 1: 'normal', 2: 'small'}

# Create the 3x4 grid once. The previous version made a single full-figure
# Axes and then drew plt.subplot() panels on top of it, which leaves a stray
# empty frame behind the images on newer Matplotlib releases.
fig, ax = plt.subplots(3, 4, figsize=(15, 7))
for img, label in train_ds.take(1):
    images = img.numpy()
    labels = label.numpy()
    for i, panel in enumerate(ax.flat):
        panel.axis("off")
        if i >= len(images):
            # The batch holds fewer images than the grid has panels.
            continue
        # Pixel values are divided by 255 so imshow receives floats in [0, 1].
        panel.imshow(images[i] / 255)
        panel.set_title("label: " + label_names[int(labels[i])])
plt.show()

## EfficientNet Model

### Plot history

In [None]:
def plot_history(history, title=''):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: object whose ``history`` attribute is a mapping with the
            keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
        title: optional prefix for both panel titles; when non-empty it is
            followed by a single space.
    """
    prefix = title + ' ' if title != '' else ''
    # Plotting the loss and accuracy curves on a 1x2 grid.
    fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (16, 6))
    # (axes, history key, caption, legend position) for each panel.
    panels = (
        (ax[0], 'loss', 'Loss', 'upper right'),
        (ax[1], 'accuracy', 'Accuracy', 'lower right'),
    )
    for axis, key, caption, legend_loc in panels:
        sns.lineplot(data = history.history[key], label = 'Training ' + caption, ax = axis)
        sns.lineplot(data = history.history['val_' + key], label = 'Validation ' + caption, ax = axis)
        axis.legend(loc = legend_loc)
        axis.set_title(prefix + caption)

### Augmentation

In [None]:
# Random augmentation layers, applied in the order listed: horizontal flip,
# rotation, translation, zoom and contrast jitter.
_augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomTranslation(0.14, 0.14),
    layers.RandomZoom(0.2),
    layers.RandomContrast(0.2),
]
data_augmentation = keras.Sequential(_augmentation_layers)

### Save best callback and load

In [None]:
def save_best(name:str='model', patient=3):
    """Build the callback list used during training.

    Args:
        name: checkpoint file stem; the checkpoint path is ``name + ".keras"``.
        patient: value forwarded as ``patience`` to the early-stopping callback.

    Returns:
        A two-element list: a ModelCheckpoint configured with
        ``save_best_only=True`` on ``val_loss``, followed by an EarlyStopping
        that also monitors ``val_loss`` with ``min_delta=0.0005``.
    """
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=name + ".keras",
        monitor="val_loss",
        save_best_only=True,
    )
    early_stop = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.0005,
        patience=patient,
    )
    return [checkpoint, early_stop]

def load_best(name:str='model'):
    """Load the model stored at ``name + ".keras"`` (the path save_best writes to)."""
    checkpoint_path = name + ".keras"
    return keras.models.load_model(checkpoint_path)

### Load EfficientNet model

In [None]:
# Untrained (weights=None) EfficientNet-B0 ... B7 classifiers, each with its
# own 3-class top, indexed by variant number. The constructors are looked up
# by name so the eight identical calls are written once.
EfficientNetB = [
    getattr(efn, 'EfficientNetB' + str(i))(
        weights=None,
        include_top=True,
        input_shape=img_size+(3,),
        classes=3,
    )
    for i in range(8)
]

In [None]:
# Indices (into EfficientNetB / names) of the variants to train and compare.
using_model = list(range(8))

In [None]:
# Name of each variant, indexed like EfficientNetB; also used as the
# checkpoint file stem and the history key during training.
names = [f'EfficientNetB{i}' for i in range(8)]
names

In [None]:
def get_model(EfficientNet):
    """Wrap a backbone in a compiled keras.Model.

    Args:
        EfficientNet: callable applied to the input tensor to produce the
            model outputs (here, one of the EfficientNetB entries).

    Returns:
        A keras.Model compiled with the rmsprop optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    input_shape = img_size + (3,)
    inputs = keras.Input(shape=input_shape)
    # The data_augmentation pipeline is deliberately not applied here.
    model = keras.Model(inputs, EfficientNet(inputs))
    compile_options = {
        "optimizer": "rmsprop",
        "loss": "sparse_categorical_crossentropy",
        "metrics": ["accuracy"],
    }
    model.compile(**compile_options)
    return model

In [None]:
def train_model(model,name:str='model', epochs:int=100):
    """Fit `model` on train_ds, validating on val_ds.

    Args:
        model: compiled model exposing ``fit``.
        name: checkpoint file stem forwarded to save_best.
        epochs: upper bound on the number of epochs passed to ``fit``
            (default 100, the previously hard-coded value). The callbacks
            from save_best include early stopping.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    return model.fit(
        train_ds,
        epochs=epochs,
        validation_data=val_ds,
        callbacks=save_best(name),
    )

In [None]:
# Print the architecture summary of the first entry of EfficientNetB (the B0 variant).
EfficientNetB[0].summary()

### Train models

In [None]:
# Train every selected variant and keep its training history keyed by the
# variant name. Iterates `using_model` (the loop previously referenced the
# undefined name `usingModel`, which raised NameError).
historys = {}
for i in using_model:
    model = get_model(EfficientNetB[i])
    history = train_model(model, name=names[i])
    historys[names[i]] = history

In [None]:
# Draw the curves for each trained variant and save the figure as <name>.png.
for name, model_history in historys.items():
    plot_history(model_history, title=name)
    plt.savefig(name + '.png')

### Compare models

In [None]:
def get_loss_table():
    """Evaluate the best checkpoint of every selected variant on val_ds.

    Returns:
        A list with one ``model.evaluate(val_ds)`` result per entry of
        `using_model`, in that order. With the metrics compiled in get_model
        each result is presumably ``[loss, accuracy]`` — verify if the
        compile settings change.
    """
    # `results` replaces the former local named `list`, which shadowed the builtin.
    results = []
    for i in using_model:
        best = load_best(names[i])
        results.append(best.evaluate(val_ds))
    return results

In [None]:
# One evaluation result per entry of `using_model`, computed on val_ds from the saved checkpoints.
loss_table = get_loss_table()

In [None]:
# Names of the selected variants, in the same order as `using_model`.
using_model_name = [names[idx] for idx in using_model]
using_model_name

In [None]:
# Use the fully-qualified option name: the bare 'precision' pattern matches
# more than one option on current pandas and raises OptionError.
pd.set_option('display.precision', 10)
# One row per evaluated variant; columns follow the evaluate() output order.
compare_table = pd.DataFrame(loss_table, columns=['loss', 'accuracy'], index=using_model_name)
cm = sns.light_palette("blue", as_cmap=True)
# The Styler has its own precision setting, so request 10 digits explicitly
# to keep the rendered table consistent with the option above.
compare_table.head(len(using_model)).style.format(precision=10).background_gradient(cmap=cm)

## Prediction

### Select the best model

In [None]:
# Row label (variant name) with the highest validation accuracy.
best_model_name = compare_table['accuracy'].idxmax()
# Reload that variant's saved checkpoint for prediction.
model = load_best(best_model_name)

### Prepare testing dataset

In [None]:
# os.listdir returns entries in arbitrary order; sort them so the row order
# of test_df (and therefore of the submission file) is reproducible.
test_filenames = sorted(os.listdir('dataset/test'))
test_df = pd.DataFrame({
    'id': test_filenames
})
# Number of test files found.
nb_samples = test_df.shape[0]
nb_samples

In [None]:
# Plain generator: no augmentation or rescaling options are set.
test_gen = ImageDataGenerator()
# Unlabelled, unshuffled batches read from dataset/test using the filenames
# in test_df['id'], so predictions come back in test_df row order.
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="dataset/test",
    x_col='id',
    y_col=None,
    class_mode=None,
    target_size=img_size,
    batch_size=batch_size,
    shuffle=False,
)

### Predict and view

In [None]:
# Model outputs for every test image, in generator order.
predict = model.predict(test_generator)
# Index of the highest-scoring output along the last axis = predicted class id.
predicted_class_ids = np.argmax(predict, axis=-1)
test_df['labels'] = predicted_class_ids

In [None]:
# Display the test filenames together with their predicted class ids.
test_df

In [None]:
# Class index -> display name used in the plot titles.
label_names = {0: 'big', 1: 'normal', 2: 'small'}

# Draw up to 12 random test images; sample() would raise if asked for more
# rows than the frame holds.
preview = test_df.sample(min(12, len(test_df)))

# Create the 3x4 grid once instead of stacking plt.subplot() panels on top of
# a full-figure Axes, and blank every panel so unused ones stay empty.
fig, ax = plt.subplots(3, 4, figsize=(15, 7))
for panel in ax.flat:
    panel.axis("off")

for panel, (index, row) in zip(ax.flat, preview.iterrows()):
    img = plt.imread(os.path.join('dataset/test', row['id']))
    panel.imshow(img)
    # Title with this row's predicted class. The previous code indexed
    # `label`, a tensor left over from the training-batch preview cell, so
    # the titles did not correspond to the images shown.
    panel.set_title("label: " + label_names[int(row['labels'])])
plt.show()

### Submit

In [None]:
# Write the id/labels columns as-is, without the integer row index.
# The former `test_df.set_index('id')` call was removed: it returns a new
# frame and its result was discarded, so it had no effect. Assigning it
# instead would make `index=False` drop the id column from the file.
test_df.to_csv('submission.csv', index=False)