## Load data from kaggle

In [3]:
!pip install kaggle -q
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [4]:
!kaggle competitions download -c plant-pathology-2020-fgvc7

Downloading plant-pathology-2020-fgvc7.zip to /content
 98% 765M/779M [00:04<00:00, 157MB/s]
100% 779M/779M [00:04<00:00, 171MB/s]


In [5]:
!unzip -q /content/plant-pathology-2020-fgvc7.zip

In [6]:
!pip -q install tensorflow
!pip install -q keras

# Data preparation

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.applications.resnet50  import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import RandomFlip, RandomRotation

%matplotlib inline

In [8]:
tf.test.gpu_device_name()

'/device:GPU:0'

In [9]:
SEED = 2007
BATCH_SIZE = 64
TARGET = ['healthy', 'multiple_diseases',	'rust',	'scab']


np.random.seed(SEED)
tf.keras.utils.set_random_seed(SEED)

In [10]:
test_df = pd.read_csv('test.csv')
train_df = pd.read_csv('train.csv')

In [11]:
test_df['image_path'] = '/content/images/' + test_df['image_id'] + '.jpg'
train_df['image_path'] = '/content/images/' + train_df['image_id'] + '.jpg'

In [12]:
n_train = int(len(train_df)*0.8)

In [13]:
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
aug_gen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                        rotation_range=40,
                                        width_shift_range=0.2,
                                        height_shift_range=0.2,
                                        shear_range=0.2,
                                        zoom_range=0.1,
                                        horizontal_flip=True,)

test_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [21]:
train_ds = train_gen.flow_from_dataframe(train_df.iloc[:n_train, :],
                                         x_col='image_path',
                                         y_col=TARGET,
                                         class_mode='raw',
                                         batch_size=BATCH_SIZE,
                                         seed=SEED)
aug_ds = aug_gen.flow_from_dataframe(train_df.iloc[:n_train, :],
                                         x_col='image_path',
                                         y_col=TARGET,
                                         class_mode='raw',
                                         batch_size=BATCH_SIZE,
                                         seed=SEED)
val_ds = test_gen.flow_from_dataframe(train_df.iloc[n_train:, :],
                                      x_col='image_path',
                                      y_col=TARGET,
                                      class_mode='raw',
                                      batch_size=BATCH_SIZE,
                                      seed=SEED,
                                      shuffle=False)
test_ds = test_gen.flow_from_dataframe(test_df, x_col='image_path',
                                        class_mode=None,
                                        batch_size=BATCH_SIZE,
                                        seed=SEED,
                                        shuffle=False)

Found 1456 validated image filenames.
Found 1456 validated image filenames.
Found 365 validated image filenames.
Found 1821 validated image filenames.


# Tuning augmentation

In [15]:
def train_model(train, val=None, lr_tl=0.01, lr_ft=1e-4, drop_rate=0, inner_size=(224,224,3),
                n_epoch=10, fine_tune=True):
    """
    Train parameters of model.

    Args:
        lr_tl: Learning rate for transfer learning.
        model_path: Learning rate for fine tuning.
        drop_rate: Rate of dropout layer.
        inner_size: Size of the inner layer.
        n_epoch: Number of epoches for training.
        augmentation: Augmentation layer.

    Return:
        History of model training.
    """

# Create base model
    base_model = ResNet50(weights='imagenet', input_shape=inner_size, include_top=False)
    base_model.trainable = False
    # Create a new model on top.
    inputs = keras.Input(shape=inner_size)
    x = base_model(inputs, training=False)
    x = keras.layers.GlobalAveragePooling2D()(x)
    x = keras.layers.Dropout(drop_rate)(x)
    outputs = keras.layers.Dense(4, activation='softmax')(x)
    model = keras.Model(inputs, outputs)
    print('====== Strart transfer-learning ======')
    # Transfer-learning train
    model.compile(
                  optimizer=keras.optimizers.Adam(lr_tl),
                  loss=keras.losses.CategoricalCrossentropy(),
                  metrics=[keras.metrics.CategoricalAccuracy()],
                  )
    tl_history = model.fit(train, epochs=n_epoch, validation_data=val)
    if fine_tune:
        print('====== Strart fine-tuning ======')
        # Fine-tuning train
        base_model.trainable=True

        model.compile(
                      optimizer=keras.optimizers.Adam(lr_ft),
                      loss=keras.losses.CategoricalCrossentropy(),
                      metrics=[keras.metrics.CategoricalAccuracy()],
                      )
        fn_history = model.fit(train, epochs=n_epoch, validation_data=val)
        return (model, fn_history)
    return (model, tl_history)

**Train model without augmentation.**

In [None]:
train_inf = train_model(train_ds, val_ds)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**Train model with augmentation.**

In [None]:
aug_train_inf = train_model(aug_ds, val_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**Inference: Model traned with data augmentation shows better accuracy.**

# Train best model using full train dataset.

In [17]:
full_train_ds = aug_gen.flow_from_dataframe(train_df,
                                            x_col='image_path',
                                            y_col=TARGET,
                                            class_mode='raw',
                                            batch_size=BATCH_SIZE,
                                            seed=SEED)

Found 1821 validated image filenames.


In [18]:
full_train_inf = train_model(full_train_ds)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
full_train_inf[0].save('aug_resnet50.h5')

# Predict

In [22]:
pred = full_train_inf[0].predict(test_ds)



In [24]:
submission = pd.DataFrame(pred, columns=TARGET, index=test_df['image_id'])
submission.index.name = 'image_id'
submission.to_csv('submission.csv')

**Final score is 0.9475.**