In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
import matplotlib.pylab as plt
import PIL
import seaborn as sns
from keras import backend as K
from keras import layers, models, optimizers
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image

In [None]:
# Root of the input data; the image folders and CSV tables all live under it.
DATA_PATH = '../input'
TRAIN_IMG_PATH, TEST_IMG_PATH = (
    os.path.join(DATA_PATH, split_dir) for split_dir in ('train', 'test')
)

# Load the three CSV tables stored directly under DATA_PATH.
df_train, df_test, df_class = (
    pd.read_csv(os.path.join(DATA_PATH, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'class.csv')
)

In [None]:
# Provisional 70/30 split sizes. In the visible notebook a later cell reassigns
# these three names with a 0.8 ratio before anything reads them, so the later
# values are the ones the split and the training steps actually use.
# int() keeps the sample count a whole number instead of a float.
nb_train_sample = int(df_train.shape[0] * 0.7)
nb_validation_sample = df_train.shape[0] - nb_train_sample
nb_test_sample = df_test.shape[0]

In [None]:
# Preview the first rows of the training table.
df_train.head()

In [None]:
# Bar chart of the number of training rows per class, ordered from the least
# to the most frequent class.
plt.figure(figsize=(15,6))
# Pass the series as `x=`: seaborn deprecated positional data vectors in 0.11 and
# newer releases treat the first positional argument as `data`, so the positional
# form no longer draws the intended per-class counts.
sns.countplot(x=df_train["class"], order=df_train["class"].value_counts(ascending=True).index)

In [None]:
# Summary statistics of the number of training rows per class.
df_train['class'].value_counts().describe()

In [None]:
# Average number of training rows per class.
df_train['class'].value_counts().mean()

In [None]:
img_size=224

def crop_boxing_img(img_name, margin=16, imsize=(img_size, img_size)):
    """Crop an image to its bounding box (plus a margin) and resize it.

    The split is inferred from the file name: a name whose first
    ``_``-separated token is ``train`` is read from ``TRAIN_IMG_PATH`` and
    looked up in ``df_train``; any other name uses ``TEST_IMG_PATH`` and
    ``df_test``.

    Args:
        img_name: Image file name, matched against the ``img_file`` column.
        margin: Pixels added on every side of the box; the result is clamped
            to the image bounds.
        imsize: ``(width, height)`` passed to ``resize``.

    Returns:
        The cropped and resized PIL image.

    Raises:
        IndexError: If the selected frame has no bounding-box row for
            ``img_name``.
    """
    if img_name.split('_')[0] == 'train':
        img_dir, boxes = TRAIN_IMG_PATH, df_train
    else:
        img_dir, boxes = TEST_IMG_PATH, df_test

    # `Image` is imported explicitly (a bare `import PIL` does not load the
    # submodule); the context manager releases the file handle on every path.
    with Image.open(os.path.join(img_dir, img_name)) as img:
        pos = boxes.loc[boxes["img_file"] == img_name, ['bbox_x1', 'bbox_y1', 'bbox_x2', 'bbox_y2']].values.reshape(-1)
        if pos.size < 4:
            # Same exception type the bare pos[0] lookup raised, with a usable message.
            raise IndexError("no bounding box found for image {!r}".format(img_name))

        # Grow the box by `margin` without leaving the image.
        width, height = img.size
        x1 = max(0, pos[0] - margin)
        y1 = max(0, pos[1] - margin)
        x2 = min(pos[2] + margin, width)
        y2 = min(pos[3] + margin, height)

        # crop/resize return new images, so the result outlives the closed file.
        return img.crop((x1, y1, x2, y2)).resize(imsize)

In [None]:
# 80/20 train/validation split sizes.
# int() keeps the training count a whole number: the float it replaced leaked
# into the step counts handed to fit_generator and could disagree with the
# number of rows that satisfy `i < nb_train_sample` when the images are split.
nb_train_sample = int(df_train.shape[0] * 0.8)
nb_validation_sample = df_train.shape[0] - nb_train_sample
nb_test_sample = df_test.shape[0]

In [None]:
# Output folders for the cropped images, one per split.
TRAIN_CROPPED_PATH = '../cropped_train'
TEST_CROPPED_PATH = '../cropped_test'
VALID_CROPPED_PATH = '../cropped_valid'

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create gap.
for cropped_dir in (TRAIN_CROPPED_PATH, TEST_CROPPED_PATH, VALID_CROPPED_PATH):
    os.makedirs(cropped_dir, exist_ok=True)
    

In [None]:
# Class labels are used as folder names below, so they must be strings.
df_train["class"] = df_train["class"].astype('str')

# Rows whose index is below nb_train_sample go to the training folder, the rest
# to the validation folder; each image is saved in a sub-folder named after its class.
# NOTE(review): the split follows row order (no shuffling or stratification) — confirm this is intended.
for i, row in df_train.iterrows():
    cropped = crop_boxing_img(row['img_file'])

    split_root = TRAIN_CROPPED_PATH if i < nb_train_sample else VALID_CROPPED_PATH
    # The class is already available on `row`; no second lookup into df_train is needed.
    class_path = os.path.join(split_root, row['class'])
    os.makedirs(class_path, exist_ok=True)

    cropped.save(os.path.join(class_path, row['img_file']))

# Test images have no labels, so they are saved flat in a single folder.
for i, row in df_test.iterrows():
    cropped = crop_boxing_img(row['img_file'])
    cropped.save(os.path.join(TEST_CROPPED_PATH, row['img_file']))

In [None]:
batch_size=32

# The ImageNet weights of InceptionResNetV2 expect pixels scaled to [-1, 1] by the
# model's own preprocess_input, not the [0, 1] range that rescale=1./255 produces.
# The same preprocessing is used for all three generators so that training,
# validation and inference see identically scaled inputs.

# Training images additionally get random geometric augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    zoom_range=0.1,
    fill_mode='nearest'
    )
# Test and validation images are only scaled, never augmented.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Training batches: class labels are inferred from the sub-folder names.
train_generator = train_datagen.flow_from_directory(
    TRAIN_CROPPED_PATH,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical',
    seed=2019,
    color_mode='rgb'
)

# Class names ordered by the index the training generator assigned to them.
# Passing them explicitly below keeps validation labels aligned with training
# labels even if some class has no images in the validation folder; without it
# the validation generator would build its own, possibly shorter, index mapping.
train_class_names = sorted(train_generator.class_indices, key=train_generator.class_indices.get)

validation_generator = valid_datagen.flow_from_directory(
    VALID_CROPPED_PATH,
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical',
    classes=train_class_names,
    seed=2019,
    color_mode='rgb'
)

# Unlabelled test batches, read in dataframe order (shuffle=False) so that
# predictions can be matched back to the rows of df_test.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=TEST_CROPPED_PATH,
    x_col='img_file',
    y_col=None,
    target_size=(img_size, img_size),
    color_mode='rgb',
    class_mode=None,
    batch_size=batch_size,
    shuffle=False
)

In [None]:
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, GlobalAveragePooling2D

# Size the network from the values the data pipeline actually uses instead of
# repeating them as literals: the input must match the generators' target_size,
# and the softmax width must match the one-hot labels the training generator emits.
num_classes = len(train_generator.class_indices)

# ImageNet-pretrained InceptionResNetV2 without its classification head.
inception_module = InceptionResNetV2(weights='imagenet',include_top=False, input_shape = (img_size, img_size, 3))

# New head: global average pooling -> 1024-unit ReLU layer -> dropout -> softmax.
model = models.Sequential()
model.add(inception_module)
model.add(GlobalAveragePooling2D())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

In [None]:
# Adam optimizer with a learning rate of 5e-5.
optimizer = optimizers.Adam(lr=0.00005)
# Categorical cross-entropy loss; accuracy is tracked under the name 'acc',
# which is the key the early-stopping monitor ('val_acc') and the history plots rely on.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])


In [None]:
def get_steps(num_samples, batch_size):
    """Return the number of batches needed to cover ``num_samples`` items.

    Args:
        num_samples: Total number of samples; may be an int or a float.
        batch_size: Number of samples per batch.

    Returns:
        int: ``num_samples / batch_size`` rounded up. The result is always an
        ``int``, even when ``num_samples`` is a float, so it can be used
        directly as a step count.
    """
    # Negated floor division is ceiling division; int() drops the float type
    # that a float num_samples would otherwise carry into the result.
    return int(-(-num_samples // batch_size))

In [None]:
# Early stopping watches validation accuracy ('val_acc') with a patience of 3 epochs.
es = EarlyStopping(monitor='val_acc', min_delta=0, patience=3, verbose=1, mode='auto')
callbackList = [es]
# Upper bound on the number of epochs; the early-stopping callback can end training sooner.
epochs=300
# Train from the directory generators; the step counts are derived from the
# sample counts and the batch size so that each epoch covers every image.
history = model.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_sample, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_sample, batch_size),
    callbacks = callbackList
)

In [None]:
# Per-epoch metrics recorded by fit_generator.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x-axis: rebinding `epochs` would overwrite the
# epoch budget that the training cell passes to fit_generator and break a re-run
# of that cell.
epoch_axis = range(1, len(acc) + 1)

# Accuracy curves: dots for training, solid line for validation.
plt.plot(epoch_axis, acc, 'bo', label='Training Acc')
plt.plot(epoch_axis, val_acc, 'b', label='Validation Acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Loss curves, same styling as above.
plt.plot(epoch_axis, loss, 'bo', label='Training loss')
plt.plot(epoch_axis, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()