### Load modules

In [1]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

Using TensorFlow backend.


In [2]:
import os
import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from sklearn.model_selection import KFold

In [3]:
import h5py

### Load train image list

In [4]:
# Image index used below through its 'subject', 'classname' and 'img' columns.
df = pd.read_csv('driver_imgs_list.csv')
# Unique driver identifiers (np.unique returns them sorted); the K-fold split is done over these.
subjects = np.unique(df['subject'])
# Unique class labels (sorted); reused as directory names and as prediction column names.
classnames = np.unique(df['classname'])

### 5-fold train/validation split by driver

In [5]:
# Split at the driver level so that no driver appears in both the training
# and the validation part of the same fold.
kf = KFold(n_splits=5, random_state=42, shuffle=True)
fold_indices = list(kf.split(subjects))
# subjects_t[k] / subjects_v[k]: driver ids used for training / validation in fold k.
subjects_t = [list(subjects[train_idx]) for train_idx, _ in fold_indices]
subjects_v = [list(subjects[val_idx]) for _, val_idx in fold_indices]

Split the training image list into the training and validation rows of one fold.

In [6]:
def split_list(subjects_t, subjects_v):
    """Select the rows of the global image list ``df`` for one fold.

    Parameters
    ----------
    subjects_t : collection of driver ids assigned to training.
    subjects_v : collection of driver ids assigned to validation.

    Returns
    -------
    (df_t, df_v) : the rows of ``df`` whose ``subject`` is in ``subjects_t``
        and in ``subjects_v`` respectively (original index preserved).
    """
    train_mask = df['subject'].isin(subjects_t)
    val_mask = df['subject'].isin(subjects_v)
    return df.loc[train_mask], df.loc[val_mask]

### original dataset dir.

In [7]:
# Root directory of the original dataset; the train/test paths below are derived from it.
dataset_dir = '/dataset_dir'
# Alternative root kept for reference (a relative path to a resized copy of the dataset):
# dataset_dir = '../../dataset_resized'

# Raw training images; read as <train_raw_dir>/<classname>/<img> by the symlink/copy helpers.
train_raw_dir = dataset_dir + '/train'
# Test images, passed to flow_from_directory with class_mode=None.
test_dir = dataset_dir + '/test'

### Create the per-fold training and validation directories

In [8]:
import shutil
from shutil import *

In [9]:
from tqdm import tqdm

In [10]:
def mkdir_train_val(fold):
    """Recreate empty ``train<fold>`` and ``validation<fold>`` directory trees.

    Any existing entry with either name in the current working directory is
    removed first; afterwards each of the two directories contains one empty
    sub-directory per entry of the global ``classnames``.
    """
    split_dirs = ['train' + str(fold), 'validation' + str(fold)]

    # Drop leftovers from a previous run so stale files cannot leak into this fold.
    for split_dir in split_dirs:
        if split_dir in os.listdir():
            shutil.rmtree(split_dir)

    for split_dir in split_dirs:
        for cls in classnames:
            os.makedirs(split_dir + '/' + cls)

In [11]:
def symlink_train_val(df_t, df_v, fold, train_dir):
    """Populate ``train<fold>`` / ``validation<fold>`` with symlinks to the raw images.

    Parameters
    ----------
    df_t, df_v : DataFrames with ``classname`` and ``img`` columns listing the
        training and validation images of this fold.
    fold : value appended to the ``train`` / ``validation`` directory names.
    train_dir : directory holding the raw images as ``<classname>/<img>``;
        may be absolute or relative to the current working directory.

    The destination directories are recreated from scratch via
    ``mkdir_train_val(fold)`` before any link is made.
    """
    mkdir_train_val(fold)
    # A symlink target is resolved relative to the directory containing the
    # link, not the current working directory, so a relative ``train_dir``
    # would yield dangling links. Resolve it to an absolute path once.
    src_root = os.path.abspath(train_dir)
    for split_name, split_df in (('train', df_t), ('validation', df_v)):
        dst_root = split_name + str(fold)
        # Iterate the two columns directly instead of per-row ``iloc`` lookups.
        rows = zip(split_df['classname'], split_df['img'])
        for cls, img in tqdm(rows, total=len(split_df)):
            os.symlink(os.path.join(src_root, cls, img),
                       os.path.join(dst_root, cls, img))

In [12]:
def copy_train_val(df_t, df_v, fold, train_dir):
    """Populate ``train<fold>`` / ``validation<fold>`` with copies of the raw images.

    Same layout as the symlink variant, but each file listed in ``df_t`` /
    ``df_v`` (columns ``classname`` and ``img``) is copied from
    ``<train_dir>/<classname>/<img>`` instead of being linked. The destination
    directories are recreated via ``mkdir_train_val(fold)`` first.
    """
    mkdir_train_val(fold)
    for prefix, frame in (('train', df_t), ('validation', df_v)):
        for row in tqdm(range(len(frame))):
            record = frame.iloc[row]
            # Path of the image relative to both the source and destination roots.
            rel_path = record['classname'] + '/' + record['img']
            copyfile(train_dir + '/' + rel_path, prefix + str(fold) + '/' + rel_path)

In [13]:
# range(1,2) yields only fold == 1, i.e. the second entry of subjects_t / subjects_v
# (lists are 0-indexed). Widen the range to materialise more folds.
for fold in range(1,2):
    df_t, df_v = split_list(subjects_t[fold], subjects_v[fold])
    # Creates 'train1' and 'validation1' filled with symlinks into train_raw_dir.
    symlink_train_val(df_t, df_v, fold, train_raw_dir)

100%|██████████| 18418/18418 [02:30<00:00, 122.39it/s]
100%|██████████| 4006/4006 [00:32<00:00, 124.36it/s]


In [14]:
# Directories of the fold used for training below; the names match the
# 'train' + str(fold) / 'validation' + str(fold) pattern with fold == 1.
train_dir = 'train1'
val_dir = 'validation1'

### preprocessing images

In [15]:
# preprocess function of VGG and ResNet50
# preprocess function of VGG and ResNet50
def preprocess_fn(x):
    """Convert an RGB image to zero-centred BGR.

    Parameters
    ----------
    x : image accepted by ``img_to_array``; the channel axis is expected to be
        the last of three axes (the code indexes ``[:, :, channel]``).

    Returns
    -------
    A new array of the same shape and dtype as ``img_to_array(x)`` with the
    channel order reversed (RGB -> BGR) and the per-channel means
    103.939 (B), 116.779 (G), 123.68 (R) subtracted.

    The result is a new array: subtracting in place on the reversed *view*
    would write through to the caller's data whenever ``img_to_array`` hands
    back its input without copying.
    """
    x = img_to_array(x)
    # Means are listed in BGR order to match the reversed channel axis.
    bgr_mean = np.array([103.939, 116.779, 123.68], dtype=x.dtype)
    # RGB >> BGR, then zero-center by mean pixel (allocates a new array).
    return x[:, :, ::-1] - bgr_mean

In [16]:
# Default input image size of VGG and ResNet50; passed to every generator
# below as target_size=(img_height, img_width).
img_width, img_height = 224, 224

In [17]:
# Batch size shared by all generators, the TensorBoard callback and the step-count computations.
batch_size = 32

In [18]:
# Generator factory without augmentation options: only preprocess_fn is applied to each image.
data_gen = ImageDataGenerator(preprocessing_function=preprocess_fn)

Generators without augmentation (preprocessing only)

In [22]:
# Non-augmented training batches with one-hot ('categorical') labels.
train_generator = data_gen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    batch_size=batch_size,
)

Found 17446 images belonging to 10 classes.


In [19]:
# Non-augmented validation batches; used as validation_data during fine-tuning.
val_generator = data_gen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    batch_size=batch_size,
)

Found 4006 images belonging to 10 classes.


In [22]:
# Test batches: no labels (class_mode=None) and no shuffling, so that the
# predictions can be matched against test_generator.filenames by position.
test_generator = data_gen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
)

Found 79726 images belonging to 1 classes.


Generators for transfer learning (unshuffled, so outputs follow the file order)

In [None]:
# Training batches in fixed (unshuffled) order, without augmentation.
train_tf_generator = data_gen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    shuffle=False,
    batch_size=batch_size,
)

In [None]:
# Validation batches in fixed (unshuffled) order, without augmentation.
val_tf_generator = data_gen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    shuffle=False,
    batch_size=batch_size,
)

Generators with data augmentation (random rotation and shifts)

In [20]:
# Augmenting generator factory: rotation and width/height shift options are
# set in addition to the same preprocess_fn used by data_gen.
data_gen_aug = ImageDataGenerator(
    rotation_range=30.,
    width_shift_range=0.1,
    height_shift_range=0.1,
    preprocessing_function=preprocess_fn,
)

In [21]:
# Augmented training batches; this is the generator fed to fit_generator below.
train_aug_generator = data_gen_aug.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    batch_size=batch_size,
)

Found 18418 images belonging to 10 classes.


In [42]:
# Augmented validation batches (the training call below validates on the
# non-augmented val_generator instead).
val_aug_generator = data_gen_aug.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    batch_size=batch_size,
)

Found 4978 images belonging to 10 classes.


In [44]:
# Augmented, unlabelled test batches in fixed (unshuffled) order.
test_aug_generator = data_gen_aug.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
)

Found 79726 images belonging to 1 classes.


### Fine-tuning ResNet50

In [23]:
from keras.optimizers import *
from keras.callbacks import *

In [24]:
# Two candidate optimizers, both with a learning rate of 1e-4 and a decay of 1e-6.
# Only `adam` is passed to model_build in this notebook; `sgd` is an unused alternative.
adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)
sgd = SGD(lr=1e-4, decay=1e-6, momentum=0.9, nesterov=True)

In [25]:
def model_build(MODEL, optimizer, layer_num_fix, top_dropout=False, num_classes=10):
    """Build and compile a fine-tuning model on top of an ImageNet-pretrained network.

    Parameters
    ----------
    MODEL : callable returning the base network when called with
        ``weights='imagenet', include_top=True`` (e.g. ``ResNet50``); its
        ``__name__`` becomes the name of the returned model.
    optimizer : optimizer passed to ``compile``.
    layer_num_fix : number of leading layers of the new model to freeze
        (``layers[:layer_num_fix]`` get ``trainable = False``).
    top_dropout : if True, insert ``Dropout(0.4)`` between the base network's
        penultimate layer output and the new classifier.
    num_classes : width of the new softmax layer (default 10).

    Returns
    -------
    The compiled model. The original top classifier is replaced by a new
    ``Dense(num_classes, softmax)`` layer attached to the output of the base
    network's second-to-last layer.
    """
    base_model = MODEL(weights='imagenet', include_top=True)

    # Attach the new head to the output of the layer just below the original classifier.
    features = base_model.layers[-2].output
    if top_dropout:
        features = Dropout(0.4, name='top_dropout')(features)
    predictions = Dense(num_classes, activation='softmax', name='top_prediction')(features)

    model_ft = Model(base_model.input, predictions, name=MODEL.__name__)

    # Freeze BEFORE compiling: changes to ``trainable`` made after ``compile``
    # do not take effect until the model is compiled again.
    for layer in model_ft.layers[:layer_num_fix]:
        layer.trainable = False

    model_ft.compile(optimizer=optimizer,
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
    return model_ft

In [26]:
# ResNet50 fine-tuning model compiled with the Adam optimizer; layer_num_fix=142 asks
# model_build to mark the first 142 layers non-trainable, and top_dropout is disabled.
model_resnet50_ft = model_build(ResNet50, adam, 142, False)

In [27]:
# TensorBoard logs go to log/resnet50.
tensorboard_cb = TensorBoard(
    log_dir='log/resnet50',
    histogram_freq=0,
    batch_size=batch_size,
    write_graph=True,
    write_images=True,
)
# Checkpoint on val_loss with save_best_only=True; epoch number and val_loss are
# embedded in the file name.
ckpt_cb = ModelCheckpoint(
    'resnet50_ft1_aug_weights.{epoch:02d}-{val_loss:.3f}.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
# Early stopping on val_loss with min_delta=0.001 and patience=10.
stp_cb = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    verbose=1,
    mode='auto',
)

In [28]:
# Upper bound on training epochs; the EarlyStopping callback may end training sooner.
num_epochs = 50

In [29]:
# Fine-tune on augmented training batches and validate on the non-augmented
# validation generator.
# NOTE(review): steps_per_epoch is fixed at 50 batches, so one "epoch" here is
# 50 * batch_size images rather than a full pass over train_aug_generator.
model_resnet50_ft.fit_generator(
    train_aug_generator,
    steps_per_epoch=50,
    epochs=num_epochs,
    validation_data=val_generator,
    # Ceiling division: exactly enough steps to see every validation image once.
    # `samples // batch_size + 1` adds a surplus step whenever the sample
    # count is an exact multiple of batch_size.
    validation_steps=(val_generator.samples + batch_size - 1) // batch_size,
    callbacks=[tensorboard_cb, ckpt_cb, stp_cb],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 00020: early stopping


<keras.callbacks.History at 0x7f3462f5aa90>

In [30]:
# Restore a checkpoint written by ckpt_cb. The file name is hard-coded and follows the
# callback's '{epoch:02d}-{val_loss:.3f}' pattern, so it must be updated after a new training run.
model_resnet50_ft.load_weights('resnet50_ft1_aug_weights.09-0.373.hdf5')

In [31]:
def write_predicts_gen(test_gen, batch_size, sub_fname, model=None):
    """Predict class probabilities for every image of ``test_gen`` and write a CSV.

    Parameters
    ----------
    test_gen : generator exposing ``samples`` (number of images) and
        ``filenames`` (one path per image). It should yield images in the same
        order as ``filenames`` (i.e. be created with ``shuffle=False``),
        otherwise rows and file names will not correspond.
    batch_size : batch size ``test_gen`` was created with; used to derive the
        number of prediction steps.
    sub_fname : path of the CSV file to write.
    model : object with a ``predict_generator`` method; defaults to the global
        ``model_resnet50_ft``.

    The CSV has an ``img`` column (base name of each file) followed by one
    column per entry of the global ``classnames``; the index is not written.
    """
    if model is None:
        model = model_resnet50_ft

    # Ceiling division: `samples // batch_size + 1` requests a surplus batch
    # when samples is an exact multiple of batch_size, which would produce
    # more prediction rows than there are file names.
    num_steps = -(-test_gen.samples // batch_size)
    y_pred = model.predict_generator(test_gen, num_steps, verbose=1)
    # Keep exactly one row per image so the concat below stays aligned.
    y_pred = y_pred[:test_gen.samples]

    y_pred_df = pd.DataFrame(y_pred, columns=classnames)
    img_name_df = pd.DataFrame(
        [os.path.basename(fname) for fname in test_gen.filenames],
        columns=['img'])

    predictions = pd.concat([img_name_df, y_pred_df], axis=1)
    print(predictions.shape)
    predictions.to_csv(sub_fname, index=False)
    print('submission file {} is ready.'.format(sub_fname))

In [32]:
# Write the submission from the unshuffled, non-augmented test generator.
write_predicts_gen(test_generator, batch_size, 'submission_resnet50_ft1_aug.csv')

(79726, 11)
submission file submission_resnet50_ft1_aug.csv is ready.
