In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
import random
# Show what is in the dataset directory and in the working directory so the
# paths used further down can be checked at a glance.
for listed_dir in ("../input", "."):
    print(os.listdir(listed_dir))
from tqdm import tqdm
from keras.preprocessing import image
from keras.applications.xception import Xception, preprocess_input
from keras.callbacks import Callback, LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from keras.models import Model
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from sklearn.metrics import roc_auc_score
import logging

# Any results you write to the current directory are saved as output.

Load training data file names.

In [None]:
# Read the training labels; the 'name' column holds the image id without
# its '.jpg' extension.
labels = pd.read_csv("../input/train_labels.csv")

# Build the path of every labelled image in one pass instead of appending
# row by row. Row order (and the default 0..n-1 index) follows `labels`.
train = pd.DataFrame(
    {'file': ['../input/train/{}.jpg'.format(name) for name in labels['name']]},
    columns=['file'])

Add Response variable

In [None]:
# Attach the response column; values are matched to rows by index label.
train = train.assign(invasive=labels['invasive'])

Split training and validation data 80/20.

In [None]:
# Fixed seed so the 80/20 split -- and therefore every validation score --
# is the same after a kernel restart.
SPLIT_SEED = 42

# Draw 80% of the rows for training; sort_index() puts them back in the
# original row order.
trainSample = train.sample(frac=0.8, random_state=SPLIT_SEED).sort_index()

# Every file that was not sampled goes to validation. sort_index() returns a
# new frame, so nothing is modified in place on a filtered slice of `train`.
validate = train[~train.file.isin(trainSample['file'])].sort_index()

Make Response arrays for Categorical Crossentropy.

In [None]:
# Column order defines which softmax output is which class, and the
# prediction cell reads column 1 as "invasive". Pin the order explicitly
# rather than relying on dict ordering, which older pandas versions sort
# alphabetically (that would silently swap the two classes).
CLASS_COLUMNS = ['non', 'invasive']

trainY = pd.DataFrame(
    {'non': 1 - trainSample['invasive'], 'invasive': trainSample['invasive']},
    columns=CLASS_COLUMNS)
valY = pd.DataFrame(
    {'non': 1 - validate['invasive'], 'invasive': validate['invasive']},
    columns=CLASS_COLUMNS)

Load training and validation images.

In [None]:
def read_img(filepath, size):
    """Load one image file and return it as an array.

    filepath: path of the image to read.
    size: forwarded to Keras `load_img` as `target_size`.
    Returns whatever `image.img_to_array` produces for the loaded image.
    """
    loaded = image.load_img(os.path.join(filepath), target_size=size)
    return image.img_to_array(loaded)

INPUT_SIZE = 224


def _load_images(files, size):
    """Read every path in `files` into one float32 array of shape (n, size, size, 3)."""
    files = list(files)
    batch = np.zeros((len(files), size, size, 3), dtype='float32')
    # Wrap the list itself (not the enumerate object) so tqdm knows the
    # total and can show a real progress bar.
    for i, file in enumerate(tqdm(files)):
        batch[i] = read_img(file, (size, size))
    return batch


# Same loading routine for both splits; row i of each array corresponds to
# row i of the matching DataFrame.
trainX = _load_images(trainSample['file'], INPUT_SIZE)
valX = _load_images(validate['file'], INPUT_SIZE)

Add preprocessing and data augmentation.

In [None]:
# Training generator: Xception input preprocessing plus random geometric
# augmentation (flips, rotation, shifts, zoom).
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2)

# Validation (and later test) generator: preprocessing only, no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

Create AUC metric for evaluation.

In [None]:
import tensorflow as tf
def auc_roc(y_true, y_pred):
    """Keras metric function returning a streaming ROC-AUC tensor.

    y_true: tensor of ground-truth labels.
    y_pred: tensor of predicted scores.
    Returns the AUC value tensor from `tf.contrib.metrics.streaming_auc`,
    wired so that evaluating it also runs the metric's update op.
    """
    value, update_op = tf.contrib.metrics.streaming_auc(y_pred, y_true)

    # Collect the local variables created for this metric, identified by
    # 'auc_roc' appearing in the second component of the variable name.
    # Names without a scope prefix are skipped instead of raising IndexError.
    metric_vars = []
    for var in tf.local_variables():
        scopes = var.name.split('/')
        if len(scopes) > 1 and 'auc_roc' in scopes[1]:
            metric_vars.append(var)

    # Register them as global variables as well -- presumably so they get
    # initialised together with the model's variables (TODO confirm).
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # Force the update op to run before the value is read.
    with tf.control_dependencies([update_op]):
        return tf.identity(value)

Load Xception model and add dense output

In [None]:
# ImageNet-initialised Xception without its classifier top, requested with
# pooling='avg'.
basic_model = Xception(weights='imagenet', include_top=False, pooling='avg')
input_tensor = basic_model.input

# Classification head: dropout followed by a 2-unit softmax layer.
x = Dropout(.5)(basic_model.output)
x = Dense(2, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=x)
best_model_file = 'XC-224x224.h5'

# Mark every layer trainable and attach an L2 regulariser attribute.
# NOTE(review): assigning `W_regularizer` on an already-built layer likely
# does not add any penalty to the loss -- confirm whether regularisation is
# actually in effect before relying on it.
for layer in model.layers:
    layer.trainable = True
    layer.W_regularizer = l2(1e-2)

Compile model

In [None]:
# `model` and `best_model_file` come from the model-definition cell above.
# This cell used to rebuild both with identical code; the duplicate has been
# removed so there is a single place to edit the architecture.
model.compile(optimizer=RMSprop(1e-3),
              loss='categorical_crossentropy',
              metrics=[auc_roc])

Add Callbacks

In [None]:
# AUC is a "higher is better" quantity. With the default mode='auto' Keras
# only infers 'max' for accuracy-like metric names, so 'auc_roc' would be
# treated as something to minimise; set mode='max' explicitly.
callbacks = [
    # Stop when the monitored AUC has not improved for 10 epochs.
    EarlyStopping(monitor='auc_roc', mode='max', patience=10, verbose=1,
                  min_delta=1e-5),
    # Cut the learning rate by 10x after 5 epochs without AUC improvement.
    ReduceLROnPlateau(monitor='auc_roc', mode='max', factor=0.1, patience=5,
                      cooldown=1, verbose=1, min_lr=1e-7),
    # Keep only the best weights on disk (monitored quantity left at the
    # Keras default for ModelCheckpoint).
    ModelCheckpoint(filepath=best_model_file, verbose=1,
                    save_best_only=True, save_weights_only=True, mode='auto'),
]

Train Model

In [None]:
BATCH_SIZE = 16

# Step counts must be whole numbers; round up so the final partial batch of
# each split is still seen once per epoch.
train_steps = int(np.ceil(trainSample.shape[0] / BATCH_SIZE))
val_steps = int(np.ceil(validate.shape[0] / BATCH_SIZE))

model.fit_generator(
    train_datagen.flow(trainX, trainY, batch_size=BATCH_SIZE),
    steps_per_epoch=train_steps,
    epochs=100,
    validation_data=val_datagen.flow(valX, valY, batch_size=BATCH_SIZE),
    validation_steps=val_steps,
    callbacks=callbacks,
    verbose=1)

Load Test Data

In [None]:
TEST_DIR = '../input/test'

# One row per test file, in sorted file-name order. Building the frame from
# an already-sorted list keeps a clean 0..n-1 index, so row i of `test`
# is always row i of `testX`.
test = pd.DataFrame(
    {'file': ['{}/{}'.format(TEST_DIR, name) for name in sorted(os.listdir(TEST_DIR))]},
    columns=['file'])

testX = np.zeros((len(test), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
# Wrap the column itself (not the enumerate object) so tqdm knows the total.
for i, file in enumerate(tqdm(test['file'])):
    testX[i] = read_img(file, (INPUT_SIZE, INPUT_SIZE))

Save Test Predictions

In [None]:
# Predict one image per step, unshuffled, so row i of `predictions` belongs
# to row i of `testX` / `test`.
predictions = model.predict_generator(
    val_datagen.flow(testX, shuffle=False, batch_size=1),
    steps=testX.shape[0])

# Write image ids in the same form as the 'name' column of train_labels.csv
# (file name without directory or '.jpg'), not the full input path.
image_names = [os.path.splitext(os.path.basename(path))[0] for path in test['file']]

# Column 1 of the softmax output is the 'invasive' class; column order of the
# CSV is pinned explicitly.
submission = pd.DataFrame(
    {'name': image_names, 'invasive': predictions[:, 1]},
    columns=['name', 'invasive'])
submission.to_csv('XC.csv', index=False)