In [1]:
# Creating two directories - "data" and "data/trainset_11classes_0_00000" 
# !mkdir data && mkdir data/trainset_11classes_0_00000
# Downloading the ai-camp competition dataset
# !wget -N https://ai-camp.s3-us-west-2.amazonaws.com/trainset_11classes_0_00000.zip
# !wget -N https://ai-camp.s3-us-west-2.amazonaws.com/trainset_4classes_2_20406.zip
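# Unzip the downloaded archive into the folder "data/trainset_11classes_0_00000"
# (note: the command below extracts trainset_4classes_2_20406.zip, not the 11-class archive)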
# !unzip -qq -n trainset_4classes_2_20406.zip -d data/trainset_11classes_0_00000
# Switch directory to "data/trainset_11classes_0_00000" and show its content
# !cd data/trainset_11classes_0_00000 && ls

In [2]:

import os

# Root folder of the extracted dataset; the split folders are joined onto it below
base_dir = 'data/trainset_11classes_0_00000'

# Directories holding the training and validation images, in that order
train_folder, val_folder = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val')
)


In [3]:

# List folders and number of files
# print("Directory, Number of files")
# for root, subdirs, files in os.walk(base_dir):
#     print(root, len(files))

In [4]:
# Install Keras into the per-user site-packages via a shell call to pip3.
# NOTE(review): the version is not pinned, so a fresh run may pull a newer Keras
# than the one this notebook was written against - consider pinning it.
# NOTE(review): `adabound` is imported further down but is not installed here.
!pip3 install keras --user

[33mYou are using pip version 19.0.2, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [5]:
import keras
from keras.preprocessing.image import ImageDataGenerator

# Number of images per mini-batch, shared by both generators
bs = 32

# (height, width) every image is resized to when it is loaded
image_size = (299, 299)

# Training pipeline: multiply pixel values by 1/255 and augment with random
# brightness changes and random horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    brightness_range=[0.5, 1.5],
    horizontal_flip=True,
)

# Validation pipeline: same rescaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings common to both directory iterators
flow_options = dict(
    target_size=image_size,
    batch_size=bs,
    class_mode='categorical',
)

# Stream training images from disk in batches of `bs`
print("Preparing generator for train dataset")
train_generator = train_datagen.flow_from_directory(directory=train_folder, **flow_options)

# Stream validation images from disk in batches of `bs`
print("Preparing generator for validation dataset")
val_generator = val_datagen.flow_from_directory(directory=val_folder, **flow_options)

Using TensorFlow backend.


Preparing generator for train dataset
Found 1322 images belonging to 15 classes.
Preparing generator for validation dataset
Found 347 images belonging to 15 classes.


In [6]:
from keras.models import Model, Sequential
from keras.applications.xception import Xception
from keras.layers import Input, Conv2D, BatchNormalization, Activation, GlobalAveragePooling2D, Dense, Dropout, MaxPooling2D, Flatten

# Here we specify the input shape of our data
# This should match the size of images ('image_size') along with the number of channels (3)
input_shape = (299, 299, 3)

# Number of output classes; must equal the class count reported by the generators
num_classes = 15

# Baseline transfer-learning model, assembled with the Keras Sequential API:
# an ImageNet-pretrained Xception feature extractor (without its classifier top)
# followed by a fully-connected classification head.
# TODO: explore different architectures and training schemes
base_model = Xception(include_top=False, weights='imagenet', input_shape=input_shape)

# Freeze the pretrained layers so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

model = Sequential()
model.add(base_model)
# NOTE(review): flattening the (10, 10, 2048) feature map makes the first Dense
# layer hold ~210M weights (see model.summary()); GlobalAveragePooling2D, which
# is already imported, would be a much lighter alternative worth trying.
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dense(1024, activation='relu'))
# One softmax output per class
model.add(Dense(num_classes, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [7]:
# Print a per-layer table of output shapes and parameter counts, followed by
# the total / trainable / non-trainable parameter totals.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
xception (Model)             (None, 10, 10, 2048)      20861480  
_________________________________________________________________
flatten_1 (Flatten)          (None, 204800)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              209716224 
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_3 (Dense)              (None, 15)                15375     
Total params: 231,642,679
Trainable params: 210,781,199
Non-trainable params: 20,861,480
_________________________________________________________________


In [8]:
from keras import optimizers
from adabound import AdaBound


# Alternative optimizers tried during experimentation (kept for reference only):
# optm = AdaBound(lr=1e-03,
#                 final_lr=0.1,
#                 gamma=1e-03,
#                 weight_decay=0.,
#                 amsbound=False)
# optm = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) #might be effective, interrupted too soon

# Optimizer actually used for training.
# NOTE(review): lr=0.01 is 10x the Keras default for Adam (0.001) - confirm this is intended.
optm = optimizers.Adam(lr=0.01, epsilon=None, decay=0.0)

# Multi-class classification: categorical cross-entropy loss, accuracy as the reported metric
model.compile(loss='categorical_crossentropy',
              optimizer=optm,
              metrics=['accuracy'])

#learning rate in adam


In [9]:
from keras.callbacks import ModelCheckpoint

# Callback that writes the model to 'train_model.hdf5' during training; with
# save_best_only=True the file is only replaced when the monitored 'val_acc' improves.
# NOTE(review): newer Keras releases name this metric 'val_accuracy' - confirm
# against the installed version.
bestValidationCheckpointer = ModelCheckpoint(
    'train_model.hdf5',
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train for 30 epochs, validating after each epoch and checkpointing the best model.
# Step counts use ceiling division so that every sample is visited exactly once
# per epoch; `samples // batch_size + 1` would schedule one extra batch whenever
# the sample count is an exact multiple of the batch size.
history = model.fit_generator(
        train_generator,
        steps_per_epoch=(train_generator.samples + train_generator.batch_size - 1) // train_generator.batch_size,
        epochs=30,
        validation_data=val_generator,
        validation_steps=(val_generator.samples + val_generator.batch_size - 1) // val_generator.batch_size,
        callbacks=[bestValidationCheckpointer]
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/30

In [None]:
from keras.models import load_model

# Reload the best checkpoint written during training. The path must match the
# filepath passed to ModelCheckpoint ('train_model.hdf5'); nothing in this
# notebook writes 'saved_model.hdf5'.
model_path = 'train_model.hdf5'
model = load_model(model_path)

In [None]:
# Restart the validation iterator from its first batch
val_generator.reset()

# Ceiling division: enough steps to cover every validation sample exactly once.
# `samples // batch_size + 1` would evaluate one batch twice whenever the sample
# count is an exact multiple of the batch size.
eval_steps = (val_generator.samples + val_generator.batch_size - 1) // val_generator.batch_size

# scores[0] is the loss, scores[1] the accuracy metric the model was compiled with
scores = model.evaluate_generator(val_generator, steps=eval_steps, verbose=1)
print('Val loss:', scores[0])
print('Val accuracy:', scores[1])