In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dense, Dropout, Flatten
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import optimizers
import pandas as pd

import os

STEPS_PER_EPOCH = 500
BATCH_SIZE = 64
EPOCHS = 20
VAL_EPOCHS = 80

OPTIMIZER = optimizers.RMSprop(lr=1e-4)

LOGS_PATH = 'data/logs'
TRAIN_PATH = 'data/train'
VAL_PATH = 'data/validation'
TEST_PATH = 'data/test'
SAVE_PATH = 'data/predictions.csv'
WEIGHTS_PATH = 'data/weights/dogs_cats_net_{}_{}_{}.h5'.format(BATCH_SIZE, EPOCHS, OPTIMIZER)
CHECK_POINT_PREFIX = 'data/check_points/dogs_cats_net_{}_{}_{}'.format(BATCH_SIZE, STEPS_PER_EPOCH, OPTIMIZER)
CHECK_POINT_PATH = CHECK_POINT_PREFIX + '.{epoch:02d}-{val_loss:.4f}.pkl'
WEIGHTS_PATH

Using TensorFlow backend.


'data/weights/dogs_cats_net_64_20_<keras.optimizers.RMSprop object at 0x7fe15afd16a0>.h5'

In [2]:
model = Sequential()
model.add(Conv2D(32, 3, input_shape=(150, 150, 3), activation='relu', padding='same'))
model.add(Conv2D(32, 3, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(64, 3, activation='relu', padding='same'))
model.add(Conv2D(64, 3, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(128, 3, activation='relu', padding='same'))
model.add(Conv2D(128, 3, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Conv2D(256, 3, activation='relu', padding='same'))
model.add(Conv2D(256, 3, activation='relu', padding='same'))
model.add(MaxPooling2D())

model.add(Flatten())

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(1, activation='sigmoid'))

for layer in model.layers:
    print('{:20s}'.format(layer.name), layer.input_shape, layer.output_shape)

model.compile(loss='binary_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

conv2d_1             (None, 150, 150, 3) (None, 150, 150, 32)
conv2d_2             (None, 150, 150, 32) (None, 150, 150, 32)
max_pooling2d_1      (None, 150, 150, 32) (None, 75, 75, 32)
conv2d_3             (None, 75, 75, 32) (None, 75, 75, 64)
conv2d_4             (None, 75, 75, 64) (None, 75, 75, 64)
max_pooling2d_2      (None, 75, 75, 64) (None, 37, 37, 64)
conv2d_5             (None, 37, 37, 64) (None, 37, 37, 128)
conv2d_6             (None, 37, 37, 128) (None, 37, 37, 128)
max_pooling2d_3      (None, 37, 37, 128) (None, 18, 18, 128)
conv2d_7             (None, 18, 18, 128) (None, 18, 18, 256)
conv2d_8             (None, 18, 18, 256) (None, 18, 18, 256)
max_pooling2d_4      (None, 18, 18, 256) (None, 9, 9, 256)
flatten_1            (None, 9, 9, 256) (None, 20736)
dense_1              (None, 20736) (None, 256)
dropout_1            (None, 256) (None, 256)
dense_2              (None, 256) (None, 256)
dropout_2            (None, 256) (None, 256)
dense_3              (None, 256) (None,

In [3]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_PATH,
    target_size=(150, 150),
    batch_size=BATCH_SIZE,
    class_mode='binary')

val_generator = val_datagen.flow_from_directory(
    VAL_PATH,
    target_size=(150, 150),
    batch_size=BATCH_SIZE,
    class_mode='binary')

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [4]:
if not os.path.exists(WEIGHTS_PATH):
    tb = TensorBoard(LOGS_PATH)
    checkpoint = ModelCheckpoint(CHECK_POINT_PATH, save_best_only=True)
    model.fit_generator(
        train_generator,
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=EPOCHS,
        validation_data=val_generator,
        validation_steps=VAL_EPOCHS,
        verbose=2,
        callbacks=[tb, checkpoint],
        initial_epoch=0)
    model.save_weights(WEIGHTS_PATH)
else:
    model.load_weights(WEIGHTS_PATH)

Epoch 1/20
251s - loss: 0.6377 - acc: 0.6237 - val_loss: 0.5706 - val_acc: 0.7016
Epoch 2/20
245s - loss: 0.5344 - acc: 0.7312 - val_loss: 0.4738 - val_acc: 0.7684
Epoch 3/20
245s - loss: 0.4716 - acc: 0.7777 - val_loss: 0.4454 - val_acc: 0.7892
Epoch 4/20
245s - loss: 0.4160 - acc: 0.8127 - val_loss: 0.3881 - val_acc: 0.8240
Epoch 5/20
245s - loss: 0.3750 - acc: 0.8330 - val_loss: 0.3479 - val_acc: 0.8472
Epoch 6/20
245s - loss: 0.3319 - acc: 0.8567 - val_loss: 0.4745 - val_acc: 0.7938
Epoch 7/20
245s - loss: 0.2977 - acc: 0.8726 - val_loss: 0.3452 - val_acc: 0.8668
Epoch 8/20
245s - loss: 0.2662 - acc: 0.8900 - val_loss: 0.2501 - val_acc: 0.8956
Epoch 9/20
245s - loss: 0.2371 - acc: 0.9006 - val_loss: 0.3090 - val_acc: 0.8896
Epoch 10/20
245s - loss: 0.2130 - acc: 0.9138 - val_loss: 0.2253 - val_acc: 0.9082
Epoch 11/20
245s - loss: 0.1943 - acc: 0.9215 - val_loss: 0.2131 - val_acc: 0.9084
Epoch 12/20
245s - loss: 0.1777 - acc: 0.9299 - val_loss: 0.2759 - val_acc: 0.8858
Epoch 13/20
2

In [5]:
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    TEST_PATH,
    target_size=(150, 150),
    batch_size=1,
    class_mode=None,
    shuffle=False)

raw_files = test_generator.filenames
raw_data = model.predict_generator(test_generator, 12500, verbose=1)

Found 12500 images belonging to 1 classes.

In [6]:
data = raw_data.copy()

files = [int(x.replace('test/', '').replace('.jpg', '')) for x in raw_files]
predictions = list(zip(files, [x[0] for x in data]))
predictions.sort(key=lambda x: x[0])
df = pd.DataFrame(predictions)
df = df.round({1: 4})
df.to_csv(SAVE_PATH, header=['id', 'label'], index=False)
df

Unnamed: 0,0,1
0,1,0.9932
1,2,1.0000
2,3,1.0000
3,4,1.0000
4,5,0.0003
5,6,0.0086
6,7,0.0000
7,8,0.7616
8,9,0.0000
9,10,0.0000
