In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dense, Dropout, Flatten
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import optimizers
from keras.applications.inception_v3 import InceptionV3
import pandas as pd

import os

# --- Filesystem layout (paths are relative to the working directory) ---
TRAIN_PATH = 'data/train'
VAL_PATH = 'data/validation'
TEST_PATH = 'data/test'
LOGS_PATH = 'data/logs'              # handed to the TensorBoard callback
SAVE_PATH = 'data/predictions.csv'   # destination of the predictions CSV

# --- Training schedule ---
BATCH_SIZE = 64
EPOCHS = 20
STEPS_PER_EPOCH = 352   # batches drawn from the training generator per epoch
# NOTE: despite its name this is a number of validation *batches*; it is
# passed as `validation_steps` to `fit_generator`, not as an epoch count.
VAL_EPOCHS = 40

Using TensorFlow backend.


In [2]:
# Training images receive light random augmentation: horizontal/vertical
# shifts of up to 10% of the image size and random horizontal flips.
train_datagen = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)

# Validation images are fed through unmodified. Random augmentation here
# would make the validation metrics noisy and would not mirror how the
# (unaugmented) test images are presented to the model at prediction time.
val_datagen = ImageDataGenerator()

# NOTE(review): no pixel rescaling / `preprocess_input` is configured on the
# generators defined here. Confirm that is intended for the InceptionV3 base;
# if scaling is added it must be applied identically to the train, validation
# and test generators.

# Both generators read one sub-directory per class, resize every image to
# 150x150 and yield (images, 0/1 labels) batches of BATCH_SIZE.
train_generator = train_datagen.flow_from_directory(
    TRAIN_PATH,
    target_size=(150, 150),
    batch_size=BATCH_SIZE,
    class_mode='binary')

val_generator = val_datagen.flow_from_directory(
    VAL_PATH,
    target_size=(150, 150),
    batch_size=BATCH_SIZE,
    class_mode='binary')

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [None]:
# InceptionV3 backbone without its classification top; `pooling='max'`
# requests global max pooling on the final convolutional output.
base_model = InceptionV3(include_top=False, pooling='max')

# Freeze every backbone layer so that only the new head below is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Binary classification head: one hidden ReLU layer, then a single sigmoid unit.
hidden = Dense(1024, activation='relu')(base_model.output)
probability = Dense(1, activation='sigmoid')(hidden)

model = Model(inputs=base_model.input, outputs=probability)

model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Write training/validation metrics to LOGS_PATH for inspection in TensorBoard.
tensorboard_cb = TensorBoard(log_dir=LOGS_PATH)

# Train for EPOCHS epochs of STEPS_PER_EPOCH batches each. After every epoch,
# VAL_EPOCHS batches are drawn from the validation generator (the constant is
# a step count, not an epoch count). verbose=2 prints one line per epoch.
model.fit_generator(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=val_generator,
    validation_steps=VAL_EPOCHS,
    callbacks=[tensorboard_cb],
    verbose=2)

Epoch 1/20


In [6]:
# Test images are read without augmentation, one image per batch and in a
# fixed order (shuffle=False) so that prediction i corresponds to filename i.
# class_mode=None makes the generator yield images only (no labels).
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow_from_directory(
    TEST_PATH,
    target_size=(150, 150),
    batch_size=1,
    class_mode=None,
    shuffle=False)

raw_files = test_generator.filenames

# With batch_size=1 one step consumes exactly one image, so the number of
# steps is derived from the files actually found instead of a hard-coded
# count. A stale constant would either skip images or wrap around and produce
# predictions that no longer line up with `raw_files`.
raw_data = model.predict_generator(test_generator, len(raw_files), verbose=1)

Found 12500 images belonging to 1 classes.

In [7]:
# Work on a copy so the raw model output stays untouched for re-runs.
data = raw_data.copy()

# zip() would silently truncate on a length mismatch; fail loudly instead.
assert len(raw_files) == len(data), (
    'got %d predictions for %d test files' % (len(data), len(raw_files)))

# Derive the numeric image id from the file name alone ('test/123.jpg' -> 123).
# basename/splitext avoid depending on the sub-directory name, the path
# separator of the host OS, or the exact extension spelling.
files = [int(os.path.splitext(os.path.basename(x))[0]) for x in raw_files]

# Pair each id with its predicted probability (single sigmoid output per
# image) and order the rows by id.
predictions = list(zip(files, [x[0] for x in data]))
predictions.sort(key=lambda x: x[0])

# Column 0 holds the id, column 1 the probability rounded to 4 decimals;
# the CSV is written with the header row 'id,label' and no index column.
df = pd.DataFrame(predictions)
df = df.round({1: 4})
df.to_csv(SAVE_PATH, header=['id', 'label'], index=False)
df

Unnamed: 0,0,1
0,1,0.9999
1,2,1.0000
2,3,1.0000
3,4,0.9999
4,5,0.0018
5,6,0.0630
6,7,0.0002
7,8,0.0352
8,9,0.0669
9,10,0.0341
