In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from skimage import io
from sklearn.cross_validation import train_test_split

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils

In [None]:
# IPython magic: render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

In [None]:
# Directories holding the cropped source images, one directory per class.
PUG_IMG_DIR = "./pugs_cropped"
GOLDEN_RETRVR_IMG_DIR = "./golden_retrievers_cropped"

# Dimensions used to size the image buffers and the network input.
IMG_ROWS = IMG_COLS = 256
IMG_CHANNELS = 3

In [None]:
# Count the directory entries of each class; every entry gets one slot in its buffer.
n_pug_images, n_golden_retrvr_images = (
    len(os.listdir(img_dir)) for img_dir in (PUG_IMG_DIR, GOLDEN_RETRVR_IMG_DIR)
)

# Uninitialised uint8 buffers laid out as (n_images, channels, rows, cols);
# they are filled in by the loading step.
_single_image_shape = (IMG_CHANNELS, IMG_ROWS, IMG_COLS)
pug_images = np.empty((n_pug_images,) + _single_image_shape, dtype=np.uint8)
golden_retrvr_images = np.empty((n_golden_retrvr_images,) + _single_image_shape, dtype=np.uint8)

In [None]:
for n, image in enumerate(os.listdir(PUG_IMG_DIR)):
    pug_images[n] = io.imread(PUG_IMG_DIR+"/"+image).transpose()

for n, image in enumerate(os.listdir(GOLDEN_RETRVR_IMG_DIR)):
    golden_retrvr_images[n] = io.imread(GOLDEN_RETRVR_IMG_DIR+"/"+image).transpose()

In [None]:
print(pug_images.shape)
print(golden_retrvr_images.shape)

In [None]:
pug_labels = np.ones(n_pug_images)
golden_retrvr_labels = np.zeros(n_golden_retrvr_images)

In [None]:
plt.axis('off')
plt.imshow(pug_images[921].transpose())

In [None]:
plt.axis('off')
plt.imshow(golden_retrvr_images[921].transpose())

In [None]:
# Stack both classes into a single dataset: pugs first, golden retrievers second.
# Images and labels are joined in the same order so they stay aligned.
X = np.concatenate((pug_images, golden_retrvr_images), axis=0)
y = np.concatenate((pug_labels, golden_retrvr_labels), axis=0)

In [None]:
# 75/25 split, stratified on the label so both parts keep the class ratio.
# A fixed seed makes the split (and the cached pickle written from it) reproducible
# across kernel restarts; without it every run trains and evaluates on different images.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, stratify=y, random_state=SPLIT_SEED)

In [None]:
# Cache the split on disk so the images do not have to be re-read and re-split.
# HIGHEST_PROTOCOL stores the large image arrays in compact binary form; the default
# protocol can be far slower and larger (it is ASCII-based on Python 2).
# pickle.load() detects the protocol automatically, so readers are unaffected.
# NOTE(review): despite the ".gz" suffix the file is written with plain open(), i.e. it
# is NOT gzip-compressed; the name is kept unchanged so existing readers still find it.
with open("./pugs_vs_golden_retrvrs_data.pkl.gz", "wb") as pickle_file:
    pickle.dump((X_train, X_test, y_train, y_test), pickle_file,
                protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Restore the cached train/test split written by the previous step.
with open("./pugs_vs_golden_retrvrs_data.pkl.gz", "rb") as cache_file:
    X_train, X_test, y_train, y_test = pickle.load(cache_file)

In [None]:
# Training hyper-parameters.
batch_size, n_epochs = 32, 200
n_classes = 2  # pug vs. golden retriever

# Toggle between plain fitting and fitting on randomly augmented batches.
data_augmentation = True

In [None]:
# One-hot encode the class labels to match the n_classes-way softmax output.
Y_train, Y_test = (
    np_utils.to_categorical(labels, n_classes) for labels in (y_train, y_test)
)

In [None]:
def _add_conv_stage(net, n_filters, **first_layer_kwargs):
    """Append one convolutional stage to `net`.

    A stage is: 3x3 'same'-padded convolution -> ReLU -> 3x3 convolution -> ReLU
    -> 2x2 max-pooling -> 25% dropout. `first_layer_kwargs` are forwarded to the
    first convolution (used to pass `input_shape` for the very first stage).
    """
    net.add(Convolution2D(n_filters, 3, 3, border_mode='same', **first_layer_kwargs))
    net.add(Activation('relu'))
    net.add(Convolution2D(n_filters, 3, 3))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.25))


model = Sequential()

# Two convolutional stages with 32 and then 64 filters; the first declares the input shape.
_add_conv_stage(model, 32, input_shape=(IMG_CHANNELS, IMG_ROWS, IMG_COLS))
_add_conv_stage(model, 64)

# Classifier head: flatten, one hidden dense layer with dropout, softmax over the classes.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation('softmax'))

In [None]:
# Stochastic gradient descent with Nesterov momentum and a small learning-rate decay.
sgd = SGD(
    lr=0.01,
    momentum=0.9,
    decay=1e-6,
    nesterov=True,
)
model.compile(optimizer=sgd, loss='categorical_crossentropy')

# Convert the integer pixel data to float32 and scale it by 1/255.
# The dtype guard keeps this cell idempotent: once the arrays are float they are left
# alone, so re-running the cell cannot divide the already-scaled data by 255 again.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype('float32') / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype('float32') / 255

# Keyword arguments shared by both training paths.
_fit_options = dict(
    nb_epoch=n_epochs,
    show_accuracy=True,
    validation_data=(X_test, Y_test),
)

if data_augmentation:
    print('Using real-time data augmentation.')

    # Generator that perturbs each batch on the fly. All normalisation and whitening
    # options are switched off; only shifts and horizontal flips are enabled.
    datagen = ImageDataGenerator(
        featurewise_center=False,             # no dataset-wide mean subtraction
        samplewise_center=False,              # no per-sample mean subtraction
        featurewise_std_normalization=False,  # no division by the dataset std
        samplewise_std_normalization=False,   # no division by the per-sample std
        zca_whitening=False,                  # no ZCA whitening
        rotation_range=0,                     # no random rotation
        width_shift_range=0.1,                # random horizontal shift, fraction of width
        height_shift_range=0.1,               # random vertical shift, fraction of height
        horizontal_flip=True,                 # random horizontal flips
        vertical_flip=False)                  # no vertical flips

    # Compute any statistics the generator needs from the training data
    # (mean/std/principal components are only relevant to the featurewise options).
    datagen.fit(X_train)

    # Train on augmented batches; one epoch draws as many samples as the training set holds.
    model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                        samples_per_epoch=X_train.shape[0],
                        nb_worker=1,
                        **_fit_options)
else:
    print('Not using data augmentation.')
    model.fit(X_train, Y_train, batch_size=batch_size,
              shuffle=True, **_fit_options)