In [1]:
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler
from keras.constraints import maxnorm
from keras.regularizers import l2
from random import shuffle

from matplotlib import pyplot as plt
%matplotlib inline

import cv2
import glob
import os
import re

Using Theano backend.
Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5005)


In [2]:
# Directory holding the labelled training images; the class is encoded in the
# file name (it contains either "dog" or "cat").
train_path = "/home/aiml_test_user/Shaheen/trainDC"

ROWS = 128
COLS = 128
CHANNELS = 3  # NOTE(review): not used in this cell; images are read as single-channel grayscale

# List the directory once and derive both class lists from the same snapshot.
images = os.listdir(train_path)
images_dog = [img for img in images if "dog" in img]
images_cat = [img for img in images if "cat" in img]


train_list = images_dog + images_cat

# Interleave the two classes; images and labels are filled in this order below.
shuffle(train_list)

# uint8 stores the resized grayscale pixels exactly and needs 1/8 of the memory
# of the default float64 buffer; the float32 conversion happens in a later cell.
train = np.zeros((len(train_list), ROWS, COLS), dtype=np.uint8)
labels = np.zeros(len(train_list))

for i, img_path in enumerate(train_list):
    full_path = os.path.join(train_path, img_path)
    img = cv2.imread(full_path, 0)  # flag 0 = load as grayscale
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read training image: %s" % full_path)
    # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).
    img = cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)

    train[i] = img
    # Label convention: dog = 1, cat = 0.
    labels[i] = 1 if "dog" in img_path else 0

In [4]:
# Insert the single grayscale channel axis (channels-first layout, matching the
# input_shape=(1, ROWS, COLS) used by the models). astype() already returns a
# new array, so no extra np.array() copy of the whole data set is needed.
train1 = train.reshape((-1, 1, ROWS, COLS)).astype('float32')

## dividing by 255 so pixel intensities are scaled to [0, 1]
train1 /= 255

# Last expression of the cell so the resulting shape is actually displayed.
train1.shape

In [5]:

## splitting the data into train and test (70% / 30%)
# train_test_split is already imported in the first cell. A fixed random_state
# makes the split, and therefore the reported accuracies, reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    train1, labels, test_size=0.3, random_state=42)


In [12]:
img_size = 128

## CNN

def cnn_model():
    """Build the smaller, uncompiled CNN for binary cat/dog classification.

    Three conv-conv-pool-dropout stages with 32, 64 and 128 filters feed a
    512-unit dense layer and a single sigmoid output unit. The input is one
    channel in channels-first layout, (1, img_size, img_size), where
    ``img_size`` is the module-level constant read at call time.

    Returns:
        An uncompiled keras ``Sequential`` model.
    """
    net = Sequential()

    # Stage 1: both convolutions use border_mode='same'; dropout rate 0.25.
    net.add(Convolution2D(32, 3, 3, input_shape=(1, img_size, img_size),
                          border_mode='same', activation='relu'))
    net.add(Convolution2D(32, 3, 3, border_mode='same', activation='relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.25))

    # Stages 2 and 3: the second convolution leaves border_mode at its
    # default; dropout rate 0.2.
    for n_filters in (64, 128):
        net.add(Convolution2D(n_filters, 3, 3, border_mode='same', activation='relu'))
        net.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))
        net.add(Dropout(0.2))

    # Classifier head: one sigmoid unit for the binary label.
    net.add(Flatten())
    net.add(Dense(512, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(1))
    net.add(Activation('sigmoid'))

    return net


# Instantiate the smaller CNN and configure it for binary classification:
# sigmoid output + binary cross-entropy, RMSprop optimizer, accuracy metric.
model1 = cnn_model()

model1.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [13]:
## fitting the model
# Train model1 on the training split and report loss/accuracy on the held-out
# split after every epoch.
batch_size = 32
nb_epoch = 20

model1.fit(
    x_train,
    y_train,
    nb_epoch=nb_epoch,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 17500 samples, validate on 7500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fa5327ca128>

In [14]:
# With metrics=['accuracy'] set at compile time, evaluate() returns
# [loss, accuracy]; index 1 is the accuracy on the held-out split.
validation = model1.evaluate(x_test, y_test, verbose=1)
held_out_accuracy = validation[1]
print('Test accuracy:', held_out_accuracy)



In [8]:
img_size = 128

def catdog():
    """Build the deeper, uncompiled CNN for binary cat/dog classification.

    Four conv-conv-pool-dropout stages with 32, 64, 128 and 256 filters (all
    convolutions use border_mode='same', dropout rate 0.2) feed two dense
    layers of 512 and 256 units, each followed by dropout 0.5, and a single
    sigmoid output unit. The input is one channel in channels-first layout,
    (1, ROWS, COLS), using the module-level ``ROWS`` and ``COLS``.

    Returns:
        An uncompiled keras ``Sequential`` model.
    """
    net = Sequential()

    for stage, n_filters in enumerate((32, 64, 128, 256)):
        # Only the very first layer of the network declares the input shape.
        first_layer_kwargs = {'input_shape': (1, ROWS, COLS)} if stage == 0 else {}
        net.add(Convolution2D(n_filters, 3, 3, border_mode='same',
                              activation='relu', **first_layer_kwargs))
        net.add(Convolution2D(n_filters, 3, 3, border_mode='same', activation='relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))
        net.add(Dropout(0.2))

    # Classifier head.
    net.add(Flatten())
    for n_units in (512, 256):
        net.add(Dense(n_units, activation='relu'))
        net.add(Dropout(0.5))

    net.add(Dense(1))
    net.add(Activation('sigmoid'))
    return net


# Instantiate the deeper CNN and configure it for binary classification:
# sigmoid output + binary cross-entropy, RMSprop optimizer, accuracy metric.
model3 = catdog()

model3.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [9]:
## fitting the model
# Train model3 on the training split and report loss/accuracy on the held-out
# split after every epoch.
batch_size = 32
nb_epoch = 14

model3.fit(
    x_train,
    y_train,
    nb_epoch=nb_epoch,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 17500 samples, validate on 7500 samples
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<keras.callbacks.History at 0x7fa537392128>

In [15]:
# With metrics=['accuracy'] set at compile time, evaluate() returns
# [loss, accuracy]; index 1 is the accuracy on the held-out split.
validation = model3.evaluate(x_test, y_test, verbose=1)
held_out_accuracy = validation[1]
print('Test accuracy:', held_out_accuracy)

Test accuracy: 0.902133333365


In [16]:
## data augmentation
# Separate 80/20 split for the augmented training run. A fixed random_state
# makes the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(
    train1, labels, test_size=0.2, random_state=42)


# Milder augmentation settings.
# NOTE(review): this generator is not used by any cell visible here.
datagen = ImageDataGenerator(featurewise_center=False,
                            featurewise_std_normalization=False,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            zoom_range=0.2,
                            shear_range=0.1,
                            rotation_range=10.,)

# Stronger augmentation (larger rotations/shifts plus horizontal flips); this
# is the generator that feeds model2's training.
datagen1 = ImageDataGenerator(rotation_range=40,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              shear_range=0.2,
                              zoom_range=0.2,
                              horizontal_flip=True)

# No datagen1.fit(X_train) call: fit() only computes data-set statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled here, so it would only copy the full training array.

In [19]:
# reinitialise the model
# A fresh, untrained instance of the smaller architecture, trained on
# augmented batches instead of the raw arrays.
model2 = cnn_model()

model2.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

nb_epoch = 15
batch_size = 32

# Endless stream of randomly transformed (image, label) batches.
augmented_batches = datagen1.flow(X_train, Y_train, batch_size=batch_size)

# One epoch is defined as as many generated samples as there are training images.
model2.fit_generator(
    augmented_batches,
    samples_per_epoch=X_train.shape[0],
    nb_epoch=nb_epoch,
    validation_data=(X_val, Y_val),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fa5199b0f28>

In [20]:
# model2 was trained on X_train from the second (80/20) split. x_test comes
# from the earlier, independent 70/30 split and therefore overlaps X_train, so
# scoring on it would leak training data into the reported accuracy. Score on
# the hold-out that belongs to model2's own split instead.
# evaluate() returns [loss, accuracy] because metrics=['accuracy'] was compiled in.
validation = model2.evaluate(X_val, Y_val, verbose=1)
print('Test accuracy:', validation[1])



In [21]:
## uploading and processing test data
test_path = "/home/aiml_test_user/Shaheen/testDC"

ROWS = 128
COLS = 128
CHANNELS = 3  # NOTE(review): not used in this cell; images are read as single-channel grayscale

# Raw directory listing (arbitrary order); kept for inspection only and NOT
# used for loading.
images = os.listdir(test_path)

# Test files are expected to be named 1.jpg ... 12500.jpg; build the paths in
# id order.
files = [os.path.join(test_path, str(i) + '.jpg') for i in range(1, 12501)]


# uint8 stores the resized grayscale pixels exactly; the float32 conversion
# happens in a later cell.
test = np.zeros((len(files), ROWS, COLS), dtype=np.uint8)

# Iterate over `files`, not the os.listdir() result: row i must hold image id
# i + 1 so the predictions line up with the ids written to the submission.
for i, img_path in enumerate(files):
    img = cv2.imread(img_path, 0)  # flag 0 = load as grayscale
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read test image: %s" % img_path)
    # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).
    img = cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)

    test[i] = img
                     
                     

In [22]:
# Same preprocessing as the training data: insert the single grayscale channel
# axis (channels-first) and convert to float32. astype() already returns a new
# array, so no extra np.array() copy is needed.
test1 = test.reshape((-1, 1, ROWS, COLS)).astype('float32')


## dividing by 255 so pixel intensities are scaled to [0, 1]
test1 /= 255

test1.shape

(12500, 1, 128, 128)

In [23]:
## predicting the test data
# One sigmoid output per image; with the dog = 1 label convention used for
# training this is the predicted probability of "dog".
y_pred = model3.predict(test1)

# save results
# Row i of test1 is paired with id i + 1. Per-column formats keep the id an
# integer (the default '%.18e' would write it as 1.000000000000000000e+00)
# while leaving the probability at full precision.
ids = np.arange(1, len(test1) + 1)
np.savetxt('submission_DogsvCatsKaggleProb.csv',
           np.c_[ids, y_pred],
           delimiter=',',
           header='id,label',
           comments='',
           fmt=['%d', '%.18e'])