In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show what the Kaggle input directory contains.
input_dir = "../input"
print(os.listdir(input_dir))

# Any results you write to the current directory are saved as output.

In [None]:
# IPython shell escape: list what is inside the training-set folder
# (the same path that is later assigned to train_dir).
!ls ../input/training_set/training_set/

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Read one training image into an array and display its dimensions.
sample_path = '../input/training_set/training_set/cats/cat.1.jpg'
img = plt.imread(sample_path)
img.shape

### Building the model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from keras.optimizers import Adam, RMSprop
from keras.preprocessing import image

In [None]:
# Baseline CNN: two (conv, conv, max-pool, dropout) stages on 150x150x3 inputs,
# then a dense head that ends in a single sigmoid unit.
model = Sequential([
    # Stage 1: 32 filters.
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Dropout(0.2),
    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Dropout(0.2),
    Flatten(),
    # Classifier head.
    Dense(512, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary-classification set-up: RMSprop optimizer, binary cross-entropy loss,
# and accuracy tracked under the history key 'acc'.
model.compile(
    optimizer=RMSprop(0.0001),
    loss='binary_crossentropy',
    metrics=['acc'],
)

### Preparing data

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Root folders of the two image sets; both are passed to flow_from_directory.
train_dir, test_dir = (
    '../input/training_set/training_set/',
    '../input/test_set/test_set/',
)

In [None]:
# Both directory iterators share the same image size, batch size and label mode.
flow_options = dict(target_size=(150, 150), batch_size=32, class_mode='binary')

# No augmentation here: images are only rescaled by 1/255.
train_generator = ImageDataGenerator(rescale=1./255)
train = train_generator.flow_from_directory(train_dir, **flow_options)

test_generator = ImageDataGenerator(rescale=1./255)
test = test_generator.flow_from_directory(test_dir, **flow_options)

### Training the model

In [None]:
# Train on batches drawn from `train`, validating on batches from `test`.
model.fit_generator(
    train,
    steps_per_epoch=20,
    epochs=20,
    validation_data=test,
    validation_steps=2,
)

In [None]:
# Plot training vs. validation accuracy for every epoch that was recorded.
# The x-axis is derived from the history itself instead of a hard-coded
# range(20), so this cell keeps working when the epoch count of the fit call
# changes or when training is stopped early.
acc_history = model.history.history
epochs_run = range(len(acc_history['acc']))
plt.plot(epochs_run, acc_history['acc'])
plt.plot(epochs_run, acc_history['val_acc'])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['acc', 'val_acc']);

### Data Augmentation with keras ImageDataGenerator

Now we have a clear idea that our model is overfitting. Overfitting is caused by having too few samples to learn from. Given infinite data, your model would be exposed to every possible aspect of the data distribution at hand. When only a limited number of samples is available, we can use data augmentation instead.

Data augmentation takes the approach of generating more training data from existing training samples, by augmenting the samples via a number of random transformations that yield believable-looking images. The goal is that, at training time, your model will never see the exact same picture twice. This helps expose the model to more aspects of the data and generalize better.

In Keras, it's super easy!

In [None]:
# Random transformations used below to preview augmentation on one image.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = image.ImageDataGenerator(**augmentation_options)

In [None]:
# Load the sample picture at 150x150, convert it to an array and give it a
# leading batch axis of size 1 so it can be passed to datagen.flow.
img = image.load_img(
    '../input/training_set/training_set/cats/cat.1.jpg',
    target_size=(150, 150),
)
X = image.img_to_array(img).reshape((1, 150, 150, 3))

In [None]:
# Draw four randomly augmented versions of the sample image in a 2x2 grid.
plt.figure(figsize=(10, 8))
for i, batch in enumerate(datagen.flow(X, batch_size=1), start=1):
    plt.subplot(2, 2, i)
    plt.imshow(image.array_to_img(batch[0]))

    # The generator never runs out on its own, so stop after the fourth image.
    if i == 4:
        break

In [None]:
# Training images: rescale to [0, 1] and apply random augmentation so the
# network rarely sees exactly the same picture twice.
train_datagen = image.ImageDataGenerator(rescale=1./255,
                                   rotation_range= 40,
                                   width_shift_range = 0.2,
                                   height_shift_range = 0.2,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

train = train_datagen.flow_from_directory(train_dir,
                                         target_size=(150,150), 
                                         batch_size=32,
                                         class_mode='binary')

# Validation images: rescale ONLY. Augmenting the evaluation set would measure
# accuracy on randomly distorted pictures, which makes val_acc noisy and not
# comparable between runs; the augmentation is meant for training data only.
test_datagen = image.ImageDataGenerator(rescale=1./255)

test = test_datagen.flow_from_directory(test_dir,
                                       target_size=(150,150),
                                       batch_size=32,
                                       class_mode= 'binary')

In [None]:
# Second CNN, trained on augmented data: filter counts grow 32 -> 64 -> 128,
# with a single dropout layer before the dense head.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPool2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Dropout(0.2),
    Flatten(),
    # Classifier head ending in one sigmoid unit.
    Dense(512, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Compile with Adam and binary cross-entropy, tracking accuracy as 'acc'.
model.compile(
    optimizer=Adam(0.00001),
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train on the `train` iterator and validate on the `test` iterator.
model.fit_generator(
    train,
    steps_per_epoch=100,
    epochs=10,
    validation_data=test,
    validation_steps=20,
)

In [None]:
# Plot training vs. validation accuracy for every epoch that was recorded.
# The x-axis is derived from the history itself instead of a hard-coded
# range(10), so this cell keeps working when the epoch count of the fit call
# changes or when training is stopped early.
acc_history = model.history.history
epochs_run = range(len(acc_history['acc']))
plt.plot(epochs_run, acc_history['acc'])
plt.plot(epochs_run, acc_history['val_acc'])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['acc', 'val_acc']);

In [None]:
# Write the trained model to a file in the current working directory.
model_path = 'cat_dog.h5'
model.save(model_path)