In [None]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Augmentation settings used to preview what randomly transformed samples
# look like. No rescaling is configured here.
# NOTE(review): rotation_range is given in degrees; 4.0 is a very mild
# rotation next to the 0.2 shift/shear/zoom ranges -- confirm that 40 was
# not intended.
augmentation_options = dict(
    rotation_range=4.0,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',  # how pixels exposed by a transform are filled in
)
datagen = ImageDataGenerator(**augmentation_options)

# Load one training picture as a PIL image. No target_size is passed, so the
# picture is not resized here.
img = load_img('/content/sample_data/train/cats/cat.0.jpg')

# Convert the PIL image to a 3D NumPy array.
# NOTE(review): the axis order depends on the configured Keras image data
# format -- presumably (height, width, 3) with the default 'channels_last';
# confirm before relying on a fixed layout.
x = img_to_array(img)

# Prepend a batch axis of size 1 so the array becomes a 4D batch holding a
# single image, which is what datagen.flow() is fed below.
x = x.reshape(1, *x.shape)

# Draw randomly transformed variants of `x` from datagen.flow() and have Keras
# write each generated image into the preview directory (prefix 'cat',
# JPEG format).
import os

preview_dir = 'sample_data/preview'
# Keras writes straight into save_to_dir, so make sure the directory exists
# before the first batch is generated; otherwise the first save fails in a
# fresh environment.
os.makedirs(preview_dir, exist_ok=True)

preview_batches = datagen.flow(
    x,
    batch_size=1,
    save_to_dir=preview_dir,
    save_prefix='cat',
    save_format='jpeg',
)

# The generator never stops on its own, so leave the loop explicitly.
# The counter is checked after it is incremented, so the loop pulls 21
# batches (i ends at 21) before breaking.
i = 0
for batch in preview_batches:
    i += 1
    if i > 20:
        break


# My First CNN Model for Dogs vs Cats Classification
# Training a small convnet from scratch: 80% accuracy in 40 lines of code
# The right tool for an image classification job is a convnet, i.e. a CNN.
# The main focus for fighting overfitting should be the entropic
# capacity of the model, i.e. how much information the model is allowed to store.

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

# Small convnet for binary classification: three Conv2D -> ReLU -> MaxPooling2D
# stages followed by a dense head that ends in a single sigmoid unit.
from keras import backend as K

# The input shape must follow the configured image data format. A hard-coded
# (3, 150, 150) is only valid for 'channels_first'; with 'channels_last' it is
# read as a 3x150 image with 150 channels, and stacking the next 3x3
# convolution on top of it fails with a negative output dimension.
img_height, img_width = 150, 150
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_height, img_width)
else:
    input_shape = (img_height, img_width, 3)

model = Sequential()

# Stage 1: 32 filters; as the first layer it also declares the input shape.
model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
model.add(Activation('relu'))
# Pool 2x2 like the later stages so height and width shrink together.
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stages 2 and 3: same layout with 32 and then 64 filters.
for n_filters in (32, 64):
    model.add(Conv2D(n_filters, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Up to here the model outputs 3D feature maps; Flatten turns them into 1D
# feature vectors for the dense layers.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))  # drop half of the dense activations during training

# One sigmoid unit: a single output in (0, 1) for the two-class problem.
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)


# Let's prepare our data. We will use .flow_from_directory()
# to generate batches of image data (and their labels) directly
# from our jpgs in their respective folders.

# Number of images per batch requested from the directory generators.
batch_size = 16

# Factor applied to every pixel value by both generators.
pixel_scale = 1. / 255

# Training-time configuration: rescaling plus random shear, zoom and
# horizontal flips as augmentation.
train_datagen = ImageDataGenerator(
    rescale=pixel_scale,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Test-time configuration: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Options shared by the training and validation generators.
directory_flow_options = dict(
    target_size=(150, 150),  # every image is resized to 150x150
    batch_size=batch_size,
    class_mode='binary',  # binary labels, to go with the binary_crossentropy loss
)

# Reads the pictures found in the subfolders of 'sample_data/train' and
# indefinitely generates batches of augmented image data.
train_generator = train_datagen.flow_from_directory(
    'sample_data/train',
    **directory_flow_options
)

# Same kind of generator for the validation pictures, built from the
# rescale-only configuration.
validation_generator = test_datagen.flow_from_directory(
    'sample_data/validation',
    **directory_flow_options
)
