In [None]:
'''
All the imports go here

'''
#System
import os
import random
from glob import glob

#scientific
import numpy as np
import pandas as pd
from PIL import Image
try:
    # scikit-learn >= 0.18 (the cross_validation module was removed in 0.20)
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases
    from sklearn.cross_validation import train_test_split

#OpenCV
import cv2

#Tflearn
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
from tflearn.metrics import Accuracy

In [None]:
'''
In this cell we set the location of the training folder and count the number of JPEG files in it.
'''

# Folder that holds the training images; only its *.jpg files are counted.
images_dir = "train/"
jpg_pattern = os.path.join(images_dir, '*.jpg')
num_files = len(glob(jpg_pattern))

In [None]:
'''
The number of files in the folder is used to allocate the zero-filled NumPy arrays
that will hold the predictors (all_X) and the target (all_y).
We also set the image size that we will use.
'''
# Side length (in pixels) of the square images used for training.
img_size = 64
# One (img_size, img_size, 3) slot per image, plus one label per image.
all_X = np.zeros(shape=(num_files, img_size, img_size, 3), dtype=np.float64)
all_y = np.zeros(shape=num_files)

In [None]:
'''
This function applies histogram equalization to each channel of the image and then resizes it.
'''
def transform_images(img, width, height):
    """Equalize the histogram of each channel, then resize the image.

    Args:
        img: 3-channel 8-bit image array, e.g. as returned by ``cv2.imread``.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        A new, resized array; ``img`` itself is left untouched.

    Raises:
        ValueError: If ``img`` is ``None`` (which is what ``cv2.imread``
            returns when a file cannot be read).
    """
    if img is None:
        raise ValueError("transform_images received None; the image could not be read")
    # Work on a copy so the caller's array is not overwritten in place.
    equalized = img.copy()
    for channel in range(3):
        equalized[:, :, channel] = cv2.equalizeHist(img[:, :, channel])
    return cv2.resize(equalized, (width, height), interpolation=cv2.INTER_CUBIC)

In [None]:
'''
The function below prepares the training set:
We read all the images in the folder.
Each image is transformed (using transform_images above)
and then stored in the pre-allocated all_X NumPy array; its label (0 for dog, 1 otherwise) is stored in the all_y array.
'''
def make_train_set(data_folder):
    """Load every ``*.jpg`` in ``data_folder`` into the global training arrays.

    Each image is read with OpenCV, passed through ``transform_images`` and
    written to the module-level ``all_X``. ``all_y`` receives 0 when the file
    name contains ``'dog'`` and 1 otherwise.

    Args:
        data_folder: Directory containing the training images.

    Raises:
        ValueError: If the folder holds more ``*.jpg`` files than ``all_X``
            has rows.
        IOError: If an image file cannot be read.
    """
    # Use the same '*.jpg' pattern that sized the arrays, so stray entries
    # (hidden files, sub-folders, other formats) cannot overrun them. Sorting
    # gives a reproducible row order, since directory listing order is arbitrary.
    image_paths = sorted(glob(os.path.join(data_folder, '*.jpg')))
    if len(image_paths) > len(all_X):
        raise ValueError(
            "Found %d images in %r but all_X only has room for %d"
            % (len(image_paths), data_folder, len(all_X))
        )
    for i, image_path in enumerate(image_paths):
        img = cv2.imread(image_path)
        if img is None:
            raise IOError("Could not read image: %s" % image_path)
        all_X[i] = transform_images(img, img_size, img_size)
        # Derive the label from the file name only, never from the directory part.
        all_y[i] = 0 if 'dog' in os.path.basename(image_path) else 1

In [None]:
# Fill the global all_X / all_y arrays from the images found in images_dir.
make_train_set(images_dir)

In [None]:
'''
With sklearn's train_test_split we prepare our training and test data.
The test size is 0.1 because below this the model didn't generalize well.
'''
# Hold out 10% of the samples; the fixed seed keeps the split reproducible.
split_parts = train_test_split(all_X, all_y, random_state=42, test_size=0.1)
X, X_test, Y, Y_test = split_parts

In [None]:
'''
Here we transform each label into a one-hot array
whose length equals the number of classes we are trying to predict, using to_categorical.
It converts a class vector (integers) to a binary class matrix.
We need this format for the categorical_crossentropy loss used later.
'''
# One-hot encode the two-class labels. Y and Y_test are rebound to their own
# encoded versions, so only convert while they are still 1-D class vectors:
# this keeps the cell safe to run more than once.
if np.ndim(Y) == 1:
    Y = to_categorical(Y, 2)
if np.ndim(Y_test) == 1:
    Y_test = to_categorical(Y_test, 2)

In [None]:
'''
In this section we first normalize the mean and the standard deviation of the images:
First we zero-center every sample with the specified mean.
Then we scale each sample by the specified standard deviation.
'''
# Preprocessing pipeline, later handed to input_data via data_preprocessing.
img_prepocessing = ImagePreprocessing()
# Zero-center first, then scale by the standard deviation.
# NOTE(review): no mean/std is passed here - presumably TFLearn estimates
# them from the data; confirm against the TFLearn documentation.
img_prepocessing.add_featurewise_zero_center()
img_prepocessing.add_featurewise_stdnorm()


'''
Now, after some experiments (explained in the report),
we use the TFLearn ImageAugmentation functions to randomly rotate and flip some of our images,
creating synthetic training data.
'''
# Augmentation pipeline, later handed to input_data via data_augmentation.
img_aug = ImageAugmentation()
# Random left/right flips.
img_aug.add_random_flip_leftright()
# Random rotations bounded by max_angle (presumably degrees - confirm in the TFLearn docs).
img_aug.add_random_rotation(max_angle=25.)

In [None]:
'''
This cell is the most important one, where we create our CNN:
    The input layer receives a 64x64 image with 3 color channels (BGR order, as loaded by cv2.imread)
    Layer 1 is a convolution layer with 32 filters, each 3x3x3, followed by a max pooling layer (2)
    Layers 3 and 4 are convolution layers with 64 filters and ReLU activation, followed by a max pooling layer (5)
    Layer 6 is a fully connected layer with 512 nodes
    Layer 7 is a dropout layer, added to prevent overfitting
    Layer 8 is a fully connected layer with two outputs and softmax activation
    
After that we configure how the network will be trained and wrap it in a model
'''

# Input layer: img_size x img_size images with 3 channels. Using img_size
# (instead of a hard-coded 64) keeps the network input in sync with the
# arrays built for training.
network = input_data(shape=[None, img_size, img_size, 3],
                     data_preprocessing=img_prepocessing,
                     data_augmentation=img_aug)

# Layer 1: convolution, 32 filters of size 3
conv_1 = conv_2d(network, 32, 3, activation='relu', name='conv_1')

# Layer 2: max pooling with kernel size 2
network = max_pool_2d(conv_1, 2)

# Layer 3: convolution, 64 filters of size 3
conv_2 = conv_2d(network, 64, 3, activation='relu', name='conv_2')

# Layer 4: convolution, 64 filters of size 3
conv_3 = conv_2d(conv_2, 64, 3, activation='relu', name='conv_3')

# Layer 5: max pooling with kernel size 2
network = max_pool_2d(conv_3, 2)

# Layer 6: fully connected, 512 units
network = fully_connected(network, 512, activation='relu')

# Layer 7: dropout, added to reduce overfitting
network = dropout(network, 0.5)

# Layer 8: fully connected output, one unit per class, softmax activation
network = fully_connected(network, 2, activation='softmax')


# Training configuration: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
acc = Accuracy(name="Accuracy")
network = regression(network, optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.0005, metric=acc)


# Wrap the graph in a trainable model, configuring its checkpoint path and
# the directory for the TensorBoard logs.
model = tflearn.DNN(network, checkpoint_path='model_cat_dog_10.tflearn', max_checkpoints = 3,
                    tensorboard_verbose = 3, tensorboard_dir='tmp/tflearn_logs/')

In [None]:
'''
In this cell we train the CNN for a fixed number of epochs and save the logs for use with TensorBoard.
We also save the trained model so it can be used later to predict some images.
'''
# Train on the training split while validating on the held-out split,
# then persist the final model to disk.
run_name = 'model_cat_dog_10'
model.fit(
    X,
    Y,
    n_epoch=10,
    batch_size=500,
    validation_set=(X_test, Y_test),
    show_metric=True,
    run_id=run_name,
)

model.save('model_cat_dog_10_final.tflearn')