## Importing Libraries

In [1]:
import numpy as np 
import pandas as pd 

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import random

import os
# Show the contents of the dataset input directory.
print(os.listdir("../input/dogs-vs-cats"))

In [2]:
# Extract the train and test archives into the current working directory

import zipfile

for archive_path in ("../input/dogs-vs-cats/train.zip",
                     "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path, "r") as z:
        z.extractall(".")

In [3]:
# Listing the image files in the extracted train and test folders

DIR_TRAIN = "/kaggle/working/train/"
DIR_TEST = "/kaggle/working/test1"

# os.listdir returns entries in arbitrary order; sorting keeps the file lists
# (and therefore the seeded train/validation split built from them) reproducible.
train_imgs = sorted(os.listdir(DIR_TRAIN))
test_imgs = sorted(os.listdir(DIR_TEST))

In [4]:
# Preview the first 15 training filenames.
print("Train images:")
train_imgs[:15]

In [5]:
# Preview the first 15 test filenames.
print("Test images:")
test_imgs[:15]

## Loading a sample image

In [6]:
# Display one randomly chosen training image
sample = random.choice(train_imgs)
sample_path = "/kaggle/working/train/" + sample
image = load_img(sample_path)

plt.imshow(image)
plt.axis("off")
plt.show()

In [7]:
# Build the training DataFrame; each label is the part of the filename before the first "."

category = [name.split(".", 1)[0] for name in train_imgs]
df = pd.DataFrame({"Filename": train_imgs, "Category": category})
df.head()

In [8]:
# Visualizing the constituents of our train set

# value_counts() decides the wedge order, so the legend labels are derived from
# its index; a hard-coded ["Dogs", "Cats"] list can mislabel the wedges.
category_counts = df['Category'].value_counts()

plt.figure(figsize=(6,6))
plt.pie(category_counts, explode=[0.01,0.02],
       autopct="%.2f%%", textprops={'color':'white', 'size':12,
                                   'weight':'bold'},
       startangle=45, colors = ['#947867', '#D49034'])
# "dog" -> "Dogs", "cat" -> "Cats", in the same order as the wedges
plt.legend([f"{label.capitalize()}s" for label in category_counts.index])
plt.show()

### Splitting the data into train & validation

In [9]:
# Hold out 5000 images for validation, stratified on the label so both
# splits keep the same class proportions; the seed fixes the shuffle.

split_options = dict(test_size=5000, stratify=df['Category'], random_state=42)
df_train, df_valid = train_test_split(df, **split_options)

In [10]:
# Checking if categories are equal in the training split
# (the split above was stratified on 'Category')

df_train['Category'].value_counts()

In [11]:
# Replace the index carried over from the split with a fresh 0..n-1 index
df_train = df_train.reset_index(drop=True)
df_valid = df_valid.reset_index(drop=True)

## PROCESSING THE DATA

### Data Augmentation

In [12]:
# Training generator: scales pixel values by 1/255 and applies random
# rotation, zoom, shift and horizontal-flip augmentation

augmentation_options = dict(
    rescale = 1.0/255.0,
    rotation_range = 15,
    zoom_range = 0.2,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    horizontal_flip = True,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Batches of 32 images resized to 224x224 with one-hot ('categorical') labels
train_generator = train_datagen.flow_from_dataframe(
    dataframe = df_train,
    directory = "/kaggle/working/train/",
    x_col = 'Filename',
    y_col = 'Category',
    target_size = (224, 224),
    class_mode = 'categorical',
    batch_size = 32,
)

In [13]:
# Validation generator: only rescales pixel values by 1/255 (no augmentation)

valid_datagen = ImageDataGenerator(rescale = 1.0/255.0)

# Same image size, label encoding and batch size as the training generator
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe = df_valid,
    directory = "/kaggle/working/train/",
    x_col = 'Filename',
    y_col = 'Category',
    target_size = (224, 224),
    class_mode = 'categorical',
    batch_size = 32,
)

# CREATING THE MODEL

### Creating the CNN model

In [15]:
from functools import partial

# Drop any graph/state left over from a previous run of this cell
keras.backend.clear_session()


# Conv2D with the defaults shared by every convolution below:
# 3x3 kernel, stride 1, 'same' padding, ReLU activation.
DefaultConv = partial(keras.layers.Conv2D, kernel_size = 3, strides = 1,
                     padding = 'same', activation = 'relu')

# Stem (7x7 conv, stride 2) followed by four double-conv stages whose filter
# count doubles each stage (64 -> 512); every stage ends with BatchNorm,
# Dropout and 2x2 max-pooling. A small dense head produces the class scores.
model = keras.models.Sequential([
    DefaultConv(filters = 32, kernel_size = 7, strides=2, input_shape=[224,224,3]),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    DefaultConv(filters = 64),
    DefaultConv(filters = 64),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),
    keras.layers.MaxPool2D(pool_size=2),

    DefaultConv(filters = 128),
    DefaultConv(filters = 128),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),
    keras.layers.MaxPool2D(pool_size=2),

    DefaultConv(filters = 256),
    DefaultConv(filters = 256),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),
    keras.layers.MaxPool2D(pool_size=2),

    DefaultConv(filters = 512),
    DefaultConv(filters = 512),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.4),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Flatten(),
    keras.layers.Dense(300, activation = 'relu', use_bias = False),
    keras.layers.Dropout(0.5),
    # The generators yield one-hot labels (class_mode='categorical'), so the two
    # outputs must form a single probability distribution: softmax, not two
    # independent sigmoids.
    keras.layers.Dense(2, activation = 'softmax')
])

# categorical_crossentropy is the loss that matches one-hot labels + softmax;
# with it, the 'accuracy' metric compares the arg-max class against the label.
model.compile(loss = "categorical_crossentropy", optimizer = 'nadam',
             metrics = ['accuracy'])

In [16]:
# Print the layer-by-layer summary of the model built above
model.summary()

### callbacks

In [17]:
# Checkpoint: write the model to model.h5, keeping only the best one seen so far
Checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath="model.h5", save_best_only=True)

# Early stopping: give up after 10 epochs without improvement
Earlystopping_cb = keras.callbacks.EarlyStopping(patience=10)

callback = [Checkpoint_cb, Earlystopping_cb]

### Fitting the model

In [18]:
# Train for up to 15 epochs, validating on the held-out split.
# Step counts must be integers: len(generator) is the number of batches in one
# pass over the data, whereas len(df)/32 yields a float (156.25 for the
# 5000-image validation split).
history = model.fit(
    train_generator,
    epochs = 15,
    validation_data = valid_generator,
    validation_steps = len(valid_generator),
    steps_per_epoch = len(train_generator),
    callbacks=callback
)

## Visualizing the Training

In [19]:
# Train and Validation Loss

train_loss = history.history['loss']
val_loss = history.history['val_loss']
n_epochs = len(train_loss)  # may be fewer than requested if early stopping fired

# The first epoch is left out of the plot (as before), so the x values start
# at epoch 2; passing them explicitly keeps every point at its true epoch.
epochs_shown = range(2, n_epochs + 1)

plt.figure(figsize=(12,6))
plt.plot(epochs_shown, train_loss[1:], "ro-", label = "Train Loss")
plt.plot(epochs_shown, val_loss[1:], "b--", lw=3, label = "Validation Loss")
plt.legend(loc="upper right", fontsize=12)
plt.xlabel("epochs")
plt.ylabel("Loss")
plt.title(f"Train & Validation Loss ({n_epochs} epochs)", fontsize=16)
plt.show()

In [20]:
# Train and Validation Accuracy

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
n_epochs = len(train_acc)  # may be fewer than requested if early stopping fired

# 1-based epoch numbers so the x axis reads "epoch 1 .. epoch n"
epochs_shown = range(1, n_epochs + 1)

plt.figure(figsize=(12,6))
plt.plot(epochs_shown, train_acc, "ro-", label = "Train Accuracy")
plt.plot(epochs_shown, val_acc, "b--", lw=3, label = "Validation Accuracy")
plt.legend(loc="lower right", fontsize=12)
plt.xlabel("epochs")
plt.ylabel("Accuracy")
plt.title(f"Train & Validation Accuracy ({n_epochs} epochs)", fontsize=16)
plt.show()

We can see from our chart that the model showed some signs of overfitting during training. Overall, it performed well thanks to regularization techniques such as data augmentation, `BatchNormalization` and `Dropout`.

## PROCESSING THE TEST DATA

In [21]:
# One row per test image filename
df_test = pd.DataFrame(dict(Filename=test_imgs))
df_test.head()

## Creating the Generator

In [22]:
# Test generator: rescale only, no labels, and no shuffling so that the
# prediction rows stay in the same order as the rows of df_test
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

test_generator = test_datagen.flow_from_dataframe(
    dataframe = df_test,
    directory = "/kaggle/working/test1/",
    x_col = 'Filename',
    y_col = None,
    class_mode = None,
    target_size = (224, 224),
    batch_size = 32,
    shuffle = False,
)

In [23]:
# loading the best model
# (model.h5 is the file written by the ModelCheckpoint callback during training)

model = keras.models.load_model("model.h5")

In [24]:
# Print the summary of the model reloaded from model.h5
model.summary()

## Prediction

In [25]:
# Predict over the whole test set. `steps` must be an integer batch count;
# np.ceil returns a float, so use the generator's own length (number of
# batches, including the final partial one) instead.
pred = model.predict(test_generator,
                     steps = len(test_generator))

The model returns the probability that an instance belongs to each category.

In [26]:
# Print small floats in fixed-point instead of scientific notation
np.set_printoptions(suppress=True)

# First five prediction rows
pred[:5]

We will add a category to our dataframe based on our predictions. If the first probability in a row is greater than 0.5, we assign it `cat`; otherwise we assign `dog`.

In [27]:
# Threshold the first output column at 0.5: above -> "cat", otherwise "dog".
# NOTE(review): presumes column 0 is the "cat" class — confirm against
# train_generator.class_indices.
category = ["cat" if score > 0.5 else "dog" for score in pred[:, 0]]

df_test['Category'] = category

df_test.head()

## Visualizing our predictions

In [28]:
# Wedge order follows value_counts() (most frequently predicted class first),
# so the legend is built from its index rather than a fixed ["Dogs", "Cats"]
# list, which is wrong whenever cats are predicted more often than dogs.
pred_counts = df_test['Category'].value_counts()

plt.figure(figsize=(6,6))
# explode must have one entry per wedge; trim it in case only one class was predicted
plt.pie(pred_counts, explode=[0.01,0.02][:len(pred_counts)],
       autopct="%.2f%%", textprops={'color':'white', 'size':12,
                                   'weight':'bold'},
       startangle=45, colors = ['#947867', '#D49034'])
# "dog" -> "Dogs", "cat" -> "Cats", in the same order as the wedges
plt.legend([f"{label.capitalize()}s" for label in pred_counts.index])
plt.show()

In [29]:
# Show 50 randomly sampled test images with their predicted labels in a 5x10 grid
df_example = df_test.sample(50).reset_index(drop=True)

plt.figure(figsize=(18,12))

example_rows = zip(df_example['Filename'], df_example['Category'])
for i, (filename, category) in enumerate(example_rows):
    plt.subplot(5,10,i+1)
    image = load_img('/kaggle/working/test1/'+filename)
    plt.imshow(image)
    plt.title(f"Prediction: {category}")
    plt.axis("off")

plt.tight_layout()
plt.show()