In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Lazily enumerate every file below the Kaggle input mount and echo its full
# path, one per line, so the available datasets are visible in the cell output.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file in input_files:
    print(input_file)


In [2]:
import tensorflow
from tensorflow.keras import Sequential,layers,models,applications,optimizers
from tensorflow.keras.preprocessing.image import img_to_array,load_img,ImageDataGenerator 
import os
import glob
import zipfile
from zipfile import ZipFile

In [3]:
# Names (without extension) of the competition archives to unpack.
zip_files = ['test1', 'train']
# Extract each archive into the current working directory and report progress.
for zip_file in zip_files:
    archive_path = "../input/dogs-vs-cats/{}.zip".format(zip_file)
    with ZipFile(archive_path, "r") as archive:
        archive.extractall(".")
        print("{} unzipped".format(zip_file))

In [4]:
# Directory the training images are read from.
img_folder = '../working/train'
# os.listdir() returns entries in arbitrary order. Sorting pins the row order of
# everything derived from `files`, so steps that depend on that order (such as a
# seeded train/test split) give the same result on every run.
files = sorted(os.listdir(img_folder))
# Side lengths, in pixels, that images are resized to before use.
WIDTH=150
HEIGHT=150

In [5]:
# The label of an image is whatever precedes the first "." in its file name.
targets = [name.split(".")[0] for name in files]
# Path of each image, built by joining the image folder with the file name.
full_paths = [os.path.join(img_folder, name) for name in files]

# One row per image: where it lives on disk and which label it carries.
dataset = pd.DataFrame().assign(image_path=full_paths, target=targets)

In [6]:
# Show the first five rows as a quick check of the path and label columns.
dataset.head(n=5)

In [7]:
# Bar chart of the number of images per label.
dataset['target'].value_counts().plot(kind='bar')

In [8]:
# Visualize a random image from the dataset
import random
import matplotlib.pyplot as plt
# Pick one image path at random, read its pixels and display them.
sample = random.choice(full_paths)
sample_pixels = plt.imread(sample)
plt.imshow(sample_pixels);

In [9]:
# Draw and display another randomly chosen image.
sample = random.choice(full_paths)
sample_pixels = plt.imread(sample)
plt.imshow(sample_pixels);

In [10]:
#creating a gallery of images of respective labels;
def get_side(img, side_type, side_size=5):
    """Return a white (value 255) float32 spacer strip sized to border ``img``.

    Parameters
    ----------
    img : array-like with a three-element ``shape`` of (height, width, channel).
    side_type : str
        "horizontal" yields a strip of shape (height, side_size, channel),
        i.e. one that can be stacked beside ``img``. Any other value yields a
        strip of shape (side_size, width, channel), i.e. one that can be
        stacked below ``img``.
    side_size : int
        Thickness of the strip in pixels.

    Returns
    -------
    numpy.ndarray of dtype float32 filled with 255.
    """
    rows, cols, depth = img.shape
    if side_type == "horizontal":
        strip_shape = (rows, side_size, depth)
    else:
        strip_shape = (side_size, cols, depth)
    return np.full(strip_shape, 255, dtype=np.float32)

def show_gallery(show="both", n=100, per_row=10):
    """Plot a grid of randomly chosen images, optionally restricted to one label.

    Reads the module-level ``full_paths`` / ``targets`` lists (paired by
    position) and the ``WIDTH`` / ``HEIGHT`` constants. The lists are only
    read; the random order is obtained from an index permutation, so calling
    this function does not reorder them.

    Parameters
    ----------
    show : str
        "cat" or "dog" to display only images with that label, or "both" to
        display images regardless of label.
    n : int
        Maximum number of images to display.
    per_row : int
        Number of images placed in each gallery row.

    Raises
    ------
    KeyError
        If ``show`` is not one of "both", "cat" or "dog".
    ValueError
        If ``n`` or ``per_row`` is smaller than 1, or if no image matches
        ``show``.
    """
    captions={"both":"Dogs and Cats",
          "cat": "Cats",
          "dog": "Dogs"}
    # Look the caption up first so an unknown label fails before any image is read.
    caption=captions[show]
    if n < 1 or per_row < 1:
        raise ValueError("n and per_row must both be at least 1")

    # Collect up to n matching images in random order. Counting the images that
    # were actually loaded guarantees that exactly n are shown when enough
    # exist; stopping on a pre-incremented counter would drop the last one.
    tiles=[]
    for idx in np.random.permutation(len(full_paths)):
        if len(tiles) == n:
            break
        if show != "both" and targets[idx] != show:
            continue
        # NOTE(review): Keras documents target_size as (height, width). The
        # (WIDTH, HEIGHT) order is kept to match the rest of this notebook and
        # makes no difference while both constants are equal.
        img=load_img(full_paths[idx], target_size=(WIDTH,HEIGHT))
        tiles.append(img_to_array(img))

    if not tiles:
        raise ValueError('no images with label "{}" to display'.format(show))

    # Assemble rows of per_row images, each image followed by a white spacer,
    # and each row followed by a white spacer strip.
    blank_tile=np.full_like(tiles[0], 255)
    gallery_rows=[]
    for start in range(0, len(tiles), per_row):
        row_tiles=tiles[start:start + per_row]
        # Pad a short final row with white tiles so every row has the same
        # width and the rows can be stacked vertically.
        row_tiles=row_tiles + [blank_tile] * (per_row - len(row_tiles))
        pieces=[]
        for tile in row_tiles:
            pieces.append(tile)
            pieces.append(get_side(tile, side_type="horizontal"))
        row_image=np.hstack(pieces)
        gallery_rows.append(row_image)
        gallery_rows.append(get_side(row_image, side_type="vertical"))

    gallery=np.vstack(gallery_rows)
    plt.figure(figsize=(12,12))
    plt.xticks([])
    plt.yticks([])
    # Report the number of images really shown rather than a fixed figure.
    plt.title("{} samples of {} of the dataset".format(len(tiles), caption))
    plt.imshow(gallery.astype(np.uint8))

In [11]:
# Render one gallery per label filter: cats only, dogs only, then both mixed.
for gallery_label in ('cat', 'dog', 'both'):
    show_gallery(show=gallery_label)


Examining the images in the galleries leads to the following observations:

* There are many different types of cats
* There are many different types of dogs
* Some cat (dog) breeds are very similar to some dog (cat) breeds.
* The backgrounds in the images are very different and noisy.
* The difference in exposure of cats and dogs in the images is quite high.

In [12]:
# Convolutional binary classifier: four conv stages followed by a dense head.
model = Sequential()

# Each stage is two 3x3 ReLU convolutions (filter counts given per stage),
# then batch normalisation, 2x2 max pooling and 25% dropout.
conv_stages = [(32, 32), (64, 64), (128, 128), (128, 64)]
for stage_no, (filters_a, filters_b) in enumerate(conv_stages):
    if stage_no == 0:
        # Only the very first layer declares the input shape.
        model.add(layers.Conv2D(filters_a,(3,3),activation='relu',input_shape=(WIDTH,HEIGHT,3)))
    else:
        model.add(layers.Conv2D(filters_a,(3,3),activation='relu'))
    model.add(layers.Conv2D(filters_b,(3,3),activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Dropout(0.25))

# Dense head: flatten, one hidden layer with dropout, single sigmoid output.
head_layers = (
    layers.Flatten(),
    layers.Dense(512,activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1,activation='sigmoid'),
)
for head_layer in head_layers:
    model.add(head_layer)


model.summary()

In [13]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [14]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows; the fixed random_state seeds the shuffle.
dataset_train, dataset_test = train_test_split(
    dataset,
    test_size=0.2,
    random_state=1,
)

In [15]:
# Augmentation applied to training images only; rescale maps pixel values
# from [0, 255] to [0, 1].
train_augmentation = dict(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.3,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
train_datagen = ImageDataGenerator(**train_augmentation)

# Stream batches of 150 augmented images, with binary labels, from the paths
# listed in the training frame.
train_data_generator = train_datagen.flow_from_dataframe(
    dataframe=dataset_train,
    x_col='image_path',
    y_col='target',
    target_size=(WIDTH,HEIGHT),
    class_mode='binary',
    batch_size=150,
)

In [16]:
# Held-out images are only rescaled to [0, 1]; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream batches of 150 images, with binary labels, from the held-out frame.
test_flow_options = dict(
    x_col="image_path",
    y_col="target",
    target_size=(WIDTH, HEIGHT),
    class_mode="binary",
    batch_size=150,
)
test_datagenerator=test_datagen.flow_from_dataframe(dataframe=dataset_test, **test_flow_options)

In [17]:
# Train for 30 epochs, validating on the held-out generator after each epoch.
#
# The step counts come from len() of the iterators, i.e. the number of batches
# needed to cover every image, including a final partial batch. Floor-dividing
# the dataframe length by the batch size left that last batch out whenever the
# row count was not a multiple of 150, so validation metrics were computed on
# only part of the held-out split.
modelHistory = model.fit(train_data_generator,
                                  epochs=30,
                                  validation_data=test_datagenerator,
                                  validation_steps=len(test_datagenerator),
                                  steps_per_epoch=len(train_data_generator))

Train Accuracy = 94.01%

Test Accuracy =  93.43%


In [21]:
def show_model_history(modelHistory, model_name):
    """Plot per-epoch training and validation loss and accuracy on one figure.

    Parameters
    ----------
    modelHistory : object
        Must expose a ``history`` mapping with the keys 'loss', 'val_loss',
        'accuracy' and 'val_accuracy', each holding one value per epoch
        (presumably the object returned by ``model.fit``).
    model_name : str
        Name inserted into the figure title; may be empty.
    """
    record=modelHistory.history
    # One column per curve; the column names become the legend entries.
    history=pd.DataFrame({
        "Train Loss": record['loss'],
        "Validation Loss": record['val_loss'],
        "Train Accuracy": record['accuracy'],
        "Validation Accuracy": record['val_accuracy'],
    })

    history.plot(figsize=(12,8))
    plt.xlabel("Epoch")
    # Skip an empty model name so the title carries no stray double space.
    title_parts=["Convolutional Model",
                 "{}".format(model_name),
                 "Train and Validation Loss and Accuracy History"]
    plt.title(" ".join(part for part in title_parts if part))
    plt.show()

In [22]:
# Print the last epoch's training and validation accuracy, then plot the
# full per-epoch curves.
final_epoch_reports = (
    ("Train Accuracy:{:.3f}", 'accuracy'),
    ("Test Accuracy:{:.3f}", 'val_accuracy'),
)
for report_template, metric_key in final_epoch_reports:
    print(report_template.format(modelHistory.history[metric_key][-1]))
show_model_history(modelHistory=modelHistory, model_name="")