# Dogs vs Cats
## Kaggle Dataset의 전부를 이용한 개, 고양이 구분
### Dog Image: 12,500개, Cat Image: 12,500개, 총 25,000개
### 출처: [pontoregende GitHub](https://github.com/pontorezende/Dogs-vs-Cats-Redux-with-CNN)

In [1]:
import numpy as np
from glob import glob
import cv2, os, random
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

In [3]:
path='dogs-vs-cats/train'

In [4]:
## used for resize and in our model
ROW, COL = 96, 96

dogs, cats = [], []
y_dogs, y_cats = [], []

In [5]:
## Definition to load all our dog images
def load_dogs():
    print('Loading all dog images\n')
    dog_path = os.path.join(path, 'dog*')
    for dog_img in glob(dog_path):
        dog = cv2.imread(dog_img)
        dog = cv2.cvtColor(dog, cv2.COLOR_BGR2GRAY)
        dog = cv2.resize(dog, (ROW, COL))
        dog = image.img_to_array(dog)
        dogs.append(dog)
    print('All dog images loaded')

In [6]:
## Definition to load all our cat images
def load_cats():
    print('Loading all cat images\n')
    cat_path = os.path.join(path, 'cat*')
    for cat_img in glob(cat_path):
        cat = cv2.imread(cat_img)
        cat = cv2.cvtColor(cat, cv2.COLOR_BGR2GRAY)
        cat = cv2.resize(cat, (ROW, COL))
        cat = image.img_to_array(cat)
        cats.append(cat)
    print('All cat images loaded')

In [7]:
load_dogs()

Loading all dog images

All dog images loaded


In [8]:
load_cats()

Loading all cat images

All cat images loaded


In [9]:
classes = ['dog', 'cat']

In [10]:
## in case we want to see if our images was saved correctly in arrays we can use those codes
def show_dogs():
    plt.figure(figsize=(12,8))    
    for i in range(5):
        plt.subplot(1, 5, i+1)
        img = image.array_to_img(random.choice(dogs))
        plt.imshow(img)
        
        plt.axis('off')
        plt.title('Supposed to be a {}'.format(classes[0]))        
    plt.show()

In [11]:
def show_cats():
    plt.figure(figsize=(12,8))
    for i in range(5):
        plt.subplot(1, 5, i+1)
        img = image.array_to_img(random.choice(cats))
        plt.imshow(img)

        plt.axis('off')
        plt.title('Supposed to be a {}'.format(classes[1]))
    plt.show()

In [12]:
## just change the labels for 0 and  1
y_dogs = [1 for item in enumerate(dogs)]
y_cats = [0 for item in enumerate(cats)]

In [13]:
## converting everything to Numpy array to fit in our model
## them creating a X and target file like we used to see
## in Machine and Deep Learning models
dogs = np.asarray(dogs).astype('float32')
cats = np.asarray(cats).astype('float32')
y_dogs = np.asarray(y_dogs).astype('int32')
y_cats = np.asarray(y_cats).astype('int32')

In [14]:
## fit values between 0 and 1
dogs /= 255
cats /= 255

In [15]:
X = np.concatenate((dogs,cats), axis=0)
y = np.concatenate((y_dogs, y_cats), axis=0)

In [16]:
IMG_CHANNEL = 1
BATCH_SIZE = 128
N_EPOCH = 10
VERBOSE = 1
VALIDAION_SPLIT = .2
OPTIM = Adam()
N_CLASSES = len(classes)

In [17]:
## One-Hot Encoding
y = tf.keras.utils.to_categorical(y, N_CLASSES)

In [18]:
## Here is our model as a CNN
model = Sequential([
    Conv2D(32, (3,3), padding='same', input_shape=(ROW, COL, IMG_CHANNEL), activation='relu'),
    Conv2D(32, (3,3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(.25),
    Conv2D(64, (3,3), padding='same', activation='relu'),
    Conv2D(64, (3,3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(.5),
    Dense(2, activation='softmax')
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 96, 96, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 96, 96, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 48, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 48, 48, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 48, 48, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 48, 48, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 64)        0

In [19]:
model.compile(loss='categorical_crossentropy', optimizer=OPTIM, metrics=['accuracy'])

In [20]:
modelpath = "model/dogs_vs_cats-cnn-{epoch:02d}-{val_loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath=modelpath, monitor='val_loss', 
                               verbose=1, save_best_only=True)

In [21]:
## to save checkpoint to use latter
model.fit(X, y, batch_size=BATCH_SIZE, epochs=N_EPOCH, validation_split=VALIDAION_SPLIT,
          verbose=VERBOSE, callbacks=[checkpoint])

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 00001: val_loss improved from inf to 0.63676, saving model to model/dogs_vs_cats-cnn-01-0.6368.hdf5
Epoch 2/10
Epoch 00002: val_loss did not improve from 0.63676
Epoch 3/10
Epoch 00003: val_loss did not improve from 0.63676
Epoch 4/10
Epoch 00004: val_loss did not improve from 0.63676
Epoch 5/10
Epoch 00005: val_loss improved from 0.63676 to 0.56838, saving model to model/dogs_vs_cats-cnn-05-0.5684.hdf5
Epoch 6/10
Epoch 00006: val_loss did not improve from 0.56838
Epoch 7/10
Epoch 00007: val_loss improved from 0.56838 to 0.49426, saving model to model/dogs_vs_cats-cnn-07-0.4943.hdf5
Epoch 8/10
Epoch 00008: val_loss did not improve from 0.49426
Epoch 9/10
Epoch 00009: val_loss did not improve from 0.49426
Epoch 10/10
Epoch 00010: val_loss did not improve from 0.49426


<tensorflow.python.keras.callbacks.History at 0x1dbcb955b08>

In [26]:
from tensorflow.keras.models import load_model
del model
model = load_model('model/dogs_vs_cats-cnn-07-0.4943.hdf5')

In [27]:
scores = model.evaluate(X, y, verbose=2)
print('MODEL ACCURACY: %.5f' % scores[1])

25000/25000 - 104s - loss: 0.3015 - accuracy: 0.8719
MODEL ACCURACY: 0.87188
