# 5.2 CNN trained with small dataset
In this notebook, we solve the problem of classifying images of dogs vs. cats. The dataset has 4000 images, split equally between the two classes.

## 5.2.2 Load *Dogs vs Cats* dataset
We load the dataset from Kaggle, where it was a challenge in 2013.

In [1]:
import os, shutil

In [6]:
# Build the paths from their components instead of hard-coding the Windows "\"
# separator, so the notebook resolves the same folders on any OS.
data_root = os.path.join("..", "data", "Dogs vs Cats")
# Folder the per-label source images are copied from.
original_dataset_dir = os.path.join(data_root, "PetImages")
# Folder under which the train / test / validation splits are created.
base_dir = os.path.join(data_root, "NN Input")

In [5]:
# Sanity check: the split folders are created inside base_dir, so display whether it exists.
os.path.exists(base_dir)

True

In [7]:
# One folder per data split, all placed directly under base_dir.
# exist_ok=True keeps this cell re-runnable: a second execution no longer fails
# with FileExistsError (makedirs also creates base_dir itself if it is missing).
train_dir = os.path.join(base_dir, 'train')
os.makedirs(train_dir, exist_ok=True)
test_dir = os.path.join(base_dir, 'test')
os.makedirs(test_dir, exist_ok=True)
val_dir = os.path.join(base_dir, 'validation')
os.makedirs(val_dir, exist_ok=True)

In [8]:
def make_dir(parent_dir, name):
    """Create the directory ``name`` inside ``parent_dir`` and return its path.

    The call is idempotent: if the directory already exists it is left
    untouched, so the cell can be re-run without a FileExistsError.

    Args:
        parent_dir: Path of the directory that will contain the new one.
        name: Name of the directory to create.

    Returns:
        The joined path ``parent_dir/name``.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    made_dir = os.path.join(parent_dir, name)
    # makedirs + exist_ok tolerates an existing directory (plain mkdir raises).
    os.makedirs(made_dir, exist_ok=True)
    return made_dir

In [11]:
# Every split gets one sub-folder per class, created in cats-then-dogs order.
train_cats_dir, train_dogs_dir = (make_dir(train_dir, cls) for cls in ('cats', 'dogs'))
val_cats_dir, val_dogs_dir = (make_dir(val_dir, cls) for cls in ('cats', 'dogs'))
test_cats_dir, test_dogs_dir = (make_dir(test_dir, cls) for cls in ('cats', 'dogs'))

In [19]:
def copy_data_to_split(dest_dir, label, ids=range(1000), src_root=None):
    """Copy the images ``<id>.jpg`` of one label into a split directory.

    Args:
        dest_dir: Existing directory the files are copied into.
        label: Name of the sub-folder of the source root holding the images
            (the notebook uses 'Cat' and 'Dog').
        ids: Iterable of numeric image ids; each id ``i`` maps to ``i.jpg``.
        src_root: Root folder containing the label sub-folders. Defaults to
            the notebook-level ``original_dataset_dir``.

    Raises:
        FileNotFoundError: If a requested source image does not exist.
    """
    if src_root is None:
        src_root = original_dataset_dir
    # Join path components instead of concatenating "\\", which only works on Windows.
    src_dir = os.path.join(src_root, label)
    for i in ids:
        fname = '{}.jpg'.format(i)
        shutil.copyfile(os.path.join(src_dir, fname), os.path.join(dest_dir, fname))

In [20]:
# Image ids 0-999 go to train, 1000-1499 to validation, 1500-1999 to test,
# using the same id range for both classes.
split_plan = [
    (train_cats_dir, train_dogs_dir, range(1000)),
    (val_cats_dir, val_dogs_dir, range(1000, 1500)),
    (test_cats_dir, test_dogs_dir, range(1500, 2000)),
]
for cats_dest, dogs_dest, image_ids in split_plan:
    copy_data_to_split(cats_dest, 'Cat', image_ids)
    copy_data_to_split(dogs_dest, 'Dog', image_ids)

In [21]:
# Report how many files ended up in the cats training folder.
train_cat_files = os.listdir(train_cats_dir)
print('train cats:', len(train_cat_files))

train cats: 1000


In [22]:
# Report how many files ended up in the dogs training folder.
train_dog_files = os.listdir(train_dogs_dir)
print('train dogs:', len(train_dog_files))

train dogs: 1000


## 5.2.3 Prepare CNN

In [23]:
from keras import layers
from keras import models

# Start from an empty Sequential container; the layers are appended with model.add() in a later cell.
model = models.Sequential()

In [24]:
# Four Conv2D + MaxPooling2D stages (32 -> 64 -> 128 -> 128 filters) on
# 150x150 RGB inputs, followed by a dense head ending in one sigmoid unit.
cnn_layers = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
]
# Append in list order so the stack matches the layer-by-layer definition.
for layer in cnn_layers:
    model.add(layer)

In [25]:
# Print the layer-by-layer output shapes and parameter counts of the stacked model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 17, 17, 128)      0

In [28]:
from tensorflow.keras import optimizers

# Binary cross-entropy matches the single sigmoid output unit; accuracy is tracked as 'acc'.
rmsprop = optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['acc'])