In [1]:
# After unzipping training data manually, all 25000 images are in the 'train' folder.
# Use the script below to put dog images into 'train/dogs/' and cat images into 'train/cats/'

import os
import numpy as np

# Folder layout used by the rest of the notebook.
dir_train = 'train'
dir_train_dogs = 'train/dogs'
dir_train_cats = 'train/cats'

dir_valid = 'valid'
dir_valid_dogs = 'valid/dogs'
dir_valid_cats = 'valid/cats'

# Number of images per class that are held out for validation.
n_valid_per_class = 500

# Fail early with a clear message if the unzipped data is not where it is expected.
if not os.path.isdir(dir_train):
    raise FileNotFoundError(
        "Training folder '%s' not found; unzip the training data first." % dir_train)

# os.makedirs also creates a missing parent folder ('valid'), which os.mkdir cannot do,
# and exist_ok=True makes this cell safe to re-run.
for d in (dir_train_dogs, dir_train_cats, dir_valid_dogs, dir_valid_cats):
    os.makedirs(d, exist_ok=True)

# Initially all images are put in 'train':
# separate dogs and cats into train/dogs and train/cats.
for f in os.listdir(dir_train):
    if 'dog.' in f:
        os.rename(os.path.join(dir_train, f), os.path.join(dir_train_dogs, f))
    elif 'cat.' in f:
        os.rename(os.path.join(dir_train, f), os.path.join(dir_train_cats, f))

# Select images for validation, for dogs and cats respectively.
# The move happens only while the validation folder is still empty, so re-running
# this cell never moves a second batch of images out of the training set.
for src_dir, dst_dir in ((dir_train_dogs, dir_valid_dogs), (dir_train_cats, dir_valid_cats)):
    if os.listdir(dst_dir):
        continue  # validation images for this class were already selected
    for f in np.random.choice(os.listdir(src_dir), n_valid_per_class, replace=False):
        os.rename(os.path.join(src_dir, f), os.path.join(dst_dir, f))

print('There are ', len(os.listdir(dir_train_dogs)), 'dog train images')
print('There are ', len(os.listdir(dir_train_cats)), 'cat train images')
print('There are ', len(os.listdir(dir_valid_dogs)), 'dog valid images')
print('There are ', len(os.listdir(dir_valid_cats)), 'cat valid images')

There are  12000 dog train images
There are  12000 cat train images
There are  500 dog valid images
There are  500 cat valid images


In [2]:
# Randomly sample 1000 images (500 dogs + 500 cats) for training
import cv2
import numpy as np
import keras

X_train = []
y_train = []
# (folder, class index) pairs: dogs are class 1, cats are class 0.
for class_dir, class_idx in ((dir_train_dogs, 1), (dir_train_cats, 0)):
    # Draw 500 distinct file names from this class folder.
    for f in np.random.choice(os.listdir(class_dir), 500, replace=False):
        img_path = os.path.join(class_dir, f)
        img = cv2.imread(img_path)
        # cv2.imread returns None instead of raising when a file cannot be decoded;
        # report the offending path rather than failing later inside cv2.resize.
        if img is None:
            raise IOError('Could not read image: ' + img_path)
        X_train.append(cv2.resize(img, (224, 224)))
        y_train.append(class_idx)

# float32 represents the 0-255 pixel values exactly and needs half the memory of float64.
X_train = np.stack(X_train).astype(np.float32)
# One-hot encode the labels into two columns.
y_train = keras.utils.to_categorical(y_train, 2)


print('There are', X_train.shape[0], 'training data')
print('X_train.shape =', X_train.shape)
print('y_train.shape =', y_train.shape)

Using TensorFlow backend.


There are 1000 training data
X_train.shape = (1000, 224, 224, 3)
y_train.shape = (1000, 2)


In [3]:
# prepare validation data: every image in valid/dogs (class 1) and valid/cats (class 0)
X_valid = []
y_valid = []
for class_dir, class_idx in ((dir_valid_dogs, 1), (dir_valid_cats, 0)):
    for f in os.listdir(class_dir):
        img_path = os.path.join(class_dir, f)
        img = cv2.imread(img_path)
        # cv2.imread returns None instead of raising when a file cannot be decoded;
        # report the offending path rather than failing later inside cv2.resize.
        if img is None:
            raise IOError('Could not read image: ' + img_path)
        X_valid.append(cv2.resize(img, (224, 224)))
        y_valid.append(class_idx)

# float32 represents the 0-255 pixel values exactly and needs half the memory of float64.
X_valid = np.stack(X_valid).astype(np.float32)
# One-hot encode the labels into two columns.
y_valid = keras.utils.to_categorical(y_valid, 2)


print('There are', X_valid.shape[0], 'validation data')
print('X_valid.shape =', X_valid.shape)
print('y_valid.shape =', y_valid.shape)

There are 1000 validation data
X_valid.shape = (1000, 224, 224, 3)
y_valid.shape = (1000, 2)


In [16]:
# Optional cleanup, left disabled: uncommenting these lines clears the Keras backend
# session and runs the garbage collector (presumably to free memory before the model
# is rebuilt in the same kernel -- confirm before relying on it).
# import gc
# from keras import backend as K
# K.clear_session()
# gc.collect()

119

In [4]:
# load pre-trained cnn model: 
from keras.applications.xception import Xception
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, Activation, GlobalAveragePooling2D, BatchNormalization
from keras.models import Model

# Pre-trained ResNet50 without its ImageNet classification head.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    # Pass the real None (not the string 'None') to request no built-in pooling;
    # global average pooling is added explicitly below.
    pooling=None)

# Add a head for two-class classification on top of the convolutional features.
last_layer = base_model.output
# last_layer = BatchNormalization(axis=3)(last_layer)
last_layer = GlobalAveragePooling2D()(last_layer)
last_layer = Dense(2, activation='softmax')(last_layer)

# Do not train the original (pre-trained) parameters; only the new Dense layer learns.
for layer in base_model.layers:
    layer.trainable = False

# Keras 2 names these keyword arguments `inputs` / `outputs`
# (`input` / `output` are the deprecated Keras 1 spellings).
model = Model(inputs=base_model.input, outputs=last_layer)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# The layers stay alive through `model`; only the extra reference is dropped.
del base_model

print('model contains', len(model.layers), 'layers')



model contains 177 layers




In [5]:
# print(model.summary())

In [6]:
# training process
# X_valid is built class by class (all dog images first, then all cat images), so a
# leading slice such as X_valid[:200] contains dogs only and the per-epoch validation
# metrics would describe a single class. Taking every 5th sample keeps both classes
# represented (200 samples for the 1000 validation images loaded above).
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=16,
    validation_data=(X_valid[::5], y_valid[::5]))

Train on 1000 samples, validate on 200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Evaluate on the training sample and print the result so the metrics are actually
# shown (the returned value was previously stored without being displayed).
# The model was compiled with metrics=['accuracy'], so r is [loss, accuracy].
r = model.evaluate(X_train, y_train)
print(r)



In [10]:
# Score the model on the full validation set.
# The model was compiled with metrics=['accuracy'], so r is [loss, accuracy].
r = model.evaluate(x=X_valid, y=y_valid)
print(r)

[0.11063935218552069, 0.964]


In [11]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath='my_model_no_generator.h5')