In [1]:
# After unzipping training data manually, all 25000 images are in the 'train' folder.
# Use the script below to put dog images into 'train/dogs/' and cat images into 'train/cats/'

import os
import numpy as np

dir_train = 'train'
dir_train_dogs = 'train/dogs'
dir_train_cats = 'train/cats'

dir_valid = 'valid'
dir_valid_dogs = 'valid/dogs'
dir_valid_cats = 'valid/cats'

if not os.path.exists(dir_train_dogs):
    os.mkdir(dir_train_dogs)
    
if not os.path.exists(dir_train_cats):
    os.mkdir(dir_train_cats)
    
if not os.path.exists(dir_valid_dogs):
    os.mkdir(dir_valid_dogs)
    
if not os.path.exists(dir_valid_cats):
    os.mkdir(dir_valid_cats)

# Initially all images are put in 'train'
# separate dogs and cats in train/dogs and train/cats
[os.rename(os.path.join(dir_train, f), os.path.join(dir_train_dogs, f)) for f in os.listdir(dir_train) if 'dog.' in f]
[os.rename(os.path.join(dir_train, f), os.path.join(dir_train_cats, f)) for f in os.listdir(dir_train) if 'cat.' in f]

# put all images in valid folder back to train folder
[os.rename(os.path.join(dir_valid_dogs, f), os.path.join(dir_train_dogs, f)) for f in os.listdir(dir_valid_dogs)]
[os.rename(os.path.join(dir_valid_cats, f), os.path.join(dir_train_cats, f)) for f in os.listdir(dir_valid_cats)]

# select 1000 images for validation repectively
[os.rename(os.path.join(dir_train_dogs, f), os.path.join(dir_valid_dogs, f)) for f in np.random.choice(os.listdir(dir_train_dogs), 1000, replace=False)]
[os.rename(os.path.join(dir_train_cats, f), os.path.join(dir_valid_cats, f)) for f in np.random.choice(os.listdir(dir_train_cats), 1000, replace=False)]

print('There are ', len(os.listdir(dir_train_dogs)), 'dog train images')
print('There are ', len(os.listdir(dir_train_cats)), 'cat train images')
print('There are ', len(os.listdir(dir_valid_dogs)), 'dog valid images')
print('There are ', len(os.listdir(dir_valid_cats)), 'cat valid images')

There are  11500 dog train images
There are  11500 cat train images
There are  1000 dog valid images
There are  1000 cat valid images


In [2]:
# Declare the ImageDataGenerator of Keras

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import preprocess_input

# train_datagen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
train_datagen = ImageDataGenerator(    
    horizontal_flip=True,
    zoom_range=0.05,
    fill_mode="constant",
    channel_shift_range=10,
    rotation_range=5,
    width_shift_range=0.05,
    height_shift_range=0.05
)
train_generator = train_datagen.flow_from_directory(
    dir_train, 
    target_size=(224, 224), 
    batch_size=16, 
    class_mode='categorical', 
    shuffle=True) 

valid_datagen = ImageDataGenerator()
valid_generator = valid_datagen.flow_from_directory(
    dir_valid, 
    target_size=(224, 224), 
    batch_size=16, 
    class_mode='categorical', 
    shuffle=True) 

Using TensorFlow backend.


Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [3]:
# Randomly load 200 images for normalization
import numpy as np
import cv2

# X_train = []
# for f in np.random.choice(os.listdir(dir_train_dogs), 100, replace=False):
#     X_train.append(cv2.resize(cv2.imread(os.path.join(dir_train_dogs, f)), (224, 224)))
# for f in np.random.choice(os.listdir(dir_train_cats), 100, replace=False):
#     X_train.append(cv2.resize(cv2.imread(os.path.join(dir_train_cats, f)), (224, 224)))

# X_train = np.stack(X_train)

# # train_datagen.fit() outputs strange mean and std ...
# # manually calculate and set
# train_datagen.mean = np.mean(X_train, axis=(0,1,2))
# train_datagen.std = np.std(X_train, axis=(0,1,2))

# print('normalization mean:', train_datagen.mean)
# print('normalization std:', train_datagen.std)

# del X_train

In [25]:
# load pre-trained cnn model: 
from keras.applications.xception import Xception
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, Activation, GlobalAveragePooling2D, BatchNormalization
from keras.models import Model

base_model = ResNet50(
    include_top = False, 
    weights = 'imagenet', 
    input_shape = (224, 224, 3),
    pooling = 'None')

# Add a layer for two-class classification
last_layer = base_model.output
# last_layer = BatchNormalization(axis=3)(last_layer)
last_layer = GlobalAveragePooling2D()(last_layer)
last_layer = Dense(2, activation='softmax')(last_layer)

# Do not train original parameters
for layer in base_model.layers:
    layer.trainable = False

base_model.layers[-1].tranable = True
base_model.layers[-2].tranable = True
base_model.layers[-3].tranable = True
base_model.layers[-4].tranable = True

model = Model(input=base_model.input, output=last_layer)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# del base_model

print('model contains', len(model.layers), 'layers')

model contains 177 layers




In [24]:
# print(model.summary())
# print(base_model.layers[-1])
# print(base_model.layers[-2])
# print(base_model.layers[-3])
# print(base_model.layers[-4])

<keras.layers.core.Activation object at 0x7f2ba84109e8>
<keras.layers.merge.Add object at 0x7f2ba84829b0>
<keras.layers.normalization.BatchNormalization object at 0x7f2ba84b2080>
<keras.layers.convolutional.Conv2D object at 0x7f2ba84953c8>
-1
-2
-3
-4


In [30]:
%%time
# training process

history = model.fit_generator(train_generator, epochs=20, steps_per_epoch=100, validation_data=valid_generator, validation_steps=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CPU times: user 13min 56s, sys: 12.8 s, total: 14min 9s
Wall time: 11min 51s


In [15]:
# change learning rate
from keras.optimizers import Adam

model.lr.set_value(0.00005)
history = model.fit_generator(train_generator, epochs=10, steps_per_epoch=100, validation_data=valid_generator, validation_steps=10)

AttributeError: 'Model' object has no attribute 'lr'

In [7]:
# set all layers to trainable
# from keras.optimizers import Adam

# adam = Adam(lr=0.00005)

# for layer in base_model.layers:
#     layer.trainable = True

# model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])    
# print(model.summary())    

In [8]:
%%time
# training process

# history = model.fit_generator(train_generator, epochs=5, steps_per_epoch=200, validation_data=valid_generator, validation_steps=10)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 8.58 µs


In [28]:
valid_acc = 0
valid_loss = 0
for i in range(120):
    (x, y) = valid_generator.next()
    valid_loss += model.evaluate(x, y, verbose=0)[0]
    valid_acc += model.evaluate(x, y, verbose=0)[1]
print('validation acc:', valid_acc/120)
print('validation loss:', valid_loss/120)

validation acc: 0.9713541666666666
validation loss: 0.11058699955631254


In [29]:
model.save('my_model_0212_1.h5')