In [1]:
import os
import glob
import h5py
import numpy as np
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense

Using TensorFlow backend.


In [2]:
# Weight files: the VGG16 weights read by load_models, and the file the trained
# top classifier is written to.
weights_path = './vgg16_weights.h5'
top_model_weights_path = 'bottleneck_fc_model.h5'

# Side lengths every image is resized to before entering the network.
img_width = 150
img_height = 150

# Show which dimension ordering the active backend uses.
print(K.image_dim_ordering())

# Image directories, one sub-directory per class.
train_data_dir = 'data/trains/'
validation_data_dir = 'data/validations'

# Dataset sizes and number of training passes for the top classifier.
nb_train_samples = 2000
nb_validation_samples = 800
nb_epoch = 30

tf


In [3]:
def load_models(weights_path):
    """Build the VGG16 convolutional base and load saved weights into it.

    The network consists of five blocks of (zero-padding, 3x3 convolution)
    pairs, each block closed by a 2x2 max-pooling layer; no fully-connected
    top is added. The input shape is ``(img_width, img_height, 3)``, taken
    from the module-level image size settings.

    Parameters
    ----------
    weights_path : str
        Path to an HDF5 file that carries an ``nb_layers`` file attribute and
        one ``layer_<k>`` group per layer, each with an ``nb_params``
        attribute and ``param_<p>`` datasets.

    Returns
    -------
    Sequential
        The convolutional base with the stored weights applied.
    """
    # (number of filters, number of convolutions) for each of the five blocks.
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model_vgg = Sequential()
    for block_no, (nb_filter, nb_conv) in enumerate(conv_blocks, 1):
        for conv_no in range(1, nb_conv + 1):
            if block_no == 1 and conv_no == 1:
                # Only the very first layer declares the input shape.
                model_vgg.add(ZeroPadding2D((1, 1), input_shape=(img_width, img_height, 3)))
            else:
                model_vgg.add(ZeroPadding2D((1, 1)))
            model_vgg.add(Convolution2D(nb_filter, 3, 3, activation='relu',
                                        name='conv{}_{}'.format(block_no, conv_no)))
        model_vgg.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # Open read-only inside a context manager: the handle is released even if
    # a lookup below fails, and a wrong path raises instead of creating an
    # empty file (older h5py versions default to append mode).
    with h5py.File(weights_path, 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(model_vgg.layers) - 1:
                # Stop before the model's final layer (a max-pooling layer,
                # which has no weights); any further layers in the save file
                # have no counterpart in this model.
                break
            g = f['layer_{}'.format(k)]
            # Read the datasets into memory while the file is still open.
            weights = [np.asarray(g['param_{}'.format(p)])
                       for p in range(g.attrs['nb_params'])]
            layer = model_vgg.layers[k]

            if layer.__class__.__name__ in ['Convolution1D', 'Convolution2D', 'Convolution3D', 'AtrousConvolution2D']:
                # Reorder kernel axes; presumably the file stores kernels as
                # (filters, channels, rows, cols) and the channels-last
                # backend expects (rows, cols, channels, filters) -- TODO confirm.
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

            layer.set_weights(weights)

    model_vgg.summary()
    print("VGG16 Model with No Top loaded...")
    return model_vgg

In [4]:
# Build the convolutional base and load its weights; this also prints the layer summary.
vgg16_notop_model = load_models(weights_path)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
zeropadding2d_1 (ZeroPadding2D)  (None, 152, 152, 3)   0           zeropadding2d_input_1[0][0]      
____________________________________________________________________________________________________
conv1_1 (Convolution2D)          (None, 150, 150, 64)  1792        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 152, 152, 64)  0           conv1_1[0][0]                    
____________________________________________________________________________________________________
conv1_2 (Convolution2D)          (None, 150, 150, 64)  36928       zeropadding2d_2[0][0]            
___________________________________________________________________________________________

In [5]:
def _dump_bottleneck_features(notop_model, datagen, data_dir, batch_size,
                              nb_samples, split_label, out_path):
    """Run one image directory through ``notop_model`` and save the outputs to ``out_path``."""
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,   # images only, no labels
        shuffle=False)     # keep directory order so labels can be rebuilt later
    print("{} generator is made.. by Keras".format(split_label))

    features = notop_model.predict_generator(generator, nb_samples)
    # Write through a context manager so the file handle is always closed.
    with open(out_path, 'wb') as out_file:
        np.save(out_file, features)
    print("{} is created..".format(out_path))


def save_bottleneck_features(notop_model):
    """Compute and cache the model outputs for the training and validation images.

    Images are read from ``train_data_dir`` and ``validation_data_dir``,
    rescaled by 1/255, resized to ``(img_width, img_height)`` and fed through
    ``notop_model`` in their on-disk order. The predictions are written to
    ``bottleneck_features_train.npy`` and ``bottleneck_features_validation.npy``
    in the current working directory.

    Parameters
    ----------
    notop_model
        Model exposing ``predict_generator``; here the convolutional base
        without a classifier on top.

    Returns
    -------
    None
    """
    datagen = ImageDataGenerator(rescale=1./255)

    _dump_bottleneck_features(
        notop_model, datagen, train_data_dir,
        batch_size=16, nb_samples=nb_train_samples,
        split_label="Train", out_path='bottleneck_features_train.npy')

    _dump_bottleneck_features(
        notop_model, datagen, validation_data_dir,
        batch_size=4, nb_samples=nb_validation_samples,
        split_label="Validation", out_path='bottleneck_features_validation.npy')

In [6]:
# Push both image sets through the convolutional base and cache the outputs as .npy files.
save_bottleneck_features(vgg16_notop_model)

Found 2000 images belonging to 2 classes.
Train generator is made.. by Keras
bottleneck_features_train.npy is created..
Found 800 images belonging to 2 classes.
Validation generator is made.. by Keras
bottleneck_features_validation.npy is created..


In [7]:
def train_top_model(top_model_weigths_path):
    """Train a small dense classifier on the cached bottleneck features.

    Features are loaded from ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy``. Labels are rebuilt by position:
    the first half of each set is labelled 0 and the second half 1, which
    relies on the features having been saved unshuffled with equally sized
    classes.

    Parameters
    ----------
    top_model_weigths_path : str
        File the trained classifier weights are saved to. The misspelled
        parameter name is kept so existing keyword callers keep working.

    Returns
    -------
    None
    """
    # Loading by path lets NumPy open and close the files itself.
    train_data = np.load('bottleneck_features_train.npy')
    # 0: cat, 1: dog
    train_labels = np.array([0]*(nb_train_samples//2) + [1]*(nb_train_samples//2))

    validation_data = np.load('bottleneck_features_validation.npy')
    validation_labels = np.array([0]*(nb_validation_samples//2) + [1]*(nb_validation_samples//2))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              nb_epoch=nb_epoch, batch_size=16,
              validation_data=(validation_data, validation_labels),
              verbose=1)
    # Save to the path the caller passed in rather than the module-level global.
    model.save_weights(top_model_weigths_path)
    print("Bottle neck weights are saved...")

In [8]:
# Fit the dense classifier on the cached features and save its weights.
train_top_model(top_model_weights_path)

Train on 2000 samples, validate on 800 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Bottle neck weights are saved...
