## Training a model using bottleneck features of a pre-trained network 

* https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [1]:
import numpy as npp
import math
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras import applications

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Target resolution (in pixels) that images are resized to when they are read from disk.
img_width = 256
img_height = 256

# Root folders holding the training and validation images.
train_data_dir = r"C:\Users\**\Desktop\jupyter\DL\CNN egs\Data\train"
validation_data_dir = r"C:\Users\**\Desktop\jupyter\DL\CNN egs\Data\validation"

# Number of images expected in each split.
nb_train_samples, nb_validation_samples = 2000, 600

# Epoch count for training, and the batch size shared by feature extraction and training.
epochs, batch_size = 10, 32

In [3]:
# Image loader that scales raw pixel values by 1/255 before they reach the network.
datagen = ImageDataGenerator(rescale=1.0 / 255)

In [4]:
# Download (on first use) and build the VGG16 network.
# include_top=False leaves out the dense classifier layers, so only the
# part of the network below them is instantiated.
vgg16_model = applications.VGG16(
    include_top=False,
)

In [5]:
# Stream the training images from disk, unlabeled and in directory order.
generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,  # yield batches of image data only, no labels
        shuffle=False)  # keep the on-disk order: the first 1000 images are cats, then 1000 dogs

# Number of batches needed to cover every training image (ceil keeps the final partial batch).
predict_size_train = int(math.ceil(nb_train_samples / batch_size))
# predict_generator runs the model on the batches yielded by the generator
# and returns the model outputs, i.e. the bottleneck features.
bottleneck_features_train = vgg16_model.predict_generator(generator, predict_size_train)

# Hand numpy the path instead of an open() handle: numpy then opens and closes
# the file itself, so no file descriptor is left dangling in the kernel.
npp.save('bottleneck_features_vgg16_train.npy', bottleneck_features_train)

Found 2000 images belonging to 2 classes.


In [6]:
# Stream the validation images from disk, unlabeled and in directory order.
generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,  # yield batches of image data only, no labels
    shuffle=False)  # keep the on-disk order so labels can be rebuilt afterwards

# Number of batches needed to cover every validation image (ceil keeps the final partial batch).
predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))
bottleneck_features_validation = vgg16_model.predict_generator(generator, predict_size_validation)

# Hand numpy the path instead of an open() handle: numpy then opens and closes
# the file itself, so no file descriptor is left dangling in the kernel.
npp.save('bottleneck_features_vgg16_validation.npy', bottleneck_features_validation)

Found 600 images belonging to 2 classes.


In [7]:
# Load by path so numpy opens and closes the file itself (no leaked handle).
train_data = npp.load('bottleneck_features_vgg16_train.npy')
# The features were saved in directory order, so the labels can be rebuilt:
# first half class 0, second half class 1.
# NOTE(review): this assumes both classes hold exactly nb_train_samples // 2 images.
train_labels = npp.array([0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))
# Fail early, with a clear message, if the cached features and rebuilt labels disagree.
assert len(train_data) == len(train_labels), "train feature/label count mismatch"

In [8]:
# Load by path so numpy opens and closes the file itself (no leaked handle).
validation_data = npp.load('bottleneck_features_vgg16_validation.npy')
# Labels are rebuilt from the saved order: first half class 0, second half class 1.
# NOTE(review): this assumes both classes hold exactly nb_validation_samples // 2 images.
validation_labels = npp.array([0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))
# Fail early, with a clear message, if the cached features and rebuilt labels disagree.
assert len(validation_data) == len(validation_labels), "validation feature/label count mismatch"

In [9]:
#completing the model
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 32768)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               8388864   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 8,389,121
Trainable params: 8,389,121
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Train the classifier on the cached training features, using the cached
# validation features as held-out data.
model.fit(
    x=train_data,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(validation_data, validation_labels),
)

Train on 2000 samples, validate on 600 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2558ede3f98>

In [11]:
# Save the trained weights of `model` to disk so they can be reloaded later.
model.save_weights('2ndconvnet_bottleneck_features_weights.h5')

* As observed, the validation accuracy jumped from 65% (1st_convnet) to **89%** simply by using the pretrained weights of the VGG16 model trained on the ImageNet dataset.