In [1]:
import numpy as np
import progressbar 
import random 
import os
import cv2

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer

from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.datasets import cifar10


import matplotlib.pyplot as plt
%matplotlib inline

from helpers import HDF5DatasetWriter
from helpers import Utils

Using TensorFlow backend.


In [2]:
import tensorflow as tf
from keras import backend as K

# Ask TensorFlow to grow its GPU allocation on demand rather than
# reserving all device memory when the session is created.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

# Register the session with Keras explicitly. Without this the configured
# session is created but never handed to Keras, which builds its own
# session the first time one is needed.
K.set_session(session)

In [3]:
# Destination HDF5 files for the extracted features, one per data split.
features_dir = "../input/data/cifar"
output_path_train = features_dir + "/vgg16_features_train.hdf5"
output_path_test = features_dir + "/vgg16_features_test.hdf5"

# Batch size and HDF5 write-buffer size settings.
batch_size, buffer_size = 32, 1000

In [4]:
# VGG16 with ImageNet weights and without the top classifier layers
# (include_top=False), used below purely as a feature extractor.
model = VGG16(
    include_top=False,
    weights="imagenet",
)

In [5]:
# Human-readable class names stored alongside the features.
# NOTE(review): order presumably matches CIFAR-10's integer class ids 0-9 — confirm.
class_names = (
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)
labels_name = np.array(class_names)

In [6]:
# Load CIFAR-10 through Keras and unpack the (images, labels) pair of each split.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [7]:
def extract_feature(model, x, y, output_path, batch_size = 32, buffer_size = 1000):
    """Run images through ``model`` in batches and store flattened features in HDF5.

    Every image is resized to 224x224, preprocessed with the ImageNet
    preprocessing from ``imagenet_utils``, passed through ``model.predict``
    and flattened to ``512 * 7 * 7`` values. Features and one-hot labels are
    handed to an ``HDF5DatasetWriter`` that writes to ``output_path``. The
    class names stored in the file come from the module-level ``labels_name``.

    Args:
        model: Object with a Keras-style ``predict(batch, batch_size=...)``
            whose output for a 224x224 input holds ``512 * 7 * 7`` values
            per image.
        x: Array of images indexed along the first axis.
            NOTE(review): presumably (N, H, W, 3) RGB as returned by
            ``cifar10.load_data()`` — confirm.
        y: Class labels aligned with ``x``; one-hot encoded here with
            ``LabelBinarizer``.
        output_path: Path of the HDF5 file passed to the writer.
        batch_size: Number of images per forward pass.
        buffer_size: Value forwarded to the writer as ``bufSize``.

    Returns:
        None. The result is the HDF5 file produced by the writer.
    """
    image_size = (224, 224)      # spatial size every image is resized to
    feature_dim = 512 * 7 * 7    # flattened feature length stored per image

    # One-hot encode up front so the label dataset is sized from the actual
    # encoded width instead of a hard-coded 10.
    labels = LabelBinarizer().fit_transform(y)
    num_images = x.shape[0]

    dataset = HDF5DatasetWriter((num_images, feature_dim), labels.shape, output_path, dataKey="features", bufSize=buffer_size)
    try:
        dataset.storeClassLabels(labels_name)

        widgets = ["Extracting Features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(maxval=num_images, widgets=widgets).start()

        for start in range(0, num_images, batch_size):
            stop = min(start + batch_size, num_images)

            # Resize each image, then preprocess the whole batch in one call.
            batch_images = np.stack([
                img_to_array(cv2.resize(image, image_size))
                for image in x[start:stop]
            ])
            # preprocess_input already zero-centres the channels against the
            # ImageNet means without scaling; the previous extra "/ 255.0"
            # shrank the inputs away from the range the pretrained weights
            # were trained on, so it is intentionally not applied here.
            batch_images = imagenet_utils.preprocess_input(batch_images)

            features = model.predict(batch_images, batch_size=batch_size)
            features = features.reshape((features.shape[0], feature_dim))
            dataset.add(features, labels[start:stop])

            # Report images completed so far; stop never exceeds maxval.
            pbar.update(stop)

        pbar.finish()
    finally:
        # Always close the writer so the file is not left open when a batch fails.
        dataset.close()

In [9]:
# Extract features for the training split, using the batch and buffer sizes
# defined with the output paths instead of repeating the literals.
extract_feature(model, x_train, y_train, output_path_train, batch_size=batch_size, buffer_size=buffer_size)

Extracting Features: 100% |#####################################| Time: 0:13:43


In [8]:
# Extract features for the test split, using the batch and buffer sizes
# defined with the output paths instead of repeating the literals.
extract_feature(model, x_test, y_test, output_path_test, batch_size=batch_size, buffer_size=buffer_size)

Extracting Features: 100% |#####################################| Time: 0:02:22


In [4]:
import h5py

# Open read-only: this cell only inspects the file, and the default mode of
# h5py.File differs between h5py versions (older releases open read/write).
db = h5py.File(output_path_train, "r")
list(db.keys())

['features', 'label_names', 'labels']