# Keras VGG16 feature extractor

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from pyimagesearch.io import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
import argparse
import random
import os

In [None]:
# Command-line parser object; no arguments are registered on it in this cell.
ap = argparse.ArgumentParser()

# Directory scanned for input images and destination handed to the HDF5
# writer. Both are empty strings here -- presumably placeholders to be filled
# in before the later cells are run.
DATASET_PATH = ""
OUTPUT_PATH = ""

# Number of images pushed through the network per predict() call.
BATCH_SIZE = 32
# Value passed to the HDF5 writer as its `bufSize` argument.
BUFFER_SIZE = 1000


In [None]:
# Collect every image path under DATASET_PATH, then shuffle so that the
# processing order is random.
print("[INFO] loading images...")
imagePaths = list(paths.list_images(DATASET_PATH))
random.shuffle(imagePaths)

# The class name of an image is the second-to-last component of its path
# (its parent directory). Names are taken in the shuffled order so that
# labels[k] always corresponds to imagePaths[k].
classNames = [
    imagePath.split(os.path.sep)[-2]
    for imagePath in imagePaths
]

# Map the class-name strings to encoded labels; `le.classes_` keeps the
# original names for later storage.
le = LabelEncoder()
labels = le.fit_transform(classNames)

# Load VGG16 with ImageNet weights and without its top (classifier) layers,
# so that predict() returns convolutional feature maps.
print("[INFO] loading network...")
model = VGG16(include_top=False, weights="imagenet")

# One row per image, one column per flattened feature value (512 * 7 * 7).
featureDims = (len(imagePaths), 512 * 7 * 7)
dataset = HDF5DatasetWriter(
    featureDims,
    OUTPUT_PATH,
    dataKey="features",
    bufSize=BUFFER_SIZE,
)

# Store the class names next to the features.
dataset.storeClassLabels(le.classes_)

# Progress display layout: title, percentage, bar and ETA, separated by
# single spaces.
widgets = [
    "Extracting Features: ",
    progressbar.Percentage(),
    " ",
    progressbar.Bar(),
    " ",
    progressbar.ETA(),
    " ",
]

# The bar's maximum is the total number of images to process.
pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(imagePaths))
pbar = pbar.start()

# Push the images through the network one batch at a time and write the
# resulting feature vectors, with their labels, to the HDF5 dataset.
#
# The loop is wrapped in try/finally so that the writer is always closed,
# even when loading an image or running the network raises part-way through.
# Otherwise the HDF5 file would be left open.
try:
    for i in range(0, len(imagePaths), BATCH_SIZE):
        # Paths and labels for this batch; the final batch may be shorter.
        batchPaths = imagePaths[i:i + BATCH_SIZE]
        batchLabels = labels[i:i + BATCH_SIZE]
        batchImages = []

        for imPath in batchPaths:
            # Load the image at 224x224 and convert it to an array.
            image = load_img(imPath, target_size=(224, 224))
            image = img_to_array(image)

            # Add a leading batch axis, then apply the ImageNet
            # preprocessing helper.
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)

            batchImages.append(image)

        # Stack the single-image arrays along the batch axis.
        batchImages = np.vstack(batchImages)

        # Run the network and flatten each image's feature maps into one
        # vector of length 512 * 7 * 7.
        features = model.predict(batchImages, batch_size=BATCH_SIZE)
        features = features.reshape((features.shape[0], 512 * 7 * 7))

        dataset.add(features, batchLabels)

        # Report the number of images processed so far (capped at the
        # total), not the start index of the batch that just finished.
        pbar.update(min(i + BATCH_SIZE, len(imagePaths)))
finally:
    dataset.close()

# Only mark the bar as complete when every batch succeeded.
pbar.finish()