In [0]:
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from imutils import paths
import numpy as np
import progressbar
import random
import os
import sys

In [14]:
# Mount Google Drive into the Colab filesystem.
# Colab may ask for an authorization code when this cell runs.
from google.colab import drive

driveMountPoint = '/content/gdrive'
drive.mount(driveMountPoint)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Make the local helper library importable.
# hdf5_dataset_writer.py must be visible to the interpreter,
# e.g. placed in My Drive/Colab Notebooks/utilities/io
utilitiesPath = '/content/gdrive/My Drive/Colab Notebooks/utilities/io'
# guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path
if utilitiesPath not in sys.path:
    sys.path.append(utilitiesPath)
from hdf5_dataset_writer import HDF5DatasetWriter

In [0]:
# dataset directory, and the base path from which the feature file name is built
datasetPath = r'/content/gdrive/My Drive/KrakN/database'
outputPath = datasetPath + r"/features"

In [0]:
# verify that the dataset directory exists before doing any work
# (this cell only checks the input path; it does not remove earlier output)
if not os.path.exists(datasetPath):
    # raise instead of calling quit(): inside a notebook quit() shuts the
    # kernel down, whereas an exception stops execution and keeps the
    # session (mounted drive, loaded variables) alive
    raise FileNotFoundError("Dataset at {}\nDoes not exist!".format(datasetPath))

In [18]:
# batchSize: images per model.predict() call
# bufferSize: forwarded to HDF5DatasetWriter when the writer is created
batchSize, bufferSize = 256, 1000

# collect every image path under the dataset directory, then randomise the order
print("Loading images...")
imagePaths = [foundPath for foundPath in paths.list_images(datasetPath)]
random.shuffle(imagePaths)
imageCount = len(imagePaths)
print("{} images loaded".format(imageCount))

Loading images...
22 images loaded


In [0]:
# get scale factor: the text after the last '_' in the first image's file
# name, with the file extension removed (e.g. "crack_0.5.png" -> "0.5")
if not imagePaths:
    # fail with a clear message instead of a bare IndexError on imagePaths[0]
    raise ValueError("imagePaths is empty - no images were found in the dataset directory")

# work on the file name only, so underscores in directory names are ignored,
# and strip the extension with splitext so 4-letter extensions such as
# ".jpeg" or ".tiff" are handled as well as ".png" / ".jpg"
fileStem = os.path.splitext(os.path.basename(imagePaths[0]))[0]
splitted = fileStem.split('_')
scale = splitted[-1]

In [0]:
# the class name of an image is the second-to-last component of its path
# (its parent directory); LabelEncoder turns the names into encoded labels
classNames = [path.rsplit(os.path.sep, 2)[-2] for path in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(classNames)

In [21]:
# build VGG16 with ImageNet weights, leaving out the top (fully connected) layers
print("loading network...")
model = VGG16(include_top=False, weights="imagenet")

loading network...


In [0]:
# create the HDF5 writer: one row of 512 * 7 * 7 values per image,
# written to "<outputPath>_s_<scale>.hdf5"
# NOTE(review): nothing visible in this notebook removes an earlier output
# file - confirm how HDF5DatasetWriter treats an already existing path
featureDims = (len(imagePaths), 512 * 7 * 7)
featuresFile = outputPath + "_s_{}".format(scale) + '.hdf5'
dataset = HDF5DatasetWriter(featureDims, featuresFile, "features", bufferSize)

# hand the class names known to the label encoder over to the writer
dataset.storeClassLabels(le.classes_)

In [0]:
# progress bar layout: label, percentage, bar and ETA
widgets = [
    "Extracting Features: ",
    progressbar.Percentage(),
    " ",
    progressbar.Bar(),
    " ",
    progressbar.ETA(),
]
# the bar's maximum is the number of images; create it and start it
totalImages = len(imagePaths)
pbar = progressbar.ProgressBar(maxval=totalImages, widgets=widgets).start()

In [0]:
# Extract VGG16 features batch by batch and add them to the HDF5 dataset.
# The writer is closed in a finally clause so that a failure part-way through
# a run does not leave the HDF5 file open for the rest of the session.
try:
    for i in range(0, len(imagePaths), batchSize):
        batchPaths = imagePaths[i:i + batchSize]
        batchLabels = labels[i:i + batchSize]
        batchImages = []

        for imagePath in batchPaths:
            # load the image resized to 224x224 and convert it to an array
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)

            # add a leading batch axis, then apply the ImageNet preprocessing
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)

            # add image to batch
            batchImages.append(image)

        # stack the images into one array and pass them through the network
        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size=batchSize)

        # flatten each image's features into a vector of 512 * 7 * 7 values
        features = features.reshape((features.shape[0], 512 * 7 * 7))

        # add features and labels to dataset
        dataset.add(features, batchLabels)

        # report the number of images finished so far rather than the start
        # index of the batch, so the bar does not lag one batch behind
        pbar.update(i + len(batchPaths))
finally:
    dataset.close()

pbar.finish()