In [1]:
import numpy as np
import glob
import re
import gc

import os
from pathlib import Path

In [2]:
# Project directories, all resolved relative to the parent of the current
# working directory (the notebook is expected to be started from a sub-folder
# of the project root).
path = Path.cwd()
root = path.parent.absolute()

# Folder holding the pretrained-Inception artefacts; the per-batch feature
# files live in a sub-folder of it.
model_path = root.joinpath('Models', 'Pretrained Inception')
output_path = model_path / 'Image Features Batches'

# Folder holding the preprocessed image batches that are fed to the network.
preprocessed_image_path = root.joinpath('Shared Preprocessed Objects', 'Preprocessed Images for Inception')

In [3]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model
def get_pretrained_inceptionV3():
    """Build an InceptionV3 feature extractor from the ImageNet weights.

    The full classifier is instantiated first and then truncated so that the
    returned model outputs the activations of the second-to-last layer, i.e.
    the final softmax classification layer is dropped (the task here is
    feature extraction, not classification).

    Returns:
        A Keras ``Model`` that shares the classifier's input and emits the
        output of ``layers[-2]``.
    """
    # Full ImageNet classifier, including the softmax head
    classifier = InceptionV3(weights='imagenet')

    # Re-wire input -> penultimate layer, which cuts off the softmax
    penultimate_output = classifier.layers[-2].output
    feature_extractor = Model(inputs=classifier.input, outputs=penultimate_output)
    return feature_extractor

In [4]:
# Build the truncated InceptionV3 once; the same instance is passed to every
# feature-extraction call further down.
inceptionV3 = get_pretrained_inceptionV3()

In [5]:
def get_features_pretrained_inception(inputs, model, batch_size = 64, show_progress = True,
                                      predict_batch_size = 64):
    """Run every entry of ``inputs`` through ``model`` and collect the outputs.

    The inputs are processed in chunks of ``batch_size`` entries: each chunk is
    stacked into one NumPy array and handed to ``model.predict``; the rows of
    the prediction are then mapped back to the keys of that chunk.

    Args:
        inputs: dict mapping a key to one model input. The values of a chunk
            are stacked with ``np.array`` (presumably preprocessed image
            arrays of identical shape -- confirm against the producer).
        model: object exposing ``predict(batch, verbose=..., batch_size=...)``
            that returns one output row per input row.
        batch_size: number of entries stacked per ``predict`` call.
        show_progress: when true, print the number of already processed
            entries before each chunk.
        predict_batch_size: ``batch_size`` forwarded to ``model.predict``
            (defaults to 64, the value that used to be hard-coded).

    Returns:
        dict mapping every key of ``inputs`` to its predicted row, in the
        iteration order of ``inputs``. Empty when ``inputs`` is empty.

    Raises:
        ValueError: if ``inputs`` is non-empty and ``batch_size`` is not
            positive (this previously resulted in an endless loop).
    """
    keys = list(inputs.keys())
    total = len(keys)

    # Nothing to predict. This is checked before validating batch_size so that
    # callers passing batch_size=len(inputs) keep working for empty inputs.
    if total == 0:
        return {}

    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got ' + str(batch_size))

    values = list(inputs.values())
    features = {}

    # range() also yields the final, possibly shorter chunk, so no separate
    # tail handling is required.
    for start in range(0, total, batch_size):
        if show_progress:
            print('extracted ' + str(start) + ' image features out of ' + str(total))

        stop = start + batch_size
        batch = np.array(values[start:stop])
        feature_batch = model.predict(batch, verbose = 0, batch_size = predict_batch_size)

        # Row i of the prediction belongs to the i-th key of this chunk
        features.update(zip(keys[start:stop], feature_batch))

    return features

In [6]:
# List every file in the preprocessed-image folder. The pattern is built with
# pathlib so it uses the platform's separator (the former '\*' literal was an
# invalid escape sequence and Windows-only), and the result is sorted because
# glob returns files in arbitrary order.
paths = sorted(glob.glob(str(preprocessed_image_path / '*')))
len(paths)

15

In [7]:
# This takes a long time!!! Around 1 hour in my settings
#
# For every preprocessed batch file: load the stored dict, run it through the
# feature extractor and save the resulting {key: feature} dict as
# 'train_features_batch_<n>' or 'test_features_batch_<n>' inside output_path.

# np.save does not create missing directories
output_path.mkdir(parents=True, exist_ok=True)

count_train = 0
count_test = 0
# Sorted so that the batch numbering is reproducible between runs
for image_batch_path in sorted(paths):
    # NOTE: allow_pickle=True unpickles the file content, which can execute
    # arbitrary code -- only load files produced by this project.
    image_batch = np.load(image_batch_path, allow_pickle=True).item()
    gc.collect()

    # Classify by file name only: a parent directory containing 'train'
    # must not turn every file into a train batch.
    is_train_batch = 'train' in Path(image_batch_path).name

    # batch_size = len(...) sends the whole file in one predict call
    batch_features = get_features_pretrained_inception(image_batch, inceptionV3, batch_size = len(image_batch))

    if is_train_batch:
        np.save(output_path / ('train_features_batch_' + str(count_train)), batch_features)
        count_train += 1
    else:
        np.save(output_path / ('test_features_batch_' + str(count_test)), batch_features)
        count_test += 1


In [8]:
# Merge all per-batch feature files into one dictionary per split.
# This is possible because the feature extraction reduces dimensionality significantly.
# The pattern is built with pathlib (the former '\*' literal was an invalid
# escape sequence and Windows-only); sorting makes the order reproducible.
paths = sorted(glob.glob(str(output_path / '*')))
len(paths)

15

In [9]:
# Accumulate all per-batch feature dicts into one dict per split. If a key
# occurs in several batch files, the file processed last wins.
train_features = {}
test_features = {}

for feature_batch_path in sorted(paths):
    # NOTE: allow_pickle=True unpickles the file content, which can execute
    # arbitrary code -- only load files produced by this project.
    feature_batch = np.load(feature_batch_path, allow_pickle=True).item()

    # Look at the file name only; matching against the full path would
    # misclassify test files whenever a parent folder contains 'train_'.
    # update() extends the accumulator in place instead of copying it on
    # every iteration as `dict | dict` does.
    if Path(feature_batch_path).name.startswith('train_'):
        train_features.update(feature_batch)
    else:
        test_features.update(feature_batch)

In [11]:
# Write the merged train and test feature dictionaries into model_path
# (np.save appends the '.npy' extension to the given file name).
for file_stem, merged_features in (
    ('train_features_full', train_features),
    ('test_features_full', test_features),
):
    np.save(model_path / file_stem, merged_features)

In [None]:
# # This takes a long time!!!
# print('--- Extracting Train Image Features ---')
# for path in paths:
#     if 'train' in path:
#         tmp = np.load()
# train_features = get_features_pretrained_inception(train_images_preprocessed, inceptionV3, batch_size = 128)
# np.save(output_path + 'train_features', train_features)

# print('--- Extracting Test Image Features ---')
# test_features = get_features_pretrained_inception(test_images_preprocessed, inceptionV3, batch_size = 128)
# np.save(output_path + 'test_features', test_features)