# Feature extraction with VGG-16

In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob

In [21]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model

### Creating the model

In [4]:
# Load VGG-16 with its classifier head and the ImageNet weights.
# Both arguments are the library defaults, spelled out here for the reader.
vgg = tf.keras.applications.vgg16.VGG16(
    include_top=True,
    weights='imagenet',
)

In [22]:
# Take the output tensor of the second-to-last layer, i.e. skip the final layer.
feature_layer = vgg.layers[-2]
output = feature_layer.output

In [33]:
# Encoder: shares VGG-16's input but stops at the tensor selected in `output`.
model = Model(
    inputs=vgg.input,
    outputs=output,
)

### Extracting features

In [30]:
def extract_features(img):
    """Run a single image file through `model` and return its prediction.

    Parameters
    ----------
    img
        Location of the image file, passed straight to `image.load_img`.

    Returns
    -------
    The result of `model.predict` on a batch holding only this image.
    """
    # Load the file resized to 224x224.
    pil_image = image.load_img(img, target_size=(224, 224))

    # Convert to an array and add a leading axis so it forms a batch of one.
    batch = np.expand_dims(image.img_to_array(pil_image), axis=0)

    # Apply the VGG-16 input preprocessing before running the model.
    batch = tf.keras.applications.vgg16.preprocess_input(batch)
    return model.predict(batch)

### Building feature_set

In [69]:
# Local copy of Flickr8k; the image folder and the caption file share this root.
flickr8k_root = 'E:/Web/Dataset/Flickr8k/'

# Keep the trailing slash: the glob pattern is appended to this string directly.
dataset_path = flickr8k_root + 'Images/'
caption_path = flickr8k_root + 'captions.txt'

In [106]:
# Map each image name (file name without extension) to its extracted features.
feature_set = {}

# glob returns matches in arbitrary order; sorting makes the insertion order of
# the dictionary the same on every run.
for img in sorted(glob.glob(dataset_path+'*.*')):

    feature = extract_features(img)

    # Strip only the final extension. Splitting on the first '.' would truncate
    # names that contain extra dots, so 'a.b.jpg' and 'a.c.jpg' would both map to
    # the key 'a' and silently overwrite each other.
    img_name = os.path.splitext(os.path.basename(img))[0]

    feature_set[img_name] = feature

In [109]:
# Number of distinct image names that received an entry in feature_set.
len(feature_set)

8091

### Saving feature_set

In [110]:
import pickle

In [111]:
# Write the whole dictionary to disk using the newest pickle protocol available.
with open('feature_set.pickle', mode='wb') as pickle_file:
    pickle.dump(
        feature_set,
        pickle_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )