In [1]:
import os
import pickle
import numpy as np
from tqdm.notebook import  tqdm

from keras.applications.vgg16 import VGG16, preprocess_input
from keras_preprocessing.image import load_img, img_to_array
from keras_preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.utils import to_categorical, plot_model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout, add

In [2]:
# Root of the dataset: the cells below read the 'Images' folder and
# 'captions.txt' from this directory.
# NOTE(review): this path is relative ('./kaggle/...') while WORKINDIR is
# absolute ('/kaggle/...') — confirm that the difference is intentional.
BASEDIR = './kaggle/input/flickr8k'
# Output directory: the extracted-features pickle is written to and read
# back from this location.
WORKINDIR = '/kaggle/working'

In [3]:
# Load the VGG16 network with its default constructor arguments
model = VGG16()

# Restructure the model into a feature extractor: expose the output of the
# second-to-last layer instead of the last one. layers[-1] is the network's
# own final (classification) output, so using it here would leave the model
# unchanged and every "feature" would be a class-probability vector.
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)


In [4]:
# Summarize the architecture. Model.summary() prints the table itself and
# returns None, so wrapping it in print() would add a stray "None" line.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [5]:
# Extract features from every image in the dataset's 'Images' folder
directory = os.path.join(BASEDIR, 'Images')
features = {}

for img_name in tqdm(os.listdir(directory)):
    # load the image at the 224x224 target size and convert it to a numpy array
    img_path = f"{directory}/{img_name}"
    image = img_to_array(load_img(img_path, target_size=(224, 224)))
    # add a leading batch axis of length 1, then apply the VGG16 preprocessing
    image = preprocess_input(np.expand_dims(image, axis=0))
    # run the model on this single-image batch
    feature = model.predict(image, verbose=0)
    # the image ID is the file name up to its first '.'
    image_id = img_name.split('.')[0]
    features[image_id] = feature

  0%|          | 0/8091 [00:00<?, ?it/s]

In [None]:
# Store the extracted features in a pickle file inside WORKINDIR.
# The context manager guarantees the file handle is flushed and closed even
# if pickling fails; passing a bare open(...) to pickle.dump leaves the
# handle open until it happens to be garbage-collected.
with open(os.path.join(WORKINDIR, 'features'), 'wb') as f:
    pickle.dump(features, f)

In [None]:
# Read the feature dictionary back from the pickle file in WORKINDIR.
# pickle.load can execute arbitrary code, so only load files you produced.
features_path = os.path.join(WORKINDIR, 'features')
with open(features_path, 'rb') as features_file:
    features = pickle.load(features_file)

In [None]:
# Load the captions data.
# An explicit encoding keeps decoding independent of the platform's default
# locale encoding.
with open(os.path.join(BASEDIR, 'captions.txt'), 'r', encoding='utf-8') as f:
    # Skip the first line of the file; the None default avoids a
    # StopIteration if the file is empty.
    next(f, None)
    captions_doc = f.read()