## Import Modules

In [1]:
# Standard libraries
import os
import pickle
import numpy as np
from tqdm.notebook import tqdm
import re

# ML/Deep Learning frameworks
from tensorflow import keras
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.preprocessing.image import load_img, img_to_array


## Data directory

In [2]:
# Project root on the mounted drive; generated artifacts such as features.pkl are written here.
WORKING_DIRECTORY = './drive/Othercomputers/My MacBook Air/imageCaptionGenerator/'
# Dataset folder inside the project root; the notebook reads Images/ and captions.txt from it.
DATA_DIRECTORY = './drive/Othercomputers/My MacBook Air/imageCaptionGenerator/dataset/'

## Extract image features and store in a pickle file


In [3]:
## load vgg16 model
model = VGG16()
## re-wire the model so its output is the second-to-last layer (drops the final layer)
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)

## extract features from every image in the dataset
features = {}
directory = os.path.join(DATA_DIRECTORY, 'Images')
for img_name in tqdm(os.listdir(directory)):
  ## load image from file, resized to the 224x224 input used here
  img_file = os.path.join(directory, img_name)
  image = load_img(img_file, target_size=(224, 224))
  ## convert image to numpy array
  image = img_to_array(image)
  ## add a leading batch axis of size 1 (the model is fed a 4d array)
  image = image.reshape((1, *image.shape))
  ## apply the vgg16-specific input preprocessing
  image = preprocess_input(image)
  ## extract features
  feature = model.predict(image, verbose=0)  # verbose=0 keeps the progress bar readable
  ## image ID is the file name up to its first '.'
  image_id = img_name.split('.')[0]
  ## store features
  features[image_id] = feature

## Store features in a pickle file so the extraction does not have to be re-run.
## The context manager guarantees the file is flushed and closed after writing.
with open(os.path.join(WORKING_DIRECTORY, 'features.pkl'), 'wb') as f:  # write-binary
  pickle.dump(features, f)

  0%|          | 0/8091 [00:00<?, ?it/s]

## Load features pickle file

In [9]:
## Read back the image-id -> feature dictionary saved by the extraction step.
## NOTE: only unpickle files you created yourself; pickle can execute arbitrary code on load.
features_path = os.path.join(WORKING_DIRECTORY, 'features.pkl')
with open(features_path, 'rb') as features_file:
  features = pickle.load(features_file)

##  Load captions file

In [10]:
## Read the whole captions file into one string, minus its first line.
captions_path = os.path.join(DATA_DIRECTORY, 'captions.txt')
with open(captions_path, 'r') as captions_file:
  next(captions_file)  # skip the first line of captions.txt
  captions = captions_file.read()

## Map image to captions

In [11]:
## Build {image_id: [caption, ...]} from the raw captions text.
mapping = {}
for line in tqdm(captions.split('\n')):
  ## skip blank / degenerate lines
  if len(line) < 2:
    continue
  ## split only on the FIRST comma: everything before it is the image file name,
  ## everything after it is the caption (which may itself contain commas)
  image_name, _, caption = line.partition(',')
  ## image ID is the file name up to its first '.'
  image_id = image_name.split('.')[0]
  ## group captions for the same image together
  mapping.setdefault(image_id, []).append(caption)

  0%|          | 0/40456 [00:00<?, ?it/s]

In [12]:
len(mapping)  # quick check: number of distinct images that have captions

8091

## Preprocess caption data

In [13]:
def clean(mapping):
  """Normalize every caption in ``mapping`` in place.

  Each caption is lower-cased, stripped of every character that is not an
  ASCII letter or a space, has runs of spaces collapsed to a single space,
  is trimmed, and is finally wrapped as ``'<start> ' + caption + ' <end>'``.

  Args:
    mapping: dict whose values are lists of caption strings. The lists are
      modified in place.

  Returns:
    None.

  Note:
    Not idempotent: the character filter removes ``<`` and ``>``, so calling
    this on already-cleaned captions turns the tags into ordinary words and
    wraps the caption a second time.
  """
  for captions in mapping.values():
    for i, caption in enumerate(captions):
      ## convert to lowercase
      caption = caption.lower()
      ## remove everything except letters and spaces
      caption = re.sub("[^A-Za-z ]", "", caption)
      ## collapse runs of spaces, then trim; trimming must come after the
      ## character filter, which can leave stray spaces at either end
      caption = re.sub(" +", " ", caption).strip()
      ## add start and end tags and write the result back
      captions[i] = '<start> ' + caption + ' <end>'

## Testing result of preprocessing [optional]

In [14]:
## Spot-check one image: show its captions before and after cleaning.
## clean() rewrites mapping in place, so run this cell only once per fresh mapping.
sample_id = '109202756_b97fcdc62c'
print("Preprocessed: ", mapping[sample_id])
clean(mapping)
print("Processed: ", mapping[sample_id])

Preprocessed:  ['A woman dressed in a blue jacket and blue jeans rides a brown horse near a frozen lake and snow-covered mountain .', 'A woman in a blue jacket rides a brown pony near water .', 'A woman rides a horse near a frozen lake in the wintertime .', 'A young blond woman sitting atop a brown draft horse in the snowy mountains .', 'Woman n blue jacket sits on daft horse near a frozen lake .']


NameError: ignored