## Importing relevant libraries

In [0]:
import os
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

keras = tf.keras

## Adding some relevant parameters

In [0]:
# Side length, in pixels, that images are resized to before being fed to the encoder.
img_dimension = 299
# Width of the hidden/cell state vectors passed to the decoder at each step.
LSTM_size = 768
# Caption length limit; the decoding loop runs at most max_cap_len + 1 steps.
max_cap_len = 15

## Loading trained encoder and decoder

In [0]:
# Load the trained encoder from "Encoder.hdf5" (path is relative to the
# current working directory) and print its layer summary.
encoder = keras.models.load_model("Encoder.hdf5")
encoder.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 299, 299, 3)]     0         
_________________________________________________________________
inception_v3 (Model)         (None, 8, 8, 2048)        21802784  
_________________________________________________________________
dense (Dense)                (None, 8, 8, 256)         524544    
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
Total params: 22,327,328
Trainable params: 524,544
Non-trainable params: 21,802,784
_________________________________________________________________


In [0]:
# Load the trained decoder from "Decoder.hdf5" (path is relative to the
# current working directory) and print its layer summary.
decoder = keras.models.load_model("Decoder.hdf5")
decoder.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           [(None, 16384)]      0                                            
__________________________________________________________________________________________________
input_11 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
tf_op_layer_ExpandDims_1 (Tenso (None, 1, 16384)     0           input_10[0][0]                   
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 256)       2560256     input_11[0][0]                   
____________________________________________________________________________________________

## Loading tokenizer

In [0]:
import pickle
# Restore the tokenizer object from 'tokenizer.pickle'. Later cells rely on its
# `word_index` (word -> id) and `index_word` (id -> word) mappings.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling, so
# only load a tokenizer file that comes from a trusted source.
with open('tokenizer.pickle', 'rb') as handle:
    tok = pickle.load(handle)

## Defining prediction functions
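`load_img` reads the image at a given path and resizes it for the encoder. `caption_image` takes the path of the image to caption, prints the generated caption, and displays the image.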

In [0]:
def load_img(path):
  """Read the image file at `path` and return it resized for the encoder.

  The file contents are decoded as a 3-channel JPEG and resized to
  (img_dimension, img_dimension). No pixel-value scaling is applied here.
  """
  raw_bytes = tf.io.read_file(path)
  decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
  target_size = (img_dimension, img_dimension)
  return tf.image.resize(decoded, target_size)

In [0]:
from tensorflow.keras.applications.inception_v3 import preprocess_input

def caption_image(path):
  """Generate a caption for the image at `path`, print it and show the image.

  The image is loaded with `load_img`, run through `preprocess_input` and the
  encoder, and the caption is then decoded greedily: starting from "<sos>",
  each step feeds the previously predicted token id and the current states
  (h, c) to the decoder and keeps the arg-max token. Decoding stops at
  "<eos>", after max_cap_len + 1 steps, or when the predicted id has no entry
  in `tok.index_word`.

  Args:
    path: Path of the image file to caption.

  Returns:
    None. The caption (wrapped in "<sos>" ... "<eos>") is printed and the
    image is drawn with matplotlib.

  Raises:
    KeyError: If the tokenizer has no "<sos>" or "<eos>" entry.
  """
  image = load_img(path)
  img_input = preprocess_input(image)

  # The encoder is called on a batch, so wrap the single image in a batch of one.
  encodings = encoder.predict(tf.reshape(img_input, (1, img_dimension, img_dimension, 3)))

  sos_id = tok.word_index["<sos>"]
  eos_id = tok.word_index["<eos>"]

  texts = ["<sos>"]
  token_id = sos_id
  # Separate zero arrays for the two initial states (no aliasing between h and c).
  h = np.zeros((1, LSTM_size))
  c = np.zeros((1, LSTM_size))
  for _ in range(max_cap_len + 1):
    # Feed the previous token id directly instead of looking the word up again.
    dec_inp = np.array(token_id).reshape(1, -1)
    props, h, c = decoder.predict([encodings, h, c, dec_inp])
    token_id = int(np.argmax(props[0]))

    word = tok.index_word.get(token_id)
    if word is None:
      # The predicted id has no word (presumably the reserved padding id 0 --
      # TODO confirm). Appending None would break both the next decoder input
      # and the final ' '.join, so treat it as the end of the caption.
      break
    texts.append(word)

    if token_id == eos_id:
      break
  # Always terminate the printed caption with the end marker.
  if texts[-1] != '<eos>':
    texts.append('<eos>')
  print(' '.join(texts))
  plt.imshow(image/255.0)
  plt.axis("off")