In [1]:
import os
import pickle 
import numpy as np
from tqdm.notebook import tqdm
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import load_img,img_to_array
from tensorflow.keras.utils import to_categorical,plot_model
from tensorflow.keras.layers import Input,Dense,LSTM,Embedding,Dropout,add

In [2]:
# Build the image feature extractor: take VGG16 and cut it off at its
# second-to-last layer, so the resulting model outputs that layer's activations
# instead of the final layer's output.
vgg16_full = VGG16()
model_temp = Model(inputs=vgg16_full.inputs, outputs=vgg16_full.layers[-2].output)
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
model_temp.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [3]:
def feature_generator(image):
    """Run one image file through `model_temp` and return the predicted features.

    Args:
        image: Path of the image file; it is handed to `load_img` as-is.

    Returns:
        Whatever `model_temp.predict` returns for a batch that contains only
        this image.
    """
    # Load the file resized to 224x224 and turn it into an array.
    pixels = img_to_array(load_img(image, target_size=(224, 224)))
    # Prepend a batch axis of size 1: predict() is given a batch, not a single image.
    batch = pixels.reshape((1, pixels.shape[0], pixels.shape[1], pixels.shape[2]))
    # Apply the VGG16 preprocessing imported alongside the model.
    batch = preprocess_input(batch)
    return model_temp.predict(batch, verbose=0)

In [4]:
# Load the saved model from image_captioning.h5 (presumably the trained
# captioning network — confirm). compile=False tells Keras not to compile the
# model while loading it.
model=load_model('image_captioning.h5',compile=False)

In [5]:
# Attach a loss and an optimizer to the model that was loaded with compile=False.
# NOTE(review): the visible cells only ever call model.predict, so this compile
# step may be unnecessary for inference — confirm before removing it.
model.compile(loss='categorical_crossentropy',optimizer='adam')

In [6]:
# Restore the tokenizer object stored in tokenizer.pkl (`token` is the open
# file handle, not a text token).
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load a tokenizer.pkl that comes from a trusted source.
with open("tokenizer.pkl",'rb') as token:
    tokenizer=pickle.load(token)

In [7]:
def idx_to_word(integer,tokenizer):
    """Look up the word that `tokenizer.word_index` maps to a given index.

    Args:
        integer: Index to search for among the values of `word_index`.
        tokenizer: Object exposing a `word_index` mapping of word -> index.

    Returns:
        The first word (in mapping order) whose index equals `integer`, or
        None when no word has that index.
    """
    # Lazily scan the vocabulary; next() stops at the first match and falls
    # back to None when the scan finds nothing.
    matching_words = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == integer
    )
    return next(matching_words, None)

In [8]:
def predict_captions(model,image,tokenizer,max_length):
    """Greedily decode a caption, appending one predicted word per step.

    Args:
        model: Model whose `predict` accepts `[image, padded_sequence]`.
        image: First model input (presumably the array returned by
            `feature_generator` — confirm against the caller).
        tokenizer: Tokenizer used to encode the partial caption and, through
            `idx_to_word`, to decode the predicted index.
        max_length: Maximum number of decoding steps; also the length every
            encoded partial caption is padded to.

    Returns:
        The caption as a space-separated string that starts with 'startseq'.
        Decoding stops after 'endseq' is appended, when the predicted index has
        no word, or once `max_length` steps have run.
    """
    caption_words = ['startseq']
    for _ in range(max_length):
        # Re-encode the caption built so far as a list of token ids ...
        partial_caption = " ".join(caption_words)
        token_ids = tokenizer.texts_to_sequences([partial_caption])[0]
        # ... and pad it to max_length before feeding it to the model.
        padded_ids = pad_sequences([token_ids], maxlen=max_length)
        scores = model.predict([image, padded_ids], verbose=0)
        # Greedy choice: take the highest-scoring index and map it to a word.
        next_word = idx_to_word(np.argmax(scores), tokenizer)
        if next_word is None:
            break
        caption_words.append(next_word)
        if next_word == "endseq":
            break
    return " ".join(caption_words)

In [11]:
# Extract features for the sample image, then decode and display its caption.
# NOTE(review): 35 is presumably the caption length the model was trained
# with — confirm, and consider promoting it to a named constant.
feature = feature_generator('images.png')
predict_captions(model, feature, tokenizer, max_length=35)

'startseq man in black coat and black pants is displaying up up in the snow endseq'