In [1]:
from pickle import load
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from keras.applications.xception import Xception
from keras.applications.inception_v3 import InceptionV3
from PIL import Image
import numpy as np


from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

In [2]:
def word_for_id(integer, tokenizer):
    """Map a vocabulary index back to its word.

    Args:
        integer: the index to look up (compared with ``==`` against the
            values of ``tokenizer.word_index``).
        tokenizer: any object exposing a ``word_index`` mapping of
            word -> index.

    Returns:
        The first word (in mapping iteration order) whose index equals
        ``integer``, or ``None`` when no entry matches.
    """
    matches = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == integer
    )
    # next() with a default yields None when the generator is exhausted.
    return next(matches, None)

In [3]:
def extract_features(filename, model=None):
    """Encode a single image file into a CNN feature array.

    The image is resized to 299x299, forced to three RGB channels, scaled
    from [0, 255] to [-1, 1] and passed through ``model.predict``.

    Args:
        filename: path (or file object) accepted by ``PIL.Image.open``.
        model: optional pre-built feature extractor exposing ``predict``.
            When omitted, a fresh ``InceptionV3(include_top=False,
            pooling='avg')`` is built on every call, which is slow; pass a
            shared instance when encoding more than one image.

    Returns:
        Whatever ``model.predict`` returns for a batch containing this one
        image (for the default InceptionV3 with average pooling this is
        expected to be a (1, 2048) array -- confirm against the Keras docs).
    """
    if model is None:
        # To use Xception instead:
        # model = Xception(include_top=False, pooling='avg')
        model = InceptionV3(include_top=False, pooling='avg')

    print(filename)
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(filename) as source:
        resized = source.resize((299, 299))
        # convert('RGB') drops the alpha channel of RGBA images (same result
        # as slicing the first three channels) and also expands greyscale /
        # palette images, which previously crashed on `image.shape[2]`.
        image = np.array(resized.convert('RGB'))

    # Add the batch dimension, then map pixel values from [0, 255] to [-1, 1].
    image = np.expand_dims(image, axis=0)
    image = image / 127.5
    image = image - 1.0
    return model.predict(image)


In [4]:
def generate_desc(model, tokenizer, photo, max_length):
    """Greedily decode a caption for one image.

    Starting from the seed token ``'startseq'``, the text generated so far
    is tokenised, padded to ``max_length`` and fed to ``model.predict``
    together with ``photo``; the argmax of the prediction is mapped back to
    a word via ``word_for_id`` and appended.

    Args:
        model: object whose ``predict([photo, sequence], verbose=0)`` returns
            scores over the vocabulary (presumably the trained captioning
            model -- confirm against the training notebook).
        tokenizer: tokenizer providing ``texts_to_sequences`` and the
            ``word_index`` mapping used by ``word_for_id``.
        photo: image features passed unchanged as the first model input.
        max_length: padding length and upper bound on decoding steps.

    Returns:
        The space-joined caption, beginning with ``'startseq'`` and ending
        with ``'endseq'`` if the model emitted it before decoding stopped.
    """
    caption_words = ['startseq']

    for _ in range(max_length):
        # Re-encode everything generated so far as a padded integer sequence.
        text_so_far = ' '.join(caption_words)
        encoded = tokenizer.texts_to_sequences([text_so_far])[0]
        padded = pad_sequences([encoded], maxlen=max_length)

        # Pick the highest-scoring vocabulary index and translate it to a word.
        scores = model.predict([photo, padded], verbose=0)
        next_word = word_for_id(np.argmax(scores), tokenizer)

        # An index with no vocabulary entry ends decoding without appending.
        if next_word is None:
            break

        caption_words.append(next_word)

        # The end marker is kept in the output; callers strip it.
        if next_word == 'endseq':
            break

    return ' '.join(caption_words)

### Generate Captions for images


In [5]:
%%time

# load the tokenizer
# NOTE: unpickling can execute arbitrary code -- only load a tokenizer.pkl
# that you produced yourself or otherwise trust.
# The `with` block makes sure the file handle is closed after loading.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

max_length = 34 # from training


# Use InceptionV3 or Xception accordingly; the caption model must match the
# feature extractor used in extract_features.
# model = load_model('Xception_model.h5')
model = load_model('InceptionV3_model.h5')


# give any valid filename of the image to test
test_filename = 'TestImages/woman_and_dog.jpg'

picture = extract_features(test_filename)


# generate description
description = generate_desc(model, tokenizer, picture, max_length)

# Remove the startseq / endseq markers that generate_desc leaves in the text
query = description
stopwords = ['startseq','endseq']
querywords = query.split()

resultwords  = [word for word in querywords if word.lower() not in stopwords]
result = ' '.join(resultwords)

print(result)

TestImages/tom_and_jerry.jpg
Xception:  the small child is jumping on bed
Inception:  dog is running through the grass
Wall time: 4.14 s
