In [4]:
from nltk.translate.meteor_score import meteor_score

In [5]:
from numpy import argmax
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu

Using TensorFlow backend.


In [6]:
def load_doc(filename):
	"""Read an entire text file and return its contents as one string.

	Args:
		filename: path of the file to read (opened in text mode, read only).

	Returns:
		The full text of the file.
	"""
	# the context manager closes the handle even if read() raises
	with open(filename, 'r') as file:
		return file.read()

In [7]:
def load_set(filename):
	"""Collect the image identifiers listed in a file, one entry per line.

	Each non-empty line contributes the text before its first '.'; empty
	lines are ignored.

	Args:
		filename: path of the listing file, read through load_doc.

	Returns:
		A set of identifier strings.
	"""
	return {
		entry.split('.')[0]
		for entry in load_doc(filename).split('\n')
		if entry
	}

In [8]:
def load_clean_descriptions(filename, dataset):
	"""Load the descriptions of the images that belong to ``dataset``.

	Every line of the file is split on whitespace: the first token is the
	image identifier and the remaining tokens form one description. Each kept
	description is wrapped as 'startseq ... endseq'.

	Args:
		filename: path of the descriptions file, read through load_doc.
		dataset: container of image identifiers to keep (membership test only).

	Returns:
		A dict mapping image identifier to the list of its wrapped descriptions.
	"""
	descriptions = dict()
	for line in load_doc(filename).split('\n'):
		# split line by white space
		tokens = line.split()
		# a blank line (e.g. the one produced by a trailing newline) has no
		# tokens; indexing tokens[0] on it would raise IndexError
		if not tokens:
			continue
		# split id from description
		image_id, image_desc = tokens[0], tokens[1:]
		# skip images not in the set
		if image_id not in dataset:
			continue
		# wrap description in start/end markers and store it
		desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
		descriptions.setdefault(image_id, []).append(desc)
	return descriptions

In [9]:
def load_photo_features(filename, dataset):
	"""Load pickled photo features and keep only the entries in ``dataset``.

	Args:
		filename: path of a pickle file holding a mapping keyed by image
			identifier.
		dataset: iterable of image identifiers to keep.

	Returns:
		A dict with one entry per identifier in ``dataset``.

	Raises:
		KeyError: if an identifier in ``dataset`` is missing from the file.
	"""
	# NOTE(review): unpickling executes arbitrary code from the file; only
	# load feature files produced by a trusted source.
	# the context manager closes the handle once the features are loaded
	with open(filename, 'rb') as handle:
		all_features = load(handle)
	# filter features
	return {k: all_features[k] for k in dataset}

In [10]:
def to_lines(descriptions):
	"""Flatten a dict of description lists into one list of descriptions.

	Args:
		descriptions: dict mapping a key to a list of description strings.

	Returns:
		A new list with every description, in dict iteration order.
	"""
	# a nested comprehension builds the result directly instead of using a
	# comprehension purely for its append side effect
	return [desc for desc_list in descriptions.values() for desc in desc_list]

In [11]:
def create_tokenizer(descriptions):
	"""Build a Tokenizer fitted on every description in ``descriptions``.

	Args:
		descriptions: dict mapping a key to a list of description strings.

	Returns:
		The fitted Tokenizer instance.
	"""
	fitted = Tokenizer()
	fitted.fit_on_texts(to_lines(descriptions))
	return fitted

In [12]:
def max_length(descriptions):
	"""Return the word count of the longest description.

	Words are counted by splitting each description on whitespace.

	Args:
		descriptions: dict mapping a key to a list of description strings.

	Returns:
		The largest number of whitespace-separated words in any description.

	Raises:
		ValueError: if there are no descriptions at all.
	"""
	word_counts = [len(text.split()) for text in to_lines(descriptions)]
	return max(word_counts)

In [13]:
def word_for_id(integer, tokenizer):
	"""Look up the word whose index in ``tokenizer.word_index`` is ``integer``.

	Args:
		integer: the index to search for.
		tokenizer: object exposing a ``word_index`` mapping of word -> index.

	Returns:
		The first matching word, or None when no word has that index.
	"""
	matches = (
		word
		for word, index in tokenizer.word_index.items()
		if index == integer
	)
	return next(matches, None)

In [14]:
def generate_desc(model, tokenizer, photo, max_length):
	"""Generate a caption for one photo by repeatedly picking the top word.

	Starting from 'startseq', each step encodes the caption so far, pads it
	to ``max_length``, asks the model for the next-word scores and appends
	the word with the highest score. Generation stops after ``max_length``
	steps, when the predicted index maps to no word, or once 'endseq' has
	been appended.

	Args:
		model: object whose ``predict([photo, sequence], verbose=0)`` returns
			the next-word scores.
		tokenizer: fitted tokenizer used to encode text and map indices to words.
		photo: the photo input passed to the model as-is.
		max_length: padding length and maximum number of generation steps.

	Returns:
		The generated caption string, beginning with 'startseq'.
	"""
	caption = 'startseq'
	for _ in range(max_length):
		# encode the caption produced so far and pad it to max_length
		encoded = tokenizer.texts_to_sequences([caption])[0]
		padded = pad_sequences([encoded], maxlen=max_length)
		# greedy choice: position of the largest value in the model output
		next_id = argmax(model.predict([photo,padded], verbose=0))
		next_word = word_for_id(next_id, tokenizer)
		if next_word is None:
			# the predicted index has no word in the vocabulary
			break
		caption += ' ' + next_word
		if next_word == 'endseq':
			break
	return caption

In [15]:
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
	"""Generate a caption for every image and print corpus BLEU-1..4 scores.

	Args:
		model: captioning model passed through to generate_desc.
		descriptions: dict mapping image identifier to its reference captions.
		photos: dict mapping image identifier to the photo input of the model.
		tokenizer: fitted tokenizer passed through to generate_desc.
		max_length: maximum sequence length passed through to generate_desc.

	Returns:
		None; the four scores are printed.
	"""
	actual, predicted = list(), list()
	# step over the whole set
	for key, desc_list in descriptions.items():
		# generate description
		yhat = generate_desc(model, tokenizer, photos[key], max_length)
		# store tokenised references and the tokenised prediction
		actual.append([d.split() for d in desc_list])
		predicted.append(yhat.split())
	# n-gram weights for each cumulative score; every tuple sums to 1, so
	# BLEU-3 uses 1/3 per order rather than 0.3 (which only summed to 0.9)
	third = 1.0 / 3
	bleu_weights = (
		('BLEU-1', (1.0, 0, 0, 0)),
		('BLEU-2', (0.5, 0.5, 0, 0)),
		('BLEU-3', (third, third, third, 0)),
		('BLEU-4', (0.25, 0.25, 0.25, 0.25)),
	)
	# calculate BLEU score
	for label, weights in bleu_weights:
		print('%s: %f' % (label, corpus_bleu(actual, predicted, weights=weights)))

In [16]:
# load the identifiers of the training images
filename = 'Flickr8k_text/Flickr_8k.trainImages.txt'
train = load_set(filename)
print('Dataset: %d' % len(train))
# descriptions
train_descriptions = load_clean_descriptions('descriptions.txt', train)
print('Descriptions: train=%d' % len(train_descriptions))
# prepare tokenizer
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# determine the maximum sequence length
# The result is bound to the name `max_length`, which later cells pass as the
# integer sequence length. That binding replaces the helper function of the
# same name, so calling max_length(...) here would raise TypeError when this
# cell is run a second time. Computing the value inline keeps the cell
# re-runnable and yields the same number.
max_length = max(len(d.split()) for d in to_lines(train_descriptions))
print('Description Length: %d' % max_length)

Dataset: 6000
Descriptions: train=6000
Vocabulary Size: 7579
Description Length: 34


In [47]:
# load the identifiers of the test images
filename = 'Flickr8k_text/Flickr_8k.testImages.txt'
test = load_set(filename)
print('Dataset: %d' % len(test))
# reference descriptions, restricted to the test identifiers
test_descriptions = load_clean_descriptions('descriptions.txt', test)
print('Descriptions: test=%d' % len(test_descriptions))
# photo features, restricted to the test identifiers
# (raises KeyError if an identifier has no entry in features.pkl)
test_features = load_photo_features('features.pkl', test)
print('Photos: test=%d' % len(test_features))

Dataset: 1000
Descriptions: test=1000
Photos: test=1000


In [48]:
def return_actpred(model, descriptions, photos, tokenizer, max_length):
	"""Generate a caption per image and return references and predictions.

	Args:
		model: captioning model passed through to generate_desc.
		descriptions: dict mapping image identifier to its reference captions.
		photos: dict mapping image identifier to the photo input of the model.
		tokenizer: fitted tokenizer passed through to generate_desc.
		max_length: maximum sequence length passed through to generate_desc.

	Returns:
		A tuple ``(actual, predicted)``: ``actual[i]`` is the list of
		tokenised reference captions of the i-th image and ``predicted[i]``
		is the tokenised generated caption of that image.
	"""
	reference_sets = []
	hypotheses = []
	for image_id, captions in descriptions.items():
		generated = generate_desc(model, tokenizer, photos[image_id], max_length)
		# whitespace-tokenise every reference and the generated caption
		reference_sets.append([caption.split() for caption in captions])
		hypotheses.append(generated.split())
	return reference_sets, hypotheses

In [51]:
# raw string: '\M' and '\m' are not valid escape sequences in a normal literal
filename=r'D:\Models\model_19.h5'

In [52]:
# load the saved model from the path held in `filename`
model = load_model(filename)
# generate a caption for every test image; `max_length` here is the integer
# computed in the training-set cell, not the helper function of the same name
actual,predicted=return_actpred(model, test_descriptions, test_features, tokenizer, max_length)

In [53]:
# tokenised reference captions of the first test image
print(actual[0])

[['startseq', 'blond', 'woman', 'in', 'blue', 'shirt', 'appears', 'to', 'wait', 'for', 'ride', 'endseq'], ['startseq', 'blond', 'woman', 'is', 'on', 'the', 'street', 'hailing', 'taxi', 'endseq'], ['startseq', 'woman', 'is', 'signaling', 'is', 'to', 'traffic', 'as', 'seen', 'from', 'behind', 'endseq'], ['startseq', 'woman', 'with', 'blonde', 'hair', 'wearing', 'blue', 'tube', 'top', 'is', 'waving', 'on', 'the', 'side', 'of', 'the', 'street', 'endseq'], ['startseq', 'the', 'woman', 'in', 'the', 'blue', 'dress', 'is', 'holding', 'out', 'her', 'arm', 'at', 'oncoming', 'traffic', 'endseq']]


In [54]:
# tokenised generated caption of the first test image
print(predicted[0])

['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']


In [55]:
# tokenised reference captions of the second test image
print(actual[1])

[['startseq', 'boy', 'in', 'his', 'blue', 'swim', 'shorts', 'at', 'the', 'beach', 'endseq'], ['startseq', 'boy', 'smiles', 'for', 'the', 'camera', 'at', 'beach', 'endseq'], ['startseq', 'young', 'boy', 'in', 'swimming', 'trunks', 'is', 'walking', 'with', 'his', 'arms', 'outstretched', 'on', 'the', 'beach', 'endseq'], ['startseq', 'children', 'playing', 'on', 'the', 'beach', 'endseq'], ['startseq', 'the', 'boy', 'is', 'playing', 'on', 'the', 'shore', 'of', 'an', 'ocean', 'endseq']]


In [56]:
# tokenised generated caption of the second test image
print(predicted[1])

['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']


In [57]:
# tokenised generated caption of the third test image
print(predicted[2])

['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']


In [58]:
# print up to the first 900 generated captions; slicing (instead of indexing
# range(900)) avoids an IndexError when fewer predictions are available
# NOTE(review): the recorded output shows the same caption for every image
# printed - the loaded checkpoint or the photo features may be wrong; verify.
for caption in predicted[:900]:
    print(caption)

['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['startseq', 'two', 'dogs', 'are', 'running', 'through', 'the', 'grass', 'endseq']
['st