In [None]:
from os import listdir
from numpy import array
from keras.models import Model
from pickle import dump
from keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model

from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from tensorflow.keras.layers import Add
from keras.callbacks import ModelCheckpoint

In [None]:
# Mount Google Drive so the dataset, feature and model files under
# /content/drive/MyDrive that later cells read and write are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input
# Build VGG16 with its default constructor arguments.
model = VGG16()
# Re-structure the model: expose the output of the second-to-last layer as the
# feature vector instead of the final layer's output.
# (The former `model.layers.pop()` call was dropped: the layer is selected by
# index below, and current Keras returns a fresh list from `model.layers`, so
# popping from it had no effect on the model.)
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
# summarize -- summary() prints the table itself and returns None, so wrapping
# it in print() only added a stray "None" line to the output.
model.summary()

None


In [None]:
from os import listdir
from pickle import dump
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from keras.models import Model

def extract_features(directory):
    """Run every image in ``directory`` through the notebook-level ``model``.

    Each file is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through ``preprocess_input`` and then ``model.predict``.

    Args:
        directory: Path of a folder whose entries are all image files.

    Returns:
        dict mapping each file name truncated at its first '.' to the
        prediction returned by ``model.predict`` for that image.
    """
    collected = {}
    for entry in listdir(directory):
        pixels = img_to_array(load_img(directory + '/' + entry, target_size=(224, 224)))
        # Prepend a batch axis of size 1: (h, w, c) -> (1, h, w, c).
        batch = pixels.reshape((1,) + tuple(pixels.shape[:3]))
        batch = preprocess_input(batch)
        prediction = model.predict(batch, verbose=0)
        # The key is everything before the first dot of the file name.
        collected[entry.split('.')[0]] = prediction
        print('>%s' % entry)
    return collected


In [None]:
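# Example usage (left commented out): extract features for every image in the
# directory and pickle them. Note that this writes 'features1.pkl' to the
# working directory, whereas later cells load 'features.pkl' from Drive.
# directory = "/content/drive/MyDrive/img_img_cap_gen"
# features = extract_features(directory)
# dump(features, open('features1.pkl', 'wb'))
# print("Extracted Features: %d" % len(features))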

In [None]:
import string
from nltk.tokenize import word_tokenize

def load_doc(filename):
    """Read a text file and return its entire contents as one string.

    Args:
        filename: Path of the file to read (opened in text mode).

    Returns:
        The full text of the file.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r') as file:
        return file.read()

In [None]:
def load_descriptions(doc):
    """Parse a captions document into ``{image_id: [caption, ...]}``.

    Each usable line is split on whitespace: the first token is the image
    reference and the remaining tokens are re-joined with single spaces to
    form the caption. The image id is the reference truncated at its first '.'.

    Args:
        doc: Whole captions file as one string, one caption per line.

    Returns:
        dict mapping image id to the list of its captions, in file order.
    """
    mapping = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # Skip lines shorter than two characters (as before) and also
        # whitespace-only lines, which previously slipped past the length
        # check and crashed on tokens[0].
        if len(line) < 2 or not tokens:
            continue
        image_id, image_desc = tokens[0], tokens[1:]
        image_id = image_id.split('.')[0]
        mapping.setdefault(image_id, list()).append(' '.join(image_desc))
    return mapping

In [None]:
def clean_descriptions(descriptions):
    """Normalise every caption in ``descriptions`` in place.

    Each word is lower-cased and stripped of ``string.punctuation``; words
    that end up one character long or shorter, or that are not purely
    alphabetic, are dropped. The cleaned words are re-joined with single
    spaces and written back into the same list slot. Returns nothing.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    for captions in descriptions.values():
        for position, caption in enumerate(captions):
            kept = []
            for raw_word in caption.split():
                word = raw_word.lower().translate(strip_punct)
                if len(word) > 1 and word.isalpha():
                    kept.append(word)
            captions[position] = ' '.join(kept)
def to_vocabulary(descriptions):
    """Return the set of distinct whitespace-separated words over all captions."""
    vocabulary = set()
    for captions in descriptions.values():
        for caption in captions:
            vocabulary.update(caption.split())
    return vocabulary

In [None]:
def save_descriptions(descriptions, filename):
    """Write captions to ``filename``, one ``"<image_id> <caption>"`` per line.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
        filename: Destination path; an existing file is overwritten.

    Lines are joined with '\\n' and no trailing newline is written.
    """
    lines = [key + " " + desc
             for key, desc_list in descriptions.items()
             for desc in desc_list]
    # The context manager closes (and flushes) the file even if write() raises.
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))

In [None]:
# Download NLTK's 'punkt' package.
# NOTE(review): word_tokenize is imported earlier in this notebook, but no
# visible cell calls it -- confirm this download is still needed.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Read the raw captions file from Drive and group its captions by image id.
filename = "/content/drive/MyDrive/Flickr8k.token.txt"
doc = load_doc(filename)
descriptions = load_descriptions(doc)
# Number of distinct image ids found in the captions file.
print("Loaded: %d" %len(descriptions))

Loaded: 8092


In [None]:

# Clean the captions. clean_descriptions mutates `descriptions` in place, so
# re-running this cell re-cleans already-cleaned text.
clean_descriptions(descriptions)
# Collect the set of distinct words that remain after cleaning.
vocab = to_vocabulary(descriptions)
print("Vocab size: %d" %len(vocab))

Vocab size: 8763


In [None]:
from pickle import dump
import pickle
def load_doc(filename):
    """Read a text file and return its entire contents as one string.

    NOTE(review): this repeats an earlier definition of ``load_doc`` in this
    notebook; the later one silently replaces the earlier one.

    Args:
        filename: Path of the file to read (opened in text mode).

    Returns:
        The full text of the file.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r') as file:
        return file.read()

def load_set(filename):
    """Return the set of image identifiers listed in ``filename``.

    The file is read with ``load_doc`` and split on newlines; empty lines are
    ignored and each remaining line is truncated at its first '.'.
    """
    nonempty_lines = (line for line in load_doc(filename).split("\n") if len(line) >= 1)
    return {line.split('.')[0] for line in nonempty_lines}


In [None]:
def load_photo_features(features, dataset):
    """Load pickled photo features and keep only the ids in ``dataset``.

    Args:
        features: Path of a pickle file holding a dict of image id -> feature.
        dataset: Iterable of image ids to keep.

    Returns:
        dict with the entries of the pickled dict whose key is in ``dataset``.
        Ids with no stored feature are skipped, matching the later definitions
        of this function in the notebook.
    """
    # Use pickle.load: the bare name `load` is not imported until a much later
    # cell, so the previous body raised NameError on a fresh kernel.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(features, 'rb') as file:
        all_features = pickle.load(file)
    return {k: all_features[k] for k in dataset if k in all_features}

In [None]:
def load_clean_descriptions(filename, dataset):
    """Load cleaned captions for the images in ``dataset``.

    Each non-blank line of the file is split on whitespace: the first token is
    the image id and the rest is the caption. Captions of ids in ``dataset``
    are wrapped as ``'startseq <caption> endseq'``.

    Args:
        filename: Path of the descriptions file (one "<id> <caption>" per line).
        dataset: Container of image ids to keep.

    Returns:
        dict mapping image id to its list of wrapped captions.
    """
    doc = load_doc(filename)
    descriptions = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # Blank lines (such as the one produced by a trailing newline) have no
        # tokens; skip them instead of failing on tokens[0].
        if not tokens:
            continue
        image_id, image_desc = tokens[0], tokens[1:]
        if image_id in dataset:
            desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
            descriptions.setdefault(image_id, list()).append(desc)
    return descriptions

In [None]:
def load_photo_features(filename, dataset):
    """Unpickle the feature dict in ``filename`` and keep the ids in ``dataset``.

    Ids that have no entry in the pickled dict are skipped.
    """
    with open(filename, 'rb') as handle:
        stored = pickle.load(handle)
    selected = {}
    for image_id in dataset:
        if image_id in stored:
            selected[image_id] = stored[image_id]
    return selected

In [None]:
def to_lines(descriptions):
    """Flatten ``{image_id: [caption, ...]}`` into one list of captions."""
    return [caption for captions in descriptions.values() for caption in captions]

def create_tokenizer(descriptions):
    """Return a ``Tokenizer`` fitted on every caption in ``descriptions``."""
    all_captions = to_lines(descriptions)
    fitted = Tokenizer()
    fitted.fit_on_texts(all_captions)
    return fitted

In [None]:
def max_length(description):
    """Return the word count of the longest caption in ``description``."""
    word_counts = [len(caption.split()) for caption in to_lines(description)]
    return max(word_counts)



Maximum Description Length: 34


In [None]:
def calculate_max_length(descriptions):
    """Return the number of words in the longest caption of ``descriptions``.

    Raises ValueError (from ``max``) when there are no captions at all.
    """
    word_counts = []
    for captions in descriptions.values():
        for caption in captions:
            word_counts.append(len(caption.split()))
    return max(word_counts)

# NOTE(review): `train_descriptions` is only assigned in a later cell, so on a
# fresh kernel this cell fails unless that cell has been run first.
# NOTE(review): this rebinds the name `max_length` to an int, shadowing the
# `max_length` function defined in an earlier cell.
max_length = calculate_max_length(train_descriptions)
print('Description Length: %d' % max_length)


Description Length: 34


In [None]:
def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size=None):
    """Expand one photo's captions into (photo, word-prefix, next-word) samples.

    For every caption and every split position ``i`` (1 .. len-1) one sample is
    produced: the first ``i`` encoded words padded to ``max_length`` as input,
    and word ``i`` one-hot encoded over ``vocab_size`` classes as target.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        max_length: Length the input sequences are padded to.
        desc_list: Caption strings belonging to ``photo``.
        photo: Feature vector of the image, repeated once per sample.
        vocab_size: Number of one-hot classes. When omitted it is derived from
            the tokenizer as ``len(tokenizer.word_index) + 1``, the same
            formula the notebook uses for its module-level ``vocab_size``.
            Previously the function silently read that global.

    Returns:
        Tuple ``(X1, X2, y)`` of numpy arrays: photo features, padded input
        sequences and one-hot targets.
    """
    if vocab_size is None:
        vocab_size = len(tokenizer.word_index) + 1
    X1, X2, y = list(), list(), list()
    for desc in desc_list:
        seq = tokenizer.texts_to_sequences([desc])[0]
        for i in range(1, len(seq)):
            in_seq, out_seq = seq[:i], seq[i]
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]

            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return array(X1), array(X2), array(y)

In [None]:
from tensorflow.keras.layers import add
def define_model(vocab_size, max_length, feature_dim=1000):
    """Build and compile the two-input captioning model.

    One branch takes a photo feature vector, the other a padded word-index
    sequence; both are projected to 256 units, summed, and decoded into a
    softmax over the vocabulary.

    Args:
        vocab_size: Size of the embedding vocabulary and of the softmax output.
        max_length: Length of the input word sequences.
        feature_dim: Length of the photo feature vector. Defaults to 1000, the
            width that was previously hard-coded.
            NOTE(review): the extractors in this notebook take the output of
            VGG16's second-to-last layer -- confirm the stored features really
            have this width.

    Returns:
        The compiled Keras model taking ``[photo_features, word_sequence]``.
    """
    # Photo-feature branch.
    inputs1 = Input(shape=(feature_dim,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)

    # Word-sequence branch.
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, output_dim=256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)

    # Decoder: element-wise sum of the two 256-wide branches.
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model




In [None]:
def data_generator(descriptions, photos, tokenizer, max_length):
    """Endlessly yield one training batch per image.

    Cycles over ``descriptions`` forever; for each image it builds the samples
    with ``create_sequences`` and yields ``[[photo_batch, sequence_batch],
    target_batch]``. The generator never terminates on its own.
    """
    while True:
        for image_id in descriptions:
            captions = descriptions[image_id]
            # Stored features are indexed with [0] to take their first row.
            feature_vector = photos[image_id][0]
            batch = create_sequences(tokenizer, max_length, captions, feature_vector)
            yield [[batch[0], batch[1]], batch[2]]

In [None]:
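# Note on the generator's output format:
# data_generator currently yields a list of the form
#   [[in_img, in_seq], out_word]

# Keras expects a generator to produce a tuple of (inputs, targets), so if the
# generator is ever passed directly to model.fit, yield a tuple instead:
#   yield ([in_img, in_seq], out_word)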

Maximum Description Length: 34


In [None]:
import tensorflow as tf
# Load the ids of the training images.
filename = "/content/drive/MyDrive/trainImg.txt"
train = load_set(filename)
print("Dataset: %d" %len(train))

# Captions of the training images, wrapped in startseq/endseq markers.
# NOTE(review): this cell reads desc1.txt while a later cell reads desc.txt --
# confirm which file is intended.
train_descriptions = load_clean_descriptions("/content/drive/MyDrive/desc1.txt", train)
print("train_descriptions= %d" %len(train_descriptions))

# Pre-computed photo features restricted to the training ids.
train_feature = load_photo_features("/content/drive/MyDrive/features.pkl", train)
print("photos: train= %d" %len(train_feature))

# Fit the tokenizer on the training captions; vocab_size is the number of
# distinct words plus one.
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index)+1
print("Vocab size: %d" %vocab_size)

# max_length = max_length(train_descriptions)
# print('Description Length: %d' % max_length)

Dataset: 6000
train_descriptions= 6000
photos: train= 6000
Vocab size: 7577


In [None]:
import pickle

# Persist the fitted tokenizer to Drive; the caption-generation cell near the
# end of the notebook loads this same file.
with open('/content/drive/MyDrive/tokenizer1.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

In [None]:
# Sanity check: at this point `max_length` is expected to be the int computed
# by calculate_max_length, not the function of the same name.
print(max_length)

34


In [None]:

# Build the captioning model and set the training hyper-parameters.
model = define_model(vocab_size, max_length)
epochs = 4
# One step per training image: data_generator yields one batch per image.
steps = len(train_descriptions)
# NOTE(review): define_model already prints the summary, so this shows it twice.
model.summary()

None


In [None]:
for i in range(epochs):
    # Create a fresh data generator for this epoch.
    generator = data_generator(train_descriptions, train_feature, tokenizer, max_length)
    # data_generator loops forever (`while 1`), so iterating it directly never
    # finishes and model.save below was never reached. Draw exactly `steps`
    # batches per epoch instead (one batch per training image). zip() stops as
    # soon as range(steps) is exhausted, without pulling an extra batch.
    for _, (x, y) in zip(range(steps), generator):
        model.fit(x, y, epochs=1, verbose=1)  # fit on one image's samples
    # Checkpoint after every epoch.
    model.save("model_" + str(i) + ".h5")

In [None]:
from keras.models import load_model
from keras.optimizers import Adam
# Load a previously saved captioning model from Drive and recompile it.
# NOTE(review): the training loop saves "model_<epoch>.h5" in the working
# directory; confirm that model.h5 on Drive is the intended checkpoint.
filename = "/content/drive/MyDrive/model.h5"
# compile=False loads the model without its saved compile state; it is
# recompiled explicitly on the next line.
model = load_model(filename, compile=False)
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy') # Example with Adam

  super().__init__(**kwargs)


In [None]:
def load_doc(filename):
    """Read a text file and return its entire contents as one string.

    NOTE(review): this repeats earlier definitions of ``load_doc`` in this
    notebook; the later one silently replaces the earlier ones.

    Args:
        filename: Path of the file to read (opened in text mode).

    Returns:
        The full text of the file.
    """
    # The previous body mixed indentation depths and did not parse. The context
    # manager also closes the handle even if read() raises.
    with open(filename, 'r') as file:
        return file.read()

def load_set(filename):
    """Return the set of image identifiers listed in ``filename``.

    Every non-empty line of the file contributes the part of the line that
    precedes its first '.'.
    """
    identifiers = set()
    for entry in load_doc(filename).split('\n'):
        if len(entry) >= 1:
            identifiers.add(entry.split('.')[0])
    return identifiers

In [None]:
import pickle

def load_photo_features(filename, dataset):
    """Unpickle the feature dict in ``filename`` and keep the ids in ``dataset``.

    Args:
        filename: Path of a pickle file holding a dict of image id -> feature.
        dataset: Iterable of image ids to keep.

    Returns:
        dict with the pickled entries whose key is in ``dataset``; ids without
        a stored feature are skipped.
    """
    # The previous body used inconsistent indentation (3 / 8 / 4 spaces) and
    # did not parse.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(filename, 'rb') as file:
        all_features = pickle.load(file)
    features = {k: all_features[k] for k in dataset if k in all_features}
    return features

# Reload the training photo features. `train` is the id set built by load_set
# in an earlier cell, so that cell must have been run first.
train_feature = load_photo_features("/content/drive/MyDrive/features.pkl", train)
print("photos: train= %d" %len(train_feature))


def to_lines(descriptions):
    """Flatten ``{image_id: [caption, ...]}`` into one list of captions."""
    flattened = []
    for captions in descriptions.values():
        flattened.extend(captions)
    return flattened

def create_tokenizer(descriptions):
    """Return a ``Tokenizer`` fitted on every caption in ``descriptions``."""
    all_captions = to_lines(descriptions)
    fitted = Tokenizer()
    fitted.fit_on_texts(all_captions)
    return fitted

def max_length(descriptions):
    """Return the word count of the longest caption in ``descriptions``."""
    word_counts = [len(caption.split()) for caption in to_lines(descriptions)]
    return max(word_counts)

def word_for_id(integer, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``integer``.

    Scans the mapping in order and returns the first match, or ``None`` when
    no word has that index.
    """
    matches = (word for word, index in tokenizer.word_index.items() if index == integer)
    return next(matches, None)


photos: train= 6000


In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
def generate_desc(model, tokenizer, photo, max_length):
    """Greedily generate a caption for ``photo``, one word at a time.

    Starting from ``'startseq'``, the text so far is encoded, padded to
    ``max_length`` and fed to ``model`` together with ``photo``; the arg-max
    prediction is mapped back to a word and appended. Generation stops after
    ``max_length`` words, when the index maps to no word, or once ``'endseq'``
    has been appended.

    Returns:
        The generated text, including the leading ``'startseq'``.
    """
    words = ['startseq']
    for _ in range(max_length):
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        prediction = model.predict([photo, padded], verbose=0)
        next_word = word_for_id(np.argmax(prediction), tokenizer)
        if next_word is None:
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return ' '.join(words)


from nltk.translate.bleu_score import corpus_bleu
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    """Print corpus BLEU-1 to BLEU-4 of generated captions against references.

    Args:
        model: Captioning model passed on to ``generate_desc``.
        descriptions: dict mapping image id to its reference captions.
        photos: dict mapping image id to the photo feature given to the model.
        tokenizer: Tokenizer passed on to ``generate_desc``.
        max_length: Sequence length passed on to ``generate_desc``.

    Returns:
        None; the four scores are printed.
    """
    actual, predicted = list(), list()
    for key, desc_list in descriptions.items():
        yhat = generate_desc(model, tokenizer, photos[key], max_length)
        # References and hypothesis are both whitespace-tokenised.
        actual.append([d.split() for d in desc_list])
        predicted.append(yhat.split())
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    # Each weight tuple should sum to 1; the former (0.3, 0.3, 0.3, 0) summed
    # to 0.9, so BLEU-3 now uses equal thirds.
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(1 / 3, 1 / 3, 1 / 3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))



In [None]:
#load train dataset
import tensorflow as tf
# Load the ids of the training images.
filename = "/content/drive/MyDrive/trainImg.txt"
train = load_set(filename)
print("Dataset: %d" %len(train))

# Captions of the training images, wrapped in startseq/endseq markers.
train_descriptions = load_clean_descriptions("/content/drive/MyDrive/desc.txt", train)
print("train_descriptions= %d" %len(train_descriptions))

# Pre-computed photo features restricted to the training ids.
train_feature = load_photo_features("/content/drive/MyDrive/features.pkl", train)
print("photos: train= %d" %len(train_feature))

tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index)+1
print("Vocab size: %d" %vocab_size)

# Use calculate_max_length (defined in an earlier cell; it computes the same
# longest-caption word count) rather than `max_length = max_length(...)`.
# The old form replaced the max_length function with an int, so running this
# cell a second time failed with "'int' object is not callable".
max_length = calculate_max_length(train_descriptions)
print('Description Length: %d' % max_length)

Dataset: 6000
train_descriptions= 6000
photos: train= 6000
Vocab size: 7268
Description Length: 33


In [None]:
# Load the ids of the test images.
filename = "/content/drive/MyDrive/testImg.txt"
test = load_set(filename)
print("Dataset: %d" %len(test))
# Captions of the test images, wrapped in startseq/endseq markers.
test_description = load_clean_descriptions("/content/drive/MyDrive/desc.txt", test)
print("Description= %d" %len(test_description))
# Pre-computed photo features restricted to the test ids.
test_features = load_photo_features("/content/drive/MyDrive/features.pkl", test)
print("photos: test=%d" % len(test_features))


Dataset: 1000
Description= 1000
photos: test=1000


In [None]:
from pickle import load
from numpy import argmax
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
from keras.models import load_model

In [None]:
def extract_features(filename):
    """Compute the VGG16 feature vector for a single image file.

    NOTE(review): this redefines ``extract_features``, which earlier in the
    notebook took a *directory* and returned a dict; after this cell runs only
    the single-file version is available.

    A fresh VGG16 is built on every call, which is expensive when called
    repeatedly.

    Args:
        filename: Path of the image to encode.

    Returns:
        The prediction of VGG16's second-to-last layer for the image, as
        returned by ``predict`` on a batch of one.
    """
    # load the model
    base = VGG16()
    # Expose the second-to-last layer's output as the feature vector. The
    # former `model.layers.pop()` was dropped: the layer is chosen by index
    # here, and current Keras returns a fresh list from `.layers`, so popping
    # from it had no effect.
    encoder = Model(inputs=base.inputs, outputs=base.layers[-2].output)
    image = load_img(filename, target_size=(224, 224))
    image = img_to_array(image)
    # Add a leading batch axis of size 1.
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    image = preprocess_input(image)
    feature = encoder.predict(image, verbose=0)
    return feature

In [None]:
from pickle import load
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the tokenizer saved earlier. The context manager closes the file, which
# the previous `load(open(...))` never did.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('/content/drive/MyDrive/tokenizer1.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)
# NOTE(review): hard-coded; this matches the length of 34 printed earlier, but
# the most recent max_length cell printed 33 -- confirm which the model expects.
max_len = 34
photo = extract_features("/content/drive/MyDrive/practice2.jpg")
# Presumably works around an unpickled tokenizer that lacks the `analyzer`
# attribute -- TODO confirm.
tokenizer.analyzer = None
description = generate_desc(model, tokenizer, photo, max_len)
print(description)

startseq man in red shirt is sitting on bench with his arms out to the camera endseq


In [None]:
# Remove the 'startseq' / 'endseq' markers from the generated caption.
query = description
stopwords = ['startseq','endseq']
querywords = query.split()

# Words are compared lower-cased against the marker list.
resultwords  = [word for word in querywords if word.lower() not in stopwords]
result = ' '.join(resultwords)

print(result)

man in red shirt is sitting on bench with his arms out to the camera
