In [None]:
import tensorflow
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Report the installed library versions so a run can be reproduced later.
print('tensorflow: %s' % tensorflow.__version__)
print('keras: %s' % keras.__version__)

In [None]:
from numpy import array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, concatenate
from tensorflow.keras.callbacks import ModelCheckpoint
import warnings

warnings.filterwarnings("ignore")


In [None]:
from os import listdir
from pickle import dump
from keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from keras.applications.vgg19 import preprocess_input
from keras.models import Model

def extract_features(address, filenames):
    """Extract one VGG19 feature array per image file.

    VGG19 is re-wired so that its second-to-last layer becomes the output,
    i.e. each image is described by that layer's activations instead of by
    the final classification layer.

    Args:
        address: Directory that contains the images. A trailing '/' is
            optional.
        filenames: Iterable of image file names located inside ``address``.

    Returns:
        dict mapping the image id (the file name up to its first '.') to the
        array returned by ``model.predict`` for that single image; the
        leading axis is the batch axis of size 1.
    """
    import os

    model = VGG19()
    model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()
    features = dict()
    for name in filenames:
        # join() works whether or not `address` ends with a separator;
        # plain concatenation silently produced a wrong path without one.
        filename = os.path.join(address, name)
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        # Add the batch axis expected by predict(): (h, w, c) -> (1, h, w, c).
        image = image.reshape((1,) + tuple(image.shape))
        image = preprocess_input(image)
        feature = model.predict(image, verbose=0)
        # Same id convention as the caption loaders: text before the first '.'.
        image_id = name.split('.')[0]
        features[image_id] = feature
    return features
    
directory = '/kaggle/input/imageclef/train/train'

In [None]:
# Location of the held-out images.
directory1 = '/kaggle/input/imageclef/test_images/test'

# Every entry of the training directory is used for training, so
# `train_data` is simply another name for the full listing.
image_dataset = listdir(directory)
train_data = image_dataset

test_data = listdir(directory1)

In [None]:
# Report how many files were listed for each split.
# The messages previously said "flies" and named the Flicker8k dataset,
# although the paths above point at the ImageCLEF data.
print("The number of jpg files in ImageCLEF: {}".format(len(image_dataset)))
print("The number of jpg files in Train data: {}".format(len(train_data)))
print("The number of jpg files in Test data: {}".format(len(test_data)))

In [None]:
import tensorflow as tf

# Detect GPUs and, when present, switch each one to on-demand memory growth.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("GPU not available. Using CPU.")
else:
    print("GPU is available. TensorFlow will use GPU:", gpus)
    try:
        # Grow GPU memory as needed instead of reserving it all up front.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the failure and carry on with the default memory behaviour.
        print(e)

In [None]:
# Extract features for both image sets. Each call builds its own VGG19
# model inside extract_features, so the network is constructed twice.
# NOTE(review): in the cells visible here only train_features is written to
# disk afterwards; test_features lives in memory only.
train_features =  extract_features(directory +'/' , train_data)
test_features = extract_features(directory1 + '/', test_data)

In [None]:
import pickle
# Persist the training-image features so they can be reloaded without
# running the extraction again.
with open('/kaggle/working/features.pkl', 'wb') as features_file:
    pickle.dump(train_features, features_file)

In [None]:
import pandas as pd
import csv

df = pd.read_csv('/kaggle/input/imageclef/train_captions.csv')

# Flatten the caption CSV into "<image id> <caption>" lines, the format read
# back by the line-oriented loaders later in the notebook.
# newline='' hands line-ending handling to the csv module, which is what the
# csv documentation asks for so that quoted fields containing newlines are
# parsed as one field.
with open('/kaggle/input/imageclef/train_captions.csv', 'r', newline='') as csv_file, \
        open('/kaggle/working/captions_1.txt', 'w') as txt_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader)  # skip the header row

    for row in csv_reader:
        if len(row) < 2:
            # Blank or truncated record: there is no caption to write, and
            # indexing row[1] would raise IndexError.
            continue
        img_id = row[0]
        # Collapse whitespace runs (including embedded newlines) so every
        # record occupies exactly one output line.
        caption = ' '.join(row[1].split())

        txt_file.write(f"{img_id} {caption}\n")


In [None]:
from collections import Counter
import numpy as np
# Distinct image ids present in the caption table.
caption_ids = df.ID.values
unique_images = np.unique(caption_ids)
print("The number of unique image names : {}".format(len(unique_images)))
print("The distribution of the number of captions for each image:")
# Inner Counter: captions per image id.
# Outer Counter: how many ids share each captions-per-image value.
captions_per_image = Counter(caption_ids)
Counter(captions_per_image.values())

In [None]:
import matplotlib.pyplot as plt

# Show a few images next to their captions: one row per image, the picture
# in the left column and its caption text in the right column.
num_pic = 5
target_size = (224, 224, 3)

# First image to show. Clamp it so the slice is never empty when the
# dataset holds fewer images than the hard-coded offset assumes.
first_pic = min(7669, max(len(unique_images) - num_pic, 0))

count = 1
fig = plt.figure(figsize=(10, 20))
for img in unique_images[first_pic:first_pic + num_pic]:
    filename = directory+'/'+img+".jpg"
    captions = list(df.loc[df["ID"] == img, "Caption"].values)
    # load_img documents target_size as (height, width); pass only those two
    # entries rather than the 3-tuple that also carries the channel count.
    image_load = load_img(filename, target_size=target_size[:2])
    ax = fig.add_subplot(num_pic, 2, count, xticks=[], yticks=[])
    ax.imshow(image_load)
    count += 1
    ax = fig.add_subplot(num_pic, 2, count)
    # Hide the axes on this subplot explicitly instead of relying on
    # pyplot's notion of the "current" axes.
    ax.axis('off')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, len(captions))
    # One caption per unit of height, stacked from the bottom.
    for i, caption in enumerate(captions):
        ax.text(0, i, caption, fontsize=20)
    count += 1
plt.show()


In [None]:
def df_word(df_txt):
    """Build a word-frequency table from the caption column of ``df_txt``.

    Captions are read from the second column (position 1) and split on
    whitespace. Missing captions (NaN/None) are skipped; they used to raise
    AttributeError because a float has no ``split`` method.

    Args:
        df_txt: DataFrame whose second column holds the caption text.

    Returns:
        DataFrame with columns ``word`` and ``count``, sorted by ``count`` in
        descending order and re-indexed from 0.
    """
    captions = df_txt.iloc[:, 1].dropna().astype(str)
    ct = Counter(word for caption in captions for word in caption.split())
    # The number of distinct words equals the number of Counter keys.
    print('Vocabulary Size: %d' % len(ct))
    dfword = pd.DataFrame({"word": list(ct.keys()), "count": list(ct.values())})
    dfword = dfword.sort_values("count", ascending=False)
    dfword = dfword.reset_index()[["word", "count"]]
    return dfword
# Build the word-frequency table for the caption frame and preview its
# first rows as the cell output.
dfword = df_word(df)
dfword.head()

In [None]:
topn = 50

def plthist(dfsub, title="The top 50 most frequently appearing words"):
    """Draw a bar chart of word counts for the rows of ``dfsub``.

    Bars are placed at the frame's index values, labelled with the ``word``
    column and sized by the ``count`` column.

    Args:
        dfsub: DataFrame slice with ``word`` and ``count`` columns.
        title: Text shown above the chart.
    """
    positions = dfsub.index
    heights = dfsub["count"]
    labels = dfsub["word"]

    plt.figure(figsize=(20, 3))
    plt.bar(positions, heights)
    plt.yticks(fontsize=20)
    # Vertical labels keep long words from overlapping.
    plt.xticks(positions, labels, rotation=90, fontsize=20)
    plt.title(title, fontsize=20)
    plt.show()

# dfword is sorted by descending count, so the head holds the most frequent
# words and the tail the least frequent ones.
most_frequent = dfword.iloc[:topn, :]
least_frequent = dfword.iloc[-topn:, :]

plthist(most_frequent, title="Top 50 most frequently appearing words")
plthist(least_frequent, title="Least 50 appearing words")

In [None]:
import string
import re

def load_doc(filename):
    """Read a text file and return its full contents as a single string.

    Args:
        filename: Path of the file to read.

    Returns:
        The file contents.
    """
    # The context manager closes the handle even if read() raises; the
    # explicit open/close pair leaked it in that case.
    with open(filename, 'r') as file:
        return file.read()

def load_descriptions(doc):
    """Parse "<image id> <caption words...>" lines into a dict of captions.

    The first whitespace-separated token of each line is the image id
    (anything from its first '.' onward is dropped); the remaining tokens
    are re-joined with single spaces to form the caption.

    Args:
        doc: Text with one record per line.

    Returns:
        dict mapping image id to the list of its captions, in file order.
    """
    mapping = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # Lines shorter than two characters are skipped as before. Lines made
        # only of whitespace are skipped too: they yield no tokens and used
        # to raise IndexError on tokens[0].
        if len(line) < 2 or not tokens:
            continue
        image_id = tokens[0].split('.')[0]
        image_desc = ' '.join(tokens[1:])
        mapping.setdefault(image_id, list()).append(image_desc)
    return mapping

def clean_descriptions(descriptions):
    """Normalise every caption in ``descriptions`` in place.

    Each caption is split on whitespace; every word is lower-cased and
    stripped of characters that are neither word characters nor whitespace.
    Words that end up shorter than two characters or that are not purely
    alphabetic are dropped, and the survivors are re-joined with spaces.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
            The lists are modified in place.

    Returns:
        None.
    """
    # Compiled once instead of per word. The translation table that used to
    # be built here was never applied, so it has been removed.
    non_word = re.compile(r'[^\w\s]')
    for desc_list in descriptions.values():
        for i, desc in enumerate(desc_list):
            words = (non_word.sub('', word.lower()) for word in desc.split())
            desc_list[i] = ' '.join(
                word for word in words if len(word) > 1 and word.isalpha()
            )

def to_vocabulary(descriptions):
    """Collect the set of distinct words used across all captions.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        set of every whitespace-separated word found in any caption.
    """
    vocabulary = set()
    for captions in descriptions.values():
        for caption in captions:
            vocabulary.update(caption.split())
    return vocabulary

def save_descriptions(descriptions, filename):
    """Write captions as "<image id> <caption>" lines.

    Lines are joined with newline characters and there is no trailing
    newline after the last record.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
        filename: Either a path (str, bytes or os.PathLike), which is opened
            for writing, or an already-open writable text file object.
            A file object is closed after writing, as it always has been.
    """
    import os

    lines = [
        key + ' ' + desc
        for key, desc_list in descriptions.items()
        for desc in desc_list
    ]
    data = '\n'.join(lines)

    # The parameter name promises a path, yet only file objects used to
    # work; a path is now accepted as well.
    if isinstance(filename, (str, bytes, os.PathLike)):
        with open(filename, 'w') as handle:
            handle.write(data)
        return

    try:
        filename.write(data)
    finally:
        # Existing callers rely on the handle being closed here; make sure
        # that also happens when write() fails.
        filename.close()

# Load the flattened caption file and group the captions by image id.
filename = '/kaggle/working/captions_1.txt'
doc = load_doc(filename)
descriptions = load_descriptions(doc)
print('Loaded: %d' % len(descriptions))

# Normalise the captions in place, then measure the cleaned vocabulary.
clean_descriptions(descriptions)
voc = to_vocabulary(descriptions)
print('Vocabulary size: %d' % len(voc))

# Write the cleaned captions back out, one "<image id> <caption>" per line.
with open('/kaggle/working/descriptions_1.txt', 'w') as out_file:
    save_descriptions(descriptions, out_file)

In [None]:
def modify_descriptions(img_names, img_descs, verbose=False):
    """Select captions for the given images and wrap them in sequence markers.

    Every caption of a selected image becomes
    ``'startseq ' + caption + ' endseq'``.

    Args:
        img_names: Iterable of image ids to keep.
        img_descs: dict mapping image id to a list of caption strings.
        verbose: When True, print each image's wrapped captions. Off by
            default, because printing one line per image floods the
            notebook output on a large dataset.

    Returns:
        dict mapping each kept image id to its list of wrapped captions.
        ``img_descs`` itself is left untouched.
    """
    # A set gives constant-time membership tests; testing against the
    # original list rescanned it for every caption key.
    wanted = set(img_names)
    desc_dict = dict()
    for key, desc in img_descs.items():
        if key not in wanted:
            continue
        modified_desc = ['startseq ' + caption + ' endseq' for caption in desc]
        desc_dict[key] = modified_desc
        if verbose:
            print(modified_desc)
    return desc_dict

# Image ids use the same convention as the caption loader: the file name up
# to its first '.'.
train_image_names = [i.split('.')[0] for i in train_data]
test_image_names = [i.split('.')[0] for i in test_data]

# NOTE(review): `descriptions` was built from the training caption file
# only, so test_descriptions can contain just those test images whose ids
# also appear in that file. It is likely empty; confirm this is intended.
train_descriptions = modify_descriptions(train_image_names, descriptions)    
test_descriptions = modify_descriptions(test_image_names, descriptions)

In [None]:
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences


def to_lines(descriptions):
    """Flatten a dict of caption lists into one list of captions.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        list of all captions, in dict order and then list order.
    """
    flattened = []
    for captions in descriptions.values():
        flattened.extend(captions)
    return flattened
  
def create_tokenizer(desc):
    """Fit a Tokenizer on every caption contained in ``desc``.

    Args:
        desc: dict mapping image id to a list of caption strings.

    Returns:
        The fitted Tokenizer.
    """
    caption_tokenizer = Tokenizer()
    caption_tokenizer.fit_on_texts(to_lines(desc))
    return caption_tokenizer

# Fit the tokenizer on the training captions only.
tokenizer = create_tokenizer(train_descriptions)
# One more than the number of indexed words.
# NOTE(review): presumably the extra slot covers index 0 used for padding; confirm.
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

def max_length_function(desc):
    """Return the word count of the longest caption in ``desc``.

    Args:
        desc: dict mapping image id to a list of caption strings.

    Returns:
        The largest number of whitespace-separated words in any caption.
    """
    word_counts = [len(caption.split()) for caption in to_lines(desc)]
    return max(word_counts)

def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size=None):
    """Expand one image's captions into next-word training samples.

    Each caption is encoded with ``tokenizer``. For every position ``i`` from
    1 to the end of the encoded caption, one sample is produced: the first
    ``i`` tokens padded to ``max_length`` as the input sequence, and token
    ``i`` one-hot encoded as the target.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        max_length: Length every input sequence is padded to.
        desc_list: List of caption strings for one image.
        photo: Feature vector of that image; repeated once per sample.
        vocab_size: Number of classes for the one-hot targets. Defaults to
            ``len(tokenizer.word_index) + 1``, the same value the notebook
            stores in its global ``vocab_size``. The function no longer
            depends on that global being defined.

    Returns:
        Tuple ``(X1, X2, y)`` of arrays: image features, padded input
        sequences and one-hot targets, aligned sample by sample.
    """
    if vocab_size is None:
        vocab_size = len(tokenizer.word_index) + 1

    X1, X2, y = [], [], []
    for desc in desc_list:
        seq = tokenizer.texts_to_sequences([desc])[0]
        for i in range(1, len(seq)):
            # Tokens before position i are the input; token i is the target.
            in_seq, out_seq = seq[:i], seq[i]
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return array(X1), array(X2), array(y)

In [None]:
def define_model(vocab_size, max_length, feature_size=4096):
    """Build and compile the two-input captioning model.

    One branch takes an image feature vector (dropout, then a 256-unit
    dense layer); the other takes a token sequence (embedding, dropout,
    256-unit LSTM). The two 256-wide outputs are concatenated and passed
    through a dense layer into a softmax over the vocabulary.

    Args:
        vocab_size: Size of the embedding input and of the softmax output.
        max_length: Length of the padded input token sequences.
        feature_size: Width of the image feature vector. Defaults to 4096.
            NOTE(review): presumably the output width of the VGG19 layer
            used for feature extraction; confirm.

    Returns:
        The compiled model (categorical cross-entropy loss, Adam optimizer).
    """
    # Image-feature branch.
    input1 = Input(shape=(feature_size,))
    fe1 = Dropout(0.5)(input1)
    fe2 = Dense(256, activation='relu')(fe1)

    # Caption-prefix branch; mask_zero=True makes index 0 a masked value.
    input2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(input2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)

    # Merge both branches and predict the next word.
    decoder1 = concatenate([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    model = Model(inputs=[input1, input2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model

In [None]:
def data_generator(descriptions, photos, tokenizer, max_length):
    """Yield training batches forever, one batch per image.

    Each batch holds every next-word sample derived from the captions of a
    single image.

    Args:
        descriptions: dict mapping image id to its list of captions.
        photos: dict mapping image id to a feature array whose first element
            is the feature vector for that image.
        tokenizer: Fitted tokenizer passed through to create_sequences.
        max_length: Padded length of the input sequences.

    Yields:
        ``((image_features, input_sequences), targets)``.
    """
    while True:
        for key, desc_list in descriptions.items():
            # Drop the leading axis to get the plain feature vector.
            photo = photos[key][0]
            in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo)
            # Keras fit() expects a generator to yield an (inputs, targets)
            # tuple. A single list [[x1, x2], y] is not recognised as such.
            # The two model inputs are grouped in a tuple as well, to avoid
            # list nesting inside the yielded structure.
            yield (in_img, in_seq), out_word

In [None]:
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1

# Maximum number of words in a line or sentence
max_length = max_length_function(train_descriptions)

# Defining the final Model
model = define_model(vocab_size, max_length)

# train the model, run epochs manually and save after each epoch
epochs = 2
# The generator yields one batch per image, so one pass over the training
# set takes as many steps as there are images with captions.
steps = len(train_descriptions)
for i in range(epochs):
    generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
    # One epoch per loop iteration, so `epochs` is the real total and a
    # checkpoint is written after every epoch. With epochs=5 here, each
    # iteration trained five epochs between saves.
    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    # Save next to the notebook's other outputs. The previous path glued
    # the file name onto the input image directory without a separator.
    model.save('/kaggle/working/model_final_' + str(i) + '.h5')