In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Import libraries**

In [None]:
from os import listdir
from pickle import dump
import tensorflow as tf
from keras.applications import EfficientNetV2L
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input
from keras.models import Model
import pandas as pd

# **Encoder part**

In [None]:
def extract_features(directory):
    """Run every file in ``directory`` through EfficientNetV2L and collect the outputs.

    Args:
        directory: Path of a folder whose entries are all image files.

    Returns:
        dict mapping image id (the file name up to its first ``'.'``) to the
        array returned by ``model.predict`` for that single image.
    """
    # NOTE(review): the original code called ``model.layers.pop()`` here. The
    # ``layers`` property hands back a fresh list, so that call never removed
    # anything and ``layers[-1]`` is still the network's final layer. The
    # extracted feature is therefore the full network output, which is what
    # ``Input(shape=(1000,))`` in ``define_model`` expects. The dead call is
    # dropped; the produced features are unchanged.
    base_model = EfficientNetV2L()
    model = Model(inputs=base_model.inputs, outputs=base_model.layers[-1].output)

    features = dict()
    for name in listdir(directory):
        filename = os.path.join(directory, name)
        image = load_img(filename, target_size=(480, 480))
        image = img_to_array(image)
        # Add the leading batch dimension expected by ``predict``.
        image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
        image = preprocess_input(image)
        feature = model.predict(image, verbose=1)
        image_id = name.split('.')[0]
        features[image_id] = feature
        print("image name is : "+name)
    return features

# **Extract features from images**

In [None]:
# Extract one feature array per image in the Flickr30k image folder.
# (The former ``features = dict()`` initialisation was overwritten immediately and has been dropped.)
features = extract_features('/kaggle/input/flickr-image-dataset/flickr30k_images/flickr30k_images/flickr30k_images/')


In [None]:
# Number of entries in the feature dict (one per image id).
print(len(features))

In [None]:
# Directory where this notebook writes its output files (used when pickling the features).
WORKING_DIR = '/kaggle/working'

In [None]:
# store features in pickle
import os
import pickle
# A context manager guarantees the file is flushed and closed even if dumping fails.
with open(os.path.join(WORKING_DIR, 'features.pkl'), 'wb') as f:
    pickle.dump(features, f)

In [None]:
# load features from pickle
import os
import pickle
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
# NOTE(review): this reads from a Kaggle input dataset rather than WORKING_DIR where the
# previous cell writes; presumably a previously saved copy of the same file. Confirm.
with open(os.path.join('/kaggle/input/feature', 'features.pkl'), 'rb') as f:
    features = pickle.load(f)
    
# Spot-check the stored array shape for two hard-coded image ids.
print(features['10002456'].shape)
print(features['1000268201'].shape)

# **Data Preprocessing**

In [None]:
# Read the captions file into a single string, skipping its first line.
with open(os.path.join('/kaggle/input/d/nournirabi/results/', 'results_Copy.txt'), 'r') as f:
    next(f)  # discard the first line (presumably a header row; confirm)
    desc = f.read()

In [None]:
#import lib we need
import string
from tqdm.notebook import tqdm

In [None]:
def load_document(filename):
    """Return the entire contents of the text file ``filename`` as one string.

    The file is opened in a ``with`` block so the handle is closed even when
    reading raises.
    """
    with open(filename, 'r') as file:
        return file.read()


In [None]:
def load_descriptions(filename):
    """Parse a captions file into a mapping of image id to list of captions.

    Each non-trivial line is split on whitespace: the first token is the image
    file name (everything from its first ``'.'`` onwards is dropped to form
    the id) and the remaining tokens, re-joined with single spaces, form the
    caption. Double quotes are removed from the line first.

    The original implementation ignored ``filename`` and read a notebook
    global named ``doc``; the file is now read directly so the function only
    depends on its argument.

    Args:
        filename: Path of the captions text file.

    Returns:
        dict mapping image id (str) to a list of caption strings, in file order.
    """
    with open(filename, 'r') as file:
        text = file.read()

    mapping = dict()
    for line in text.split('\n'):
        line = line.replace('"', '')
        tokens = line.split()
        # Skip empty / one-character lines, and whitespace-only lines which
        # would otherwise fail on ``tokens[0]``.
        if len(line) < 2 or not tokens:
            continue
        image_id = tokens[0].split('.')[0]
        caption = " ".join(tokens[1:])
        mapping.setdefault(image_id, []).append(caption)
    return mapping

In [None]:
def clean_description(description):
    """Normalise every caption in ``description`` in place.

    For each caption the words are lower-cased, stripped of ASCII punctuation,
    and kept only when they are longer than one character and purely
    alphabetic. The caption lists are modified in place; nothing is returned.
    """
    strip_punct = str.maketrans('', '', string.punctuation)

    def _normalise(caption):
        kept = []
        for raw in caption.split():
            word = raw.lower().translate(strip_punct)
            if len(word) > 1 and word.isalpha():
                kept.append(word)
        return ' '.join(kept)

    for captions in description.values():
        # Slice assignment keeps the same list object, as the original did.
        captions[:] = [_normalise(caption) for caption in captions]

In [None]:
# Converting the loaded descriptions into a vocabulary of words

def to_vocabulary(descriptions):
    """Return the set of distinct whitespace-separated words across all captions."""
    vocab = set()
    for captions in descriptions.values():
        for caption in captions:
            vocab.update(caption.split())
    return vocab

In [None]:
def save_descriptions(descriptions , filename):
    """Write the captions to ``filename``, one ``"<image_id> <caption>"`` per line.

    Lines are joined with ``'\\n'`` and no trailing newline is written. The
    file is opened in a ``with`` block so it is closed even if writing fails.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
        filename: Destination path; an existing file is overwritten.
    """
    lines = [
        key + ' ' + desc
        for key, desc_list in descriptions.items()
        for desc in desc_list
    ]
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))

In [None]:
# Path of the raw captions file.
filename = '/kaggle/input/d/nournirabi/results/results_Copy.txt'
# Raw text of the captions file.
doc = load_document(filename)

# Build the image-id -> captions mapping, then normalise the captions in place.
descriptions = load_descriptions(filename)
clean_description(descriptions)

# Summarizing the vocabulary
vocabulary = to_vocabulary(descriptions)
print('Vocabulary Size: %d' % len(vocabulary))

# Save the cleaned captions to 'descriptions.txt' in the current working directory.
save_descriptions(descriptions, 'descriptions.txt')

In [None]:
def max_length(descriptions):
    """Return the number of words in the longest caption of ``descriptions``.

    The captions are flattened inline rather than through ``to_lines``, which
    is defined further down the notebook and is therefore not available when
    this cell first runs on a fresh kernel.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        Word count of the longest caption, or 0 when there are no captions.
    """
    return max(
        (len(caption.split())
         for captions in descriptions.values()
         for caption in captions),
        default=0,
    )

In [None]:
# Word count of the longest cleaned caption.
print(max_length(descriptions))

In [None]:
def load_set(filename):
    """Return the set of identifiers found in the file ``filename``.

    For every non-empty line (after removing double quotes) the identifier is
    the text preceding the first ``'.'``.
    """
    identifiers = set()
    for raw_line in load_document(filename).split('\n'):
        cleaned = raw_line.replace('"', '')
        if not cleaned:
            continue
        identifiers.add(cleaned.split('.')[0])
    return identifiers

In [None]:
from pickle import load
def load_image_feature(filename,dataset):
    """Load pickled features and keep only the entries whose key is in ``dataset``.

    Args:
        filename: Path of a pickle file holding a dict keyed by image id.
        dataset: Iterable of image ids to keep.

    Returns:
        dict with one entry per id in ``dataset``.

    Raises:
        KeyError: if an id in ``dataset`` is missing from the pickled dict.
    """
    # NOTE(review): unpickling can execute arbitrary code; only use trusted files.
    with open(filename, 'rb') as file:
        all_feat = load(file)
    return {k: all_feat[k] for k in dataset}

In [None]:
def load_clean_descriptions(filename , dataset):
    """Load cleaned captions for the ids in ``dataset`` and wrap them in sequence markers.

    Each line of ``filename`` is ``"<image_id> <caption words...>"``. For ids
    present in ``dataset`` the caption is stored as
    ``'startseq <caption> endseq'``.

    The original body created a dict under the misspelled name
    ``discriptions`` and then wrote to ``descriptions``, i.e. the notebook
    global, so it silently appended to and returned that global. A fresh
    local dict is now built and returned.

    Args:
        filename: Path of the file written by ``save_descriptions``.
        dataset: Container of image ids to keep (membership-tested with ``in``).

    Returns:
        New dict mapping image id to its list of wrapped captions.
    """
    with open(filename, 'r') as file:
        text = file.read()

    descriptions = dict()
    for line in text.split('\n'):
        tokens = line.split()
        # Blank lines have no id token; skip them instead of failing on tokens[0].
        if not tokens:
            continue
        image_id, caption = tokens[0], tokens[1:]
        if image_id in dataset:
            desc = 'startseq ' + ' '.join(caption) + ' endseq'
            descriptions.setdefault(image_id, []).append(desc)
    return descriptions

In [None]:
# Identifiers taken from the captions file (text before the first '.' of each non-empty line).
train = load_set(filename)
print("num of images : %d" % len(train))
# Captions for those ids, wrapped in 'startseq' / 'endseq'.
# NOTE(review): assumes the current working directory is /kaggle/working, where
# 'descriptions.txt' was saved with a relative path. Confirm.
descs = load_clean_descriptions('/kaggle/working/descriptions.txt' , train)
print("Descriptions : %d" % len(descs))
# Image features restricted to the same ids; raises KeyError if an id has no stored feature.
features = load_image_feature('/kaggle/input/feature/features.pkl',train)
print("Features : %d" % len(features))

In [None]:
def to_lines(descriptions):
    """Flatten the id -> captions mapping into one list of caption strings."""
    return [
        caption
        for captions in descriptions.values()
        for caption in captions
    ]

In [None]:
def creat_tokenizer(descriptions):
    """Fit a Keras ``Tokenizer`` on every caption in ``descriptions`` and return it."""
    caption_tokenizer = Tokenizer()
    caption_tokenizer.fit_on_texts(to_lines(descriptions))
    return caption_tokenizer

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Fit the tokenizer on the wrapped training captions.
tokenizer = creat_tokenizer(descs)
# One more than the number of distinct words.
# (assumes index 0 is kept free for padding; TODO confirm)
vocab_size = len(tokenizer.word_index) + 1
vocab_size

In [None]:
# Quick sanity check of the objects created by the tokenizer cell.
print(type(vocab_size))
print(type(tokenizer))

In [None]:
def max_length(descriptions):
    """Return the number of words in the longest caption of ``descriptions``.

    NOTE(review): this function is defined twice in the notebook; this later
    definition is the one in effect once this cell has run.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        Word count of the longest caption, or 0 when there are no captions
        (the bare ``max`` used previously raised ``ValueError`` in that case).
    """
    return max(
        (len(caption.split())
         for captions in descriptions.values()
         for caption in captions),
        default=0,
    )

In [None]:
# Word count of the longest caption in `descriptions`.
# NOTE(review): training sequences are built from captions wrapped in 'startseq'/'endseq'
# (see load_clean_descriptions), so the padding length may need to be measured on `descs`. Confirm.
print(max_length(descriptions))

# **Data Split**

# **Data Generator**

In [None]:
def create_sequences(tokenizer, max_length, desc_list, feature):
    """Expand one image's captions into (image, caption-prefix, next-word) samples.

    Every caption is encoded with ``tokenizer``; each proper prefix of the
    encoded caption becomes one sample whose target is the word following the
    prefix, one-hot encoded. Prefixes are padded to ``max_length``.

    Note: the one-hot width is read from the notebook global ``vocab_size``.

    Returns:
        Three arrays: repeated image features, padded prefixes, one-hot targets.
    """
    image_inputs, text_inputs, targets = [], [], []
    for caption in desc_list:
        encoded = tokenizer.texts_to_sequences([caption])[0]
        for split_at in range(1, len(encoded)):
            # Words before ``split_at`` are the input, the word at ``split_at`` is the label.
            prefix = pad_sequences([encoded[:split_at]], maxlen=max_length)[0]
            next_word = to_categorical([encoded[split_at]], num_classes=vocab_size)[0]
            image_inputs.append(feature)
            text_inputs.append(prefix)
            targets.append(next_word)
    return np.array(image_inputs), np.array(text_inputs), np.array(targets)

In [None]:
def data_generator(descriptions, features, tokenizer, max_length):
    """Endlessly yield one training batch per image.

    For every image id in ``descriptions`` the stored feature is looked up
    (first row of ``features[key]``) and expanded with ``create_sequences``.

    Yields:
        ``([image_features, padded_prefixes], one_hot_next_words)``. This is a
        tuple on purpose: ``Model.fit`` treats a yielded tuple as
        ``(inputs, targets)``, whereas the list yielded previously is read as
        a single nested input structure.
    """
    while True:
        for key, description_list in descriptions.items():
            # retrieve photo features
            feature = features[key][0]
            inp_image, inp_seq, op_word = create_sequences(tokenizer, max_length, description_list, feature)
            yield ([inp_image, inp_seq], op_word)

# **Create Model (Decoder Part)**

**Import the libraries we need**

In [None]:
import numpy as np

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add


In [None]:
from keras.utils import plot_model

  # define the captioning model
    
def define_model(vocab_size, max_length):
    """Build and compile the caption decoder.

    The model has two inputs: a 1000-value image feature vector and a padded
    word-index sequence of length ``max_length``. The image vector is
    projected to 370 values and added to the 370-value word embeddings, the
    sum is fed through an LSTM, and a softmax over ``vocab_size`` words is
    produced.

    Fix: the LSTM previously used ``return_sequences=True``, which yields one
    distribution per time step, while ``create_sequences`` supplies a single
    one-hot next word per sample. The LSTM now returns only its final state so
    the output shape matches the targets.

    Args:
        vocab_size: Size of the output softmax and of the embedding table.
        max_length: Length of the padded input sequences.

    Returns:
        The compiled Keras model.
    """
    # Image branch: 1000-value CNN output -> dropout -> 370-value dense projection.
    inputs1 = Input(shape=(1000,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(370, activation='relu')(fe1)

    # Text branch: word indices -> 370-value embeddings (index 0 is masked).
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 370, mask_zero=True)(inputs2)

    # Merge the branches.
    # NOTE(review): relies on ``add`` broadcasting the image vector across time steps. Confirm.
    se2 = add([fe2, se1])
    se3 = LSTM(512)(se2)
    se4 = Dropout(0.5)(se3)

    decoder = Dense(512, activation='relu')(se4)
    outputs = Dense(vocab_size, activation='softmax')(decoder)

    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam' , metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    plot_model(model, show_shapes=True)
    return model

# **Train Model**

In [None]:
# train our model
# NOTE(review): both values are hard-coded and replace what the notebook computed
# earlier (this also rebinds the name `max_length`, hiding the function of the
# same name). Confirm they match the tokenizer and the wrapped captions.
max_length = 74
vocab_size = 19738
model = define_model(vocab_size, max_length)
epochs = 10
j =21
# One generator step per image.
steps = len(descs)
for i in range(epochs):
    # Train on `descs` (captions wrapped in 'startseq'/'endseq'): the tokenizer
    # and `steps` are both derived from it.
    generator = data_generator(descs, features, tokenizer, max_length)
    # `fit` accepts generators directly; `fit_generator` is deprecated.
    history = model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    # Checkpointing is currently disabled:
    #j = j+1
    #model.save("model21/model_" + str(j) + ".h5")