In [104]:
import os
import pickle
import numpy as np
from tqdm import tqdm
import re

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add

In [65]:
# Dataset locations. The defaults are the paths this notebook was originally
# run with; set CAPTION_IMAGES_DIR / CAPTION_WORKING_DIR in the environment to
# run it on another machine without editing the notebook.
# BASE_DIR    - directory containing the image files.
# WORKING_DIR - directory containing captions.txt (also used for features.pkl).
BASE_DIR = os.environ.get(
    'CAPTION_IMAGES_DIR',
    r'c:\Users\Acer\Documents\CLASSES - UTD\NLP\Project\images',
)
WORKING_DIR = os.environ.get(
    'CAPTION_WORKING_DIR',
    os.path.dirname(r'C:\Users\Acer\Documents\CLASSES - UTD\NLP\Project\captions.txt'),
)



In [66]:
# Build the feature extractor: a VGG16 network whose output is taken from its
# second-to-last layer instead of the final layer.
vgg16_full = VGG16()
model = Model(inputs=vgg16_full.inputs, outputs=vgg16_full.layers[-2].output)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the cell output.
model.summary()

None


In [68]:
# Run every file found in BASE_DIR through `model` and store the prediction
# under the file name truncated at its first '.'.
features = {}
directory = os.path.join(BASE_DIR)

for img_name in tqdm(os.listdir(directory)):
    filename = os.path.join(directory, img_name)

    # Load at 224x224, convert to an array, add a leading batch axis and apply
    # the VGG16 input preprocessing.
    image = img_to_array(load_img(filename, target_size=(224, 224)))
    image = preprocess_input(np.expand_dims(image, axis=0))

    # Same id rule as the caption mapping: everything before the first '.'.
    image_id = img_name.split('.')[0]
    feature = model.predict(image, verbose=0)
    features[image_id] = feature

100%|██████████| 8091/8091 [47:01<00:00,  2.87it/s]  


In [69]:
# Path of the pickle file, inside WORKING_DIR, that holds the `features` dict.
features_path = os.path.join(WORKING_DIR, 'features.pkl')

In [70]:
# Persist the extracted features so the extraction loop need not be re-run.
with open(features_path, 'wb') as features_file:
    pickle.dump(features, features_file)

In [71]:
# Reload the features from disk. pickle.load executes arbitrary code from the
# file, so only point features_path at a file this notebook wrote itself.
with open(features_path, 'rb') as features_file:
    features = pickle.load(features_file)

In [80]:
# Read captions.txt from WORKING_DIR as one string, minus its first line
# (presumably a column header - confirm against the file).
captions_path = os.path.join(WORKING_DIR, 'captions.txt')
with open(captions_path, 'r') as captions_file:
    next(captions_file)  # discard the first line
    captions_doc = captions_file.read()  # everything after it


In [85]:
## Build {image_id: [caption, ...]} from the raw caption lines

mapping = {}

for line in tqdm(captions_doc.split('\n')):
    # Ignore blank / one-character lines
    if len(line) < 2:
        continue

    # First comma-separated field is the image file name; all remaining
    # fields belong to the caption.
    image_file, *caption_parts = line.split(',')
    image_id = image_file.split('.')[0]

    # NOTE(review): commas inside a caption are replaced by single spaces
    # here, because the line was split on every comma.
    caption = " ".join(caption_parts)

    mapping.setdefault(image_id, []).append(caption)
    

100%|██████████| 40456/40456 [00:00<00:00, 336688.14it/s]


In [86]:
# Number of distinct image ids that have at least one caption.
len(mapping)

8091

In [105]:
def clean(mapping):
    """Normalise every caption stored in ``mapping``, in place.

    Each caption is lower-cased, stripped of every character that is not a
    lowercase ASCII letter or whitespace, has whitespace runs collapsed,
    loses its one-character words, and is finally wrapped as
    ``"<start> ... <end>"``.

    Args:
        mapping: dict whose values are lists of caption strings. The lists
            are modified in place; the dict's keys are untouched.

    Returns:
        None.

    Note:
        Not idempotent: a second call strips the angle brackets of the
        markers and wraps the caption again, so call it once per mapping.
    """
    for captions in mapping.values():
        for i, caption in enumerate(captions):
            # preprocessing steps
            caption = caption.lower()
            # Remove any character other than letters and whitespace
            caption = re.sub(r'[^a-z\s]', '', caption)
            caption = re.sub(r'\s+', ' ', caption)
            # Drop one-character words
            caption = ' '.join(word for word in caption.split() if len(word) > 1)
            # Store the result back in the list. Without this assignment the
            # cleaned string was discarded and the mapping stayed unchanged.
            # NOTE(review): Keras Tokenizer's default filters are believed to
            # strip '<' and '>' - confirm the markers survive tokenisation.
            captions[i] = f"<start> {caption} <end>"


In [113]:
# Flatten the per-image caption lists into a single list, keeping mapping order.
all_captions = [caption for key in mapping for caption in mapping[key]]

In [115]:
# Only the last expression of a cell is displayed, so the caption count has to
# be printed explicitly; the bare len() call produced no output.
print(len(all_captions))
all_captions[:10]

['A child in a pink dress is climbing up a set of stairs in an entry way .',
 'A girl going into a wooden building .',
 'A little girl climbing into a wooden playhouse .',
 'A little girl climbing the stairs to her playhouse .',
 'A little girl in a pink dress going into a wooden cabin .',
 'A black dog and a spotted dog are fighting',
 'A black dog and a tri-colored dog playing with each other on the road .',
 'A black dog and a white dog with brown spots are staring at each other in the street .',
 'Two dogs of different breeds looking at each other on the road .',
 'Two dogs on pavement moving toward each other .']

In [117]:
# Fit the word index on the whole caption corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)

# One more than the number of distinct words in the fitted index.
vocab_size = 1 + len(tokenizer.word_index)
vocab_size

8494

In [118]:
# Length, in whitespace-separated words, of the longest caption.
max_length = max(map(len, map(str.split, all_captions)))
max_length

37

Splitting the data into training and test sets

In [119]:
# The first 90% of image ids (in mapping order) go to training, the rest to test.
image_ids = list(mapping)
split = int(0.90 * len(image_ids))
train, test = image_ids[:split], image_ids[split:]

Creating a data generator that supplies the data in batches (to avoid running out of memory and crashing)

In [1]:
def data_generator(data_keys, mapping, features, tokenizer, max_length, vocab_size, batch_size):
    """Endlessly yield training batches of (image feature, partial caption) -> next word.

    For every caption of every image, each prefix ``seq[:i]`` of the
    tokenised caption becomes one input sample and ``seq[i]`` its target.

    Args:
        data_keys: sequence of image ids to iterate over on every pass.
        mapping: dict mapping an image id to its list of caption strings.
        features: dict mapping an image id to its feature array; index 0 is
            used (assumes a leading batch axis of size 1 - TODO confirm).
        tokenizer: object exposing ``texts_to_sequences``.
        max_length: length every input sequence is padded to.
        vocab_size: number of classes for the one-hot encoded target.
        batch_size: number of *images* whose samples are grouped into one
            yielded batch; the number of samples per batch therefore varies.

    Yields:
        ``[X1, X2], y`` where ``X1`` is the array of image features, ``X2``
        the array of padded input sequences and ``y`` the array of one-hot
        targets, all with one row per sample.

    Raises:
        ValueError: if ``data_keys`` is empty or ``batch_size`` is < 1, either
            of which would otherwise make the generator loop forever without
            yielding.
    """
    if len(data_keys) == 0:
        raise ValueError("data_keys must not be empty")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    X1, X2, y = list(), list(), list()
    n = 0
    while 1:
        for key in data_keys:
            # Count images towards the current batch. The counter was never
            # incremented before, so no batch was ever yielded.
            n += 1
            # Captions of this image; the name was previously undefined.
            captions = mapping[key]
            for caption in captions:
                seq = tokenizer.texts_to_sequences([caption])[0]

                for i in range(1, len(seq)):
                    # Prefix of length i is the input, token i is the target.
                    # The original sliced with the constant 1 and ignored i.
                    in_seq, out_seq = seq[:i], seq[i]
                    in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                    out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]

                    X1.append(features[key][0])
                    X2.append(in_seq)
                    y.append(out_seq)
            if n == batch_size:
                X1, X2, y = np.array(X1), np.array(X2), np.array(y)
                yield [X1, X2], y
                # Start collecting the next batch. Samples from a partial
                # batch at the end of a pass carry over into the next pass.
                X1, X2, y = list(), list(), list()
                n = 0



MODEL CREATION  