In [12]:
import os
import json
import numpy as np
from PIL import Image
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical


ModuleNotFoundError: No module named 'sklearn'

In [9]:

# Locations of the Flickr30k annotation file and the image folder.
# NOTE(review): these are machine-specific absolute paths; adjust them when
# running the notebook on another machine.
json_file_path = 'C:\\Rohit\\Projects\\Image Data Set\\Flickr30k\\annotations.json'
image_directory = 'C:\\Rohit\\Projects\\Image Data Set\\Flickr30k\\images'

# Decode the file explicitly as UTF-8: without `encoding`, open() falls back
# to the platform's locale encoding (a legacy code page on Windows), which can
# fail or silently garble non-ASCII characters in the captions.
with open(json_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# `data` maps each image file name to a record whose 'comments' entry holds
# that image's captions.
print(f"Total Images : {len(data)}")

# Show the first entry only, as a quick sanity check of the structure.
for image_name, captions in data.items():
    print(f"Image: {image_name}")
    print(f"Captions: {captions['comments']}")
    break


Total Images : 31764
Image: 1000092795.jpg
Captions: ['Two young guys with shaggy hair look at their hands while hanging out in the yard .', 'Two young  White males are outside near many bushes .', 'Two men in green shirts are standing in a yard .', 'A man in a blue shirt standing in a garden .', 'Two friends enjoy time spent together .']


In [None]:
def preprocess_image(image_path, target_size=(299, 299)):
    """Load an image file and return it as a scaled RGB NumPy array.

    Args:
        image_path: Path of the image file to open.
        target_size: Size tuple handed to ``Image.resize``. Defaults to
            ``(299, 299)``.

    Returns:
        A NumPy array built from the resized RGB image, with every value
        divided by 255.0.
    """
    # The context manager guarantees the underlying file handle is released as
    # soon as the pixel data has been read, instead of relying on garbage
    # collection. convert() returns an independent in-memory image, so the
    # result stays valid after the source file is closed.
    with Image.open(image_path) as source:
        resized = source.convert('RGB').resize(target_size)
    return np.array(resized) / 255.0


In [None]:
# Flatten every image's caption list into a single corpus for the tokenizer
all_captions = [text for entry in data.values() for text in entry['comments']]

# Fit the vocabulary on the full corpus; unseen words map to '<UNK>'
tokenizer = Tokenizer(oov_token='<UNK>', lower=True)
tokenizer.fit_on_texts(all_captions)
# NOTE(review): the +1 presumably accounts for index 0 being reserved for padding - confirm
vocab_size = len(tokenizer.word_index) + 1

# Fixed length that every encoded caption is padded to
max_caption_length = 100  # Adjust this based on your data

# Encode each image's captions as integer sequences and pad them in one pass,
# so every value in captions_sequences is a padded array of caption rows
captions_sequences = {}
for image_name, captions in data.items():
    encoded = tokenizer.texts_to_sequences(captions['comments'])
    captions_sequences[image_name] = pad_sequences(
        encoded,
        maxlen=max_caption_length,
        padding='post',
    )


In [None]:
# Every key of the annotation dict is an image file name
image_files = list(data)

# Hold out 20% of the images for validation; the fixed seed keeps the split
# reproducible between runs
train_images, val_images = train_test_split(
    image_files,
    test_size=0.2,
    random_state=42,
)

print(f"Training images: {len(train_images)}, Validation images: {len(val_images)}")


In [None]:
def data_generator(image_files, batch_size=32):
    """Endlessly yield ``(images, captions)`` batches for the given images.

    The generator cycles over ``image_files`` forever in the order given,
    taking ``batch_size`` names at a time (the last batch of each pass may be
    smaller). For every image it loads the pixels with ``preprocess_image``
    and picks one of that image's padded caption sequences at random.

    Args:
        image_files: Sequence of image file names. Each name must exist in
            ``image_directory`` and be a key of ``captions_sequences``.
        batch_size: Number of images per batch; must be at least 1.

    Yields:
        A tuple ``(images, captions)`` of NumPy arrays, one row per image in
        the batch.

    Raises:
        ValueError: If ``image_files`` is empty or ``batch_size`` is below 1.
            Because this is a generator, the error surfaces on the first
            ``next()`` call rather than when the generator is created.
    """
    # Without these guards an empty list or a negative step makes the inner
    # loop run zero times, so `while True` would spin forever without ever
    # yielding and the consumer would hang.
    if len(image_files) == 0:
        raise ValueError("image_files must not be empty")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        for start in range(0, len(image_files), batch_size):
            batch_images = image_files[start:start + batch_size]
            images, captions = [], []
            for image_name in batch_images:
                # Load and normalise the image
                image_path = os.path.join(image_directory, image_name)
                images.append(preprocess_image(image_path))

                # Pick one of this image's captions at random
                caption_seq = captions_sequences[image_name]
                chosen = np.random.randint(0, len(caption_seq))
                captions.append(caption_seq[chosen])

            yield np.array(images), np.array(captions)

# Smoke test: pull a single small batch from the training generator and
# report the shapes of its image and caption arrays
train_generator = data_generator(image_files=train_images, batch_size=4)
train_batch = next(train_generator)
print(f"Image batch shape: {train_batch[0].shape}, Caption batch shape: {train_batch[1].shape}")
