In [None]:
import os
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, Dropout, add
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

# Make sure the 'pkl' cache directory is present
if not os.path.exists('pkl'):
    os.makedirs('pkl')

# Read the cleaned captions (indexed by image key, each entry a collection
# of caption strings)
with open('pkl/cleaned_captions.pkl', 'rb') as fh:
    cleaned_captions = pickle.load(fh)

# Read the pre-extracted image features
with open('pkl/image_features.pkl', 'rb') as fh:
    image_features = pickle.load(fh)

# Flatten every caption of every image into one list for the tokenizer
all_captions = [
    caption
    for captions_of_image in cleaned_captions.values()
    for caption in captions_of_image
]

# Reuse the pickled tokenizer when one is cached; otherwise fit a new one
# on all captions and cache it for later runs
tokenizer_path = 'pkl/tokenizer.pkl'
if not os.path.exists(tokenizer_path):
    from tensorflow.keras.preprocessing.text import Tokenizer
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(all_captions)
    with open(tokenizer_path, 'wb') as fh:
        pickle.dump(tokenizer, fh)
else:
    with open(tokenizer_path, 'rb') as fh:
        tokenizer = pickle.load(fh)

# Model hyperparameters
# +1 because the tokenizer's word indices start at 1; index 0 is left for padding
vocab_size = 1 + len(tokenizer.word_index)
# Longest caption, measured in whitespace-separated words
caption_lengths = (len(caption.split()) for caption in all_captions)
max_len = max(caption_lengths)
embedding_dim = 256
units = 256

# Define the model
def define_model(vocab_size, max_len, embedding_dim=256, units=256, feature_dim=2048):
    """Build and compile the two-input captioning model.

    One branch applies dropout and a dense projection to a fixed-length image
    feature vector; the other embeds a padded token sequence and runs it
    through an LSTM. The two branch outputs are summed element-wise, passed
    through a dense layer, and decoded into a softmax over the vocabulary.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            output classes of the final softmax.
        max_len: Length of the (padded) input token sequences.
        embedding_dim: Width of the token embedding vectors.
        units: Width shared by the image projection, the LSTM output and the
            decoder's hidden layer.
        feature_dim: Length of the per-image feature vector.

    Returns:
        A Keras ``Model`` with inputs ``[image_features, token_sequence]``,
        compiled with categorical cross-entropy loss and the Adam optimizer.
    """
    # Image feature branch
    inputs1 = Input(shape=(feature_dim,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(units, activation='relu')(fe1)

    # Sequence branch; mask_zero=True makes downstream layers skip the
    # zero-valued padding positions
    inputs2 = Input(shape=(max_len,))
    se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(units)(se2)

    # Decoder: `add` needs both branches to have the same width, which is
    # why the image projection and the LSTM are both sized by `units`
    decoder1 = add([fe2, se3])
    decoder2 = Dense(units, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model

# Instantiate the captioning network for this vocabulary and caption length
model = define_model(vocab_size=vocab_size, max_len=max_len)

# Data generator
def data_generator(captions, features, tokenizer, max_length, vocab_size, batch_size):
    """Endlessly yield training batches of (image, caption prefix) -> next word.

    Every caption is turned into a token sequence, and each proper prefix of
    that sequence becomes one training sample: the image's feature vector and
    the left-padded prefix are the inputs, and the one-hot encoded next token
    is the target.

    Args:
        captions: Mapping of image key to a list of caption strings.
        features: Mapping of image key to that image's features; the first
            row (``features[key][0]``) is used as the feature vector.
        tokenizer: Object providing ``texts_to_sequences``.
        max_length: Length every input prefix is padded to.
        vocab_size: Number of classes for the one-hot targets.
        batch_size: Number of *images* (not samples) gathered per batch, so
            the number of rows in a batch varies with caption count/length.

    Yields:
        ``((image_features, input_sequences), targets)`` as NumPy arrays.
        The inputs are yielded as a tuple rather than a list because
        ``tf.data.Dataset.from_generator`` treats a list as a single tensor,
        which does not match a nested ``output_signature``.

    Raises:
        ValueError: If ``captions`` is empty or ``batch_size`` is below 1;
            either would otherwise make the generator loop forever without
            yielding anything.
    """
    if not captions:
        raise ValueError("captions is empty; there is nothing to generate batches from")
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    X1, X2, y = [], [], []
    n = 0  # images accumulated in the batch currently being built
    while True:
        for key, desc_list in captions.items():
            for desc in desc_list:
                seq = tokenizer.texts_to_sequences([desc])[0]
                # One sample per split point: tokens [0, i) predict token i
                for i in range(1, len(seq)):
                    in_seq, out_seq = seq[:i], seq[i]
                    in_seq = tf.keras.preprocessing.sequence.pad_sequences([in_seq], maxlen=max_length)[0]
                    out_seq = tf.keras.utils.to_categorical([out_seq], num_classes=vocab_size)[0]
                    X1.append(features[key][0])
                    X2.append(in_seq)
                    y.append(out_seq)
            n += 1
            if n == batch_size:
                yield ((np.array(X1), np.array(X2)), np.array(y))
                X1, X2, y = [], [], []
                n = 0
        # Images left over at the end of a pass are intentionally kept and
        # carried into the first batch of the next pass.

# Training schedule; one step consumes one generator batch
batch_size = 64
epochs = 20
steps = len(cleaned_captions) // batch_size

# Declared element structure of the dataset:
# ((image features, padded token sequences), one-hot next-word targets)
_image_spec = tf.TensorSpec(shape=(None, 2048), dtype=tf.float32)
_sequence_spec = tf.TensorSpec(shape=(None, max_len), dtype=tf.int32)
_target_spec = tf.TensorSpec(shape=(None, vocab_size), dtype=tf.float32)
output_signature = ((_image_spec, _sequence_spec), _target_spec)


def _training_batches():
    """Create a fresh batch generator over the loaded captions and features."""
    return data_generator(
        cleaned_captions,
        image_features,
        tokenizer,
        max_len,
        vocab_size,
        batch_size,
    )


# Wrap the Python generator in a tf.data pipeline
dataset = tf.data.Dataset.from_generator(
    _training_batches,
    output_signature=output_signature,
)

# Save the model whenever the training loss improves
checkpoint = ModelCheckpoint(
    'model/image_caption_model.h5',
    monitor='loss',
    save_best_only=True,
    verbose=1,
)

# Train the model
model.fit(
    dataset,
    epochs=epochs,
    steps_per_epoch=steps,
    callbacks=[checkpoint],
)


FileNotFoundError: [Errno 2] No such file or directory: 'pkl/cleaned_captions.pkl'