In [19]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# project files referenced by the later cells (under /content/drive/MyDrive)
# are reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [41]:
# Export the model notebook with nbconvert's "script" exporter so that later
# cells can `from model import ...` (presumably this writes model.py into the
# same src folder — confirm).
# NOTE(review): the output captured for this cell is nbconvert's usage/help
# text; verify that the conversion really produced the script.
!jupyter nbconvert --to script /content/drive/MyDrive/Image-Caption-Generator/src/model.ipynb


This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr

In [15]:
import nbimporter  # NOTE(review): presumably lets .ipynb files be imported as modules — confirm it is installed
import sys

# Make the project's src folder importable. The entry is appended
# unconditionally, so re-running this cell adds a duplicate sys.path entry.
sys.path.append('/content/drive/MyDrive/Image-Caption-Generator/src')

# Import the model builder from the `model` module expected in that folder.
# NOTE(review): the recorded run of this cell ended with
# "ModuleNotFoundError: No module named 'model'" — make sure the module is
# present in src before running this cell.
from model import build_image_captioning_model


ModuleNotFoundError: No module named 'model'

In [42]:
from model import build_image_captioning_model


In [3]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import pickle
import sys
import os

# Add the 'src' folder located under the current working directory to the
# Python path so that `model` can be imported below.
# NOTE(review): this path is derived from os.getcwd(), while the other cells
# and the data paths below use /content/drive/MyDrive/Image-Caption-Generator.
# Verify that the working directory is the project root; otherwise this entry
# points at a different folder and the import only succeeds because an
# earlier cell already extended sys.path.
current_dir = os.getcwd()  # Get the current working directory
src_path = os.path.abspath(os.path.join(current_dir, "src"))  # Adjust path to 'src'
sys.path.append(src_path)

# Import the model definition (converted to a Python script)
from model import build_image_captioning_model

# Paths to processed data (all stored on the mounted Google Drive)
image_features_path = "/content/drive/MyDrive/Image-Caption-Generator/data/image_features.npy"
captions_path = "/content/drive/MyDrive/Image-Caption-Generator/data/processed_captions.npy"
tokenizer_path = "/content/drive/MyDrive/Image-Caption-Generator/data/tokenizer.pkl"

# Training hyper-parameters
BATCH_SIZE = 64  # number of samples per batch produced by the data generator
EPOCHS = 7  # number of epochs requested from model.fit
LEARNING_RATE = 0.001  # learning rate handed to the Adam optimizer

# Load data
def load_data(features_file=None, captions_file=None):
    """Load the pre-computed image features and the processed captions.

    Each file is a ``.npy`` file wrapping a single pickled Python object,
    which is unwrapped with ``.item()``. The training code indexes both
    objects by image id, so they are presumably dicts keyed by image id.

    Args:
        features_file: Path of the image-features file. When omitted, the
            module-level ``image_features_path`` is used.
        captions_file: Path of the processed-captions file. When omitted,
            the module-level ``captions_path`` is used.

    Returns:
        A tuple ``(image_features, captions)`` with the object stored in
        each file.

    Raises:
        OSError: If one of the files cannot be opened.
        ValueError: If a file does not hold exactly one element.
    """
    # Fall back to the notebook-level defaults so existing `load_data()` calls
    # keep working unchanged.
    if features_file is None:
        features_file = image_features_path
    if captions_file is None:
        captions_file = captions_path

    # SECURITY: allow_pickle=True unpickles arbitrary objects, which can run
    # code on load. Only point this at files produced by this project's own
    # preprocessing step.
    image_features = np.load(features_file, allow_pickle=True).item()
    captions = np.load(captions_file, allow_pickle=True).item()
    return image_features, captions

# Data generator
def data_generator(image_features, captions, tokenizer, max_caption_length, batch_size):
    """Yield training batches for the captioning model, indefinitely.

    Every caption contributes one sample: the caption without its last token
    is the decoder input and the caption shifted left by one token is the
    target. Both are zero-padded on the right to ``max_caption_length``.

    Samples that do not fill a complete batch at the end of a pass over
    ``captions`` are kept and completed with the first samples of the next
    pass, so no caption is permanently skipped and a dataset smaller than
    ``batch_size`` still produces batches.

    Args:
        image_features: Mapping from image id to that image's feature vector.
        captions: Mapping from image id to an iterable of token-id sequences.
        tokenizer: Unused; kept so existing callers do not need to change.
        max_caption_length: Length every input/target sequence is padded to.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``((image_batch, caption_input_batch), target_batch)`` where the
        caption and target batches have shape
        ``(batch_size, max_caption_length)``.

    Raises:
        ValueError: If ``captions`` contains no caption at all (the generator
            would otherwise loop forever without yielding).
        KeyError: If a caption refers to an image id missing from
            ``image_features``.
    """
    pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

    # The buffers live outside the endless loop on purpose: a partial batch
    # left over at the end of one pass is carried into the next pass.
    image_inputs, input_seqs, target_seqs = [], [], []
    while True:
        saw_caption = False
        for image_id, caption_set in captions.items():
            for caption in caption_set:
                saw_caption = True
                image_inputs.append(image_features[image_id])
                input_seqs.append(caption[:-1])  # drop the last token
                target_seqs.append(caption[1:])  # shift left by one token

                if len(image_inputs) == batch_size:
                    # Pad the whole batch in one call instead of per sample.
                    yield (
                        (
                            np.array(image_inputs),
                            pad_sequences(
                                input_seqs, maxlen=max_caption_length, padding="post"
                            ),
                        ),
                        pad_sequences(
                            target_seqs, maxlen=max_caption_length, padding="post"
                        ),
                    )
                    image_inputs, input_seqs, target_seqs = [], [], []

        if not saw_caption:
            raise ValueError(
                "captions contains no caption sequences; cannot generate batches"
            )

# --- Build and compile the captioning model --------------------------------
print("Building model...")
model = build_image_captioning_model()
# NOTE(review): the targets are zero-padded; whether padded positions are
# masked out of the loss/accuracy depends on the model definition — confirm.
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss=SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# --- Load tokenizer ---------------------------------------------------------
# SECURITY: pickle.load executes arbitrary code from the file; only load a
# tokenizer file produced by this project's own preprocessing.
with open(tokenizer_path, "rb") as f:
    tokenizer = pickle.load(f)

vocab_size = len(tokenizer.word_index) + 1  # Include padding token
max_caption_length = 35  # Match preprocessing

# --- Load data --------------------------------------------------------------
print("Loading data...")
image_features, captions = load_data()

# --- Create the dataset -----------------------------------------------------
# The generator yields ready-made batches, so the batch dimension is part of
# the signature and no further .batch() call is applied.
dataset = tf.data.Dataset.from_generator(
    lambda: data_generator(image_features, captions, tokenizer, max_caption_length, BATCH_SIZE),
    output_signature=(
        (
            tf.TensorSpec(shape=(BATCH_SIZE, 2048), dtype=tf.float32),
            tf.TensorSpec(shape=(BATCH_SIZE, max_caption_length), dtype=tf.int32),
        ),
        tf.TensorSpec(shape=(BATCH_SIZE, max_caption_length), dtype=tf.int32),
    ),
)

# --- Train the model --------------------------------------------------------
# The generator emits exactly one training sample per caption, so one pass
# over the data is (number of captions) // BATCH_SIZE batches. The previous
# formula additionally multiplied by the caption length, which made every
# "epoch" many passes over the data and failed on an empty caption set.
num_training_samples = sum(len(caption_set) for caption_set in captions.values())
steps_per_epoch = max(1, num_training_samples // BATCH_SIZE)

# Keep only the best weights (lowest training loss) and stop once the
# training loss has not improved for 3 consecutive epochs.
checkpoint = ModelCheckpoint(
    "model_checkpoint.keras", save_best_only=True, monitor="loss", mode="min"
)
early_stopping = EarlyStopping(monitor="loss", patience=3, mode="min")

print("Starting training...")
model.fit(
    dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    callbacks=[checkpoint, early_stopping],
)


ValueError: Name tf.RaggedTensorSpec has already been registered for class tensorflow.python.ops.ragged.ragged_tensor.RaggedTensorSpec.