<a href="https://colab.research.google.com/github/Karma-tic/AI-Image-Caption-Generator/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# RUN THIS (one cell) — sets up a clean workspace in Drive and installs deps
from google.colab import drive
import os, sys, subprocess, json

# 1) mount drive
drive.mount('/content/drive', force_remount=True)

# 2) create clean project folder (new)
BASE = "/content/drive/MyDrive/image-captioning-clean"
subdirs = ["data", "preprocessed", "models", "reports", "notebooks"]
for d in subdirs:
    os.makedirs(os.path.join(BASE, d), exist_ok=True)

# 3) install required libraries
print("\nInstalling libraries (this may take 1-2 minutes)...\n")
!pip -q install --upgrade pip
!pip -q install transformers datasets torch torchvision pillow nltk tqdm

# 4) quick check
print("\n✅ Clean project folder created at:", BASE)
print("Subfolders:", os.listdir(BASE))
print("\n✅ Libraries installed. Python version:", sys.version.splitlines()[0])


Mounted at /content/drive

Installing libraries (this may take 1-2 minutes)...

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m53.1 MB/s[0m eta [36m0:00:00[0m
[?25h
✅ Clean project folder created at: /content/drive/MyDrive/image-captioning-clean
Subfolders: ['notebooks', 'data', 'preprocessed', 'models', 'reports']

✅ Libraries installed. Python version: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]


In [None]:
import os
import requests
import zipfile

# Folder on Drive that receives the downloaded archives and the extracted data.
DATA_DIR = "/content/drive/MyDrive/image-captioning-clean/data"
os.makedirs(DATA_DIR, exist_ok=True)

# Flickr8k archives published as GitHub release assets of jbrownlee/Datasets
# (plain HTTPS download, no login required).
images_url = "https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip"
captions_url = "https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_text.zip"

# Local destinations of the two archives inside DATA_DIR.
images_zip = os.path.join(DATA_DIR, "Flickr8k_Dataset.zip")
text_zip = os.path.join(DATA_DIR, "Flickr8k_text.zip")

def download_file(url, path, timeout=60):
    """Download ``url`` to ``path`` unless ``path`` already exists.

    The body is streamed in 1 MiB chunks into ``path + ".part"`` and moved
    into place only after the whole response was written. A failed or
    interrupted download therefore never leaves a truncated file at ``path``
    that a later run would report as "Already exists".

    Args:
        url: Address of the file to fetch.
        path: Destination file path.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Raises:
        Whatever ``requests`` raises for connection problems or, via
        ``raise_for_status()``, for an HTTP error status; ``OSError`` for
        local file-system failures.
    """
    if os.path.exists(path):
        print("Already exists:", path)
        return

    print("Downloading:", url)
    tmp_path = path + ".part"
    try:
        # The context manager releases the connection even when writing fails.
        with requests.get(url, stream=True, timeout=timeout) as r:
            # Without this, an HTML error page would be saved as the "zip".
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024*1024):
                    if chunk:
                        f.write(chunk)
        os.replace(tmp_path, path)
    finally:
        # After a successful os.replace the temp file is gone; on any error
        # remove the partial download.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

# Fetch both archives (download_file skips files that are already on disk).
for source_url, target_zip in [(images_url, images_zip), (captions_url, text_zip)]:
    download_file(source_url, target_zip)

# Unpack every archive into its own sub-folder of DATA_DIR:
# images -> DATA_DIR/images, captions -> DATA_DIR/text.
for archive_path, subfolder in [(images_zip, "images"), (text_zip, "text")]:
    with zipfile.ZipFile(archive_path, 'r') as z:
        z.extractall(os.path.join(DATA_DIR, subfolder))

# Show a small part of what was extracted as a visual confirmation.
extracted_images = os.listdir(os.path.join(DATA_DIR, "images"))
extracted_text = os.listdir(os.path.join(DATA_DIR, "text"))
print("\n📁 Images folder:", extracted_images[:3])
print("📁 Text folder:", extracted_text)
print("\n✅ Dataset downloaded and extracted successfully!")


Downloading: https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip
Downloading: https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_text.zip

📁 Images folder: ['__MACOSX', 'Flicker8k_Dataset']
📁 Text folder: ['readme.txt', 'CrowdFlowerAnnotations.txt', 'Flickr_8k.testImages.txt', 'ExpertAnnotations.txt', '__MACOSX', 'Flickr_8k.devImages.txt', 'Flickr8k.token.txt', 'Flickr_8k.trainImages.txt', 'Flickr8k.lemma.token.txt']

✅ Dataset downloaded and extracted successfully!


In [None]:
import os
import json

DATA_DIR = "/content/drive/MyDrive/image-captioning-clean/data"
text_path = os.path.join(DATA_DIR, "text", "Flickr8k.token.txt")

# Maps image file name -> list of its captions, in file order.
captions = {}

# Read the caption file. Each record is parsed as "<image>#<n>\t<caption>".
with open(text_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline) carries no record and
            # would otherwise make the two-name unpacking below fail.
            continue
        # Split on the first tab only, so a tab inside the caption text
        # cannot produce more than two fields.
        img, caption = line.split("\t", 1)
        img_name = img.split("#")[0]  # remove #0, #1, #2, #3, #4
        captions.setdefault(img_name, []).append(caption.strip())

# Save the clean mapping as JSON next to the raw data.
save_path = os.path.join(DATA_DIR, "captions.json")
with open(save_path, "w") as f:
    json.dump(captions, f, indent=2)

print("Total images with captions:", len(captions))
print("Example entry:\n")
first_key = list(captions.keys())[0]
print(first_key, "→", captions[first_key][:2])
print("\n✅ Clean caption dataset saved at:", save_path)


Total images with captions: 8092
Example entry:

1000268201_693b08cb0e.jpg → ['A child in a pink dress is climbing up a set of stairs in an entry way .', 'A girl going into a wooden building .']

✅ Clean caption dataset saved at: /content/drive/MyDrive/image-captioning-clean/data/captions.json


In [None]:
import os, json

DATA_DIR = "/content/drive/MyDrive/image-captioning-clean/data"

# Load the image -> captions mapping from captions.json
with open(os.path.join(DATA_DIR, "captions.json"), "r") as f:
    captions = json.load(f)


def _read_split_list(filename):
    """Return the lines of ``DATA_DIR/text/<filename>`` as a list of strings.

    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(os.path.join(DATA_DIR, "text", filename)) as fh:
        return fh.read().splitlines()


# Load the split lists shipped with the dataset (one image name per line)
train_list = _read_split_list("Flickr_8k.trainImages.txt")
val_list   = _read_split_list("Flickr_8k.devImages.txt")
test_list  = _read_split_list("Flickr_8k.testImages.txt")

def build_split(img_list, split_name):
    """Write ``<split_name>.json`` into DATA_DIR for the images in ``img_list``.

    Every image name that has an entry in the global ``captions`` mapping
    becomes one record ``{"image": name, "captions": [...]}``; names without
    an entry are left out. The number of records and the target path are
    printed. Returns nothing.
    """
    records = [
        {"image": name, "captions": captions[name]}
        for name in img_list
        if name in captions
    ]

    target = os.path.join(DATA_DIR, f"{split_name}.json")
    with open(target, "w") as handle:
        json.dump(records, handle, indent=2)

    print(f"{split_name} size:", len(records), "→ saved to", target)

# Build all splits: one JSON file per (image list, split name) pair
for split_images, split_label in (
    (train_list, "train"),
    (val_list, "val"),
    (test_list, "test"),
):
    build_split(split_images, split_label)


train size: 6000 → saved to /content/drive/MyDrive/image-captioning-clean/data/train.json
val size: 1000 → saved to /content/drive/MyDrive/image-captioning-clean/data/val.json
test size: 1000 → saved to /content/drive/MyDrive/image-captioning-clean/data/test.json


In [None]:
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import torch

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# Model, image processor and tokenizer all come from the same checkpoint.
model_name = "nlpconnect/vit-gpt2-image-captioning"

model = VisionEncoderDecoderModel.from_pretrained(model_name)
processor = ViTImageProcessor.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Padding setup: pad on the right and reuse the EOS token as the pad token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Move the model's weights to the selected device.
model.to(device)

print("\n✅ Model, tokenizer, processor loaded successfully")
print("Tokenizer vocab size:", len(tokenizer))


Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/982M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]


✅ Model, tokenizer, processor loaded successfully
Tokenizer vocab size: 50257


In [None]:
import json
from torch.utils.data import Dataset, DataLoader
from PIL import Image

DATA_DIR = "/content/drive/MyDrive/image-captioning-clean/data"
IMG_DIR = os.path.join(DATA_DIR, "images", "Flicker8k_Dataset")

class Flickr8kDataset(Dataset):
    """Image/caption pairs read from one of the split JSON files.

    Each JSON record is ``{"image": <file name>, "captions": [<str>, ...]}``.
    Only the first caption of every image is used.

    Args:
        json_path: Path of the split JSON file.
        tokenizer: Callable tokenizer exposing ``pad_token_id``,
            ``eos_token_id`` and ``eos_token``.
        processor: Callable image processor returning ``pixel_values``.
        max_len: Fixed token length captions are padded/truncated to.
    """

    def __init__(self, json_path, tokenizer, processor, max_len=30):
        with open(json_path, "r") as f:
            self.data = json.load(f)
        self.tokenizer = tokenizer
        self.processor = processor
        self.max_len = max_len

    def __len__(self):
        """Number of images in the split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the tensors for sample ``idx``.

        Returns a dict with:
            pixel_values: processed image, batch dimension removed.
            labels: caption token ids, padding positions set to -100.
            decoder_input_ids: caption token ids, padding replaced by EOS.
            attention_mask: the tokenizer's mask for the caption.
        """
        entry = self.data[idx]
        img_name = entry["image"]
        captions = entry["captions"]

        # Use first caption
        caption = captions[0]

        # Load image
        img_path = os.path.join(IMG_DIR, img_name)
        image = Image.open(img_path).convert("RGB")

        # Process image
        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values.squeeze(0)

        # Append the EOS token to the caption text so the targets contain an
        # end-of-caption marker. A caption longer than max_len is truncated
        # and then loses that marker.
        text = caption + (self.tokenizer.eos_token or "")
        enc = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt"
        )

        input_ids = enc.input_ids.squeeze(0)
        attention_mask = enc.attention_mask.squeeze(0)

        # squeeze() returns a view of enc.input_ids, so the labels must be a
        # copy: masking the view in place would also write -100 into the ids
        # used for decoder_input_ids, and -100 is not a valid embedding index.
        labels = input_ids.clone()
        # Mask by attention_mask rather than by token id: the notebook sets
        # pad_token = eos_token, so an id comparison would also hide the real
        # EOS appended above.
        labels[attention_mask == 0] = -100

        decoder_input_ids = input_ids.clone()
        decoder_input_ids[decoder_input_ids == self.tokenizer.pad_token_id] = self.tokenizer.eos_token_id

        return {
            "pixel_values": pixel_values,
            "labels": labels,
            "decoder_input_ids": decoder_input_ids,
            "attention_mask": attention_mask
        }

print("Dataset class ready!")


Dataset class ready!


In [None]:
# Build train and validation datasets
train_json = "/content/drive/MyDrive/image-captioning-clean/data/train.json"
val_json   = "/content/drive/MyDrive/image-captioning-clean/data/val.json"

train_dataset = Flickr8kDataset(train_json, tokenizer, processor)
val_dataset   = Flickr8kDataset(val_json, tokenizer, processor)

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=4, shuffle=False)

print("Train size:", len(train_dataset))
print("Val size:", len(val_dataset))

# ---- SANITY CHECK: one batch through the model ----
batch = next(iter(train_loader))

pixel_values = batch["pixel_values"].to(device)
labels = batch["labels"].to(device)

# Only pixel_values and labels are passed, matching the training loop.
# The earlier variant also passed decoder_input_ids and attention_mask and
# failed with "IndexError: index out of range in self".
# NOTE(review): per the VisionEncoderDecoderModel docs the decoder inputs are
# derived from `labels` when decoder_input_ids is omitted, and the decoder
# mask parameter is `decoder_attention_mask` — confirm for the installed version.
model.train()
out = model(
    pixel_values=pixel_values,
    labels=labels
)
print("\nSanity forward pass OK! Loss =", out.loss.item())


Train size: 6000
Val size: 1000


IndexError: index out of range in self

In [None]:
# ---- FIXED SANITY CHECK ----
# Draw one training batch and run a single forward pass with labels only.
batch = next(iter(train_loader))

# Move the two tensors the model needs onto the active device.
pixel_values, labels = (batch[key].to(device) for key in ("pixel_values", "labels"))

model.train()
out = model(pixel_values=pixel_values, labels=labels)

print("\nSanity forward pass OK! Loss =", out.loss.item())


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.



Sanity forward pass OK! Loss = 9.805530548095703


In [None]:
# Repeat of the sanity check on a freshly drawn training batch.
batch = next(iter(train_loader))

pixel_values = batch["pixel_values"].to(device)
labels = batch["labels"].to(device)

# Keyword arguments for the forward pass, collected in one place.
forward_inputs = {"pixel_values": pixel_values, "labels": labels}

model.train()
out = model(**forward_inputs)
print("\nSanity forward pass OK! Loss =", out.loss.item())



Sanity forward pass OK! Loss = 9.901902198791504


In [None]:
# --- FULL TRAIN (3 epochs) with safe/hands-off config ---
import os, json, random
import torch
from torch.optim import AdamW
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

# Paths: checkpoints go to models/, evaluation artefacts to reports/
BASE = "/content/drive/MyDrive/image-captioning-clean"
MODEL_DIR = os.path.join(BASE, "models")
REPORT_DIR = os.path.join(BASE, "reports")
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(REPORT_DIR, exist_ok=True)

# Dataloaders are rebuilt here from the existing dataset objects, so this
# cell decides the batch sizes used for training and validation.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)   # safe batch
val_loader   = DataLoader(val_dataset,   batch_size=8, shuffle=False)

# Test records are loaded now and used by the evaluation after training.
test_json = os.path.join(BASE, "data", "test.json")
with open(test_json, "r") as f:
    test_items = json.load(f)

# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop: per epoch -> train, save checkpoint, compute validation loss
num_epochs = 3
for epoch in range(1, num_epochs+1):
    model.train()
    running = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} (train)")
    # `step` counts the batches whose loss is already in `running`.
    # pbar.n must not be used for this: tqdm advances it only after the loop
    # body has finished, so dividing by it overstates the running mean.
    for step, batch in enumerate(pbar, start=1):
        optimizer.zero_grad()
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        running += loss.item()
        pbar.set_postfix(train_loss=running / step)

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    avg_train_loss = running / max(len(train_loader), 1)
    print(f"\n✅ Epoch {epoch} finished. Avg train loss: {avg_train_loss:.4f}")

    # Save checkpoint for this epoch (model + tokenizer + processor together)
    ckpt_dir = os.path.join(MODEL_DIR, f"epoch_{epoch}")
    os.makedirs(ckpt_dir, exist_ok=True)
    model.save_pretrained(ckpt_dir)
    tokenizer.save_pretrained(ckpt_dir)
    processor.save_pretrained(ckpt_dir)
    print(f"💾 Checkpoint saved: {ckpt_dir}")

    # Validation (quick): mean loss over the validation loader, no gradients
    model.eval()
    val_running = 0.0
    with torch.no_grad():
        for batch in val_loader:
            pv = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device)
            out = model(pixel_values=pv, labels=labels)
            val_running += out.loss.item()
    avg_val_loss = val_running / max(len(val_loader), 1)
    print(f"🔎 Epoch {epoch} validation loss: {avg_val_loss:.4f}\n")

# ---- After training: evaluate on up to 50 random test images (BLEU) ----
# Imported once here rather than inside the per-image try block, where a
# failing import would be reported as a skipped image for every item.
from PIL import Image

print("Starting evaluation on test subset...")

# Pick up to 50 random test samples; the fixed seed keeps the subset stable.
# random.sample raises ValueError when asked for more items than exist, so
# the sample size is capped at the number of available records.
random.seed(42)
test_subset = random.sample(test_items, min(50, len(test_items)))

# references[i] holds the tokenised reference captions of image i,
# hypotheses[i] the tokenised generated caption.
references, hypotheses = [], []
sample_results = []

smoothing = SmoothingFunction().method4

model.eval()
with torch.no_grad():
    for item in tqdm(test_subset, desc="Evaluating"):
        img_name = item["image"]
        img_path = os.path.join(BASE, "data", "images", "Flicker8k_Dataset", img_name)
        try:
            image = Image.open(img_path).convert("RGB")
            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)

            out_ids = model.generate(
                pixel_values,
                max_new_tokens=30,
                num_beams=5,
                repetition_penalty=2.5,
                no_repeat_ngram_size=3,
                early_stopping=True
            )

            # Lower-case and whitespace-tokenise both sides before scoring.
            pred = tokenizer.decode(out_ids[0], skip_special_tokens=True).lower()
            refs = [c.lower().split() for c in item["captions"]]
            hyps = pred.split()

            references.append(refs)
            hypotheses.append(hyps)
            sample_results.append({
                "image": img_name,
                "predicted": pred,
                "actual": item["captions"]
            })
        except Exception as e:
            # Best effort: a single failing image is reported and skipped.
            print("Skipping", img_name, ":", e)

# Compute BLEU scores with smoothing.
# BLEU-3 uses exact thirds: 0.33 * 3 sums to 0.99, not 1.
bleu1 = corpus_bleu(references, hypotheses, weights=(1,0,0,0), smoothing_function=smoothing)
bleu2 = corpus_bleu(references, hypotheses, weights=(0.5,0.5,0,0), smoothing_function=smoothing)
bleu3 = corpus_bleu(references, hypotheses, weights=(1/3,1/3,1/3,0), smoothing_function=smoothing)
bleu4 = corpus_bleu(references, hypotheses, weights=(0.25,0.25,0.25,0.25), smoothing_function=smoothing)

metrics = {
    "BLEU-1": bleu1,
    "BLEU-2": bleu2,
    "BLEU-3": bleu3,
    "BLEU-4": bleu4,
    "train_epochs": num_epochs
}

# Save results
with open(os.path.join(REPORT_DIR, "final_metrics.json"), "w") as f:
    json.dump(metrics, f, indent=2)
with open(os.path.join(REPORT_DIR, "sample_results.json"), "w") as f:
    json.dump(sample_results, f, indent=2)

print("\n✅ Evaluation complete. Metrics:")
print(json.dumps(metrics, indent=2))
print(f"📁 Saved metrics and samples to {REPORT_DIR}")


Epoch 1/3 (train):   1%|          | 10/1500 [02:32<5:55:53, 14.33s/it, train_loss=8.2]

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os, json, random
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

# Default to the CPU and switch to the GPU only when torch reports one.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print("Using device:", device)


Mounted at /content/drive
Using device: cuda


In [None]:

# ---
# CELL 2 — Load the clean dataset class
# (The Markdown rule, heading and code fence that were pasted into this code
# cell are kept as comments; as code they raised a SyntaxError.)
DATA_DIR = "/content/drive/MyDrive/image-captioning-clean/data"
IMG_DIR = os.path.join(DATA_DIR, "images", "Flicker8k_Dataset")

class Flickr8kDataset(Dataset):
    """Image/caption pairs read from one of the split JSON files.

    Each JSON record is ``{"image": <file name>, "captions": [<str>, ...]}``.
    Only the first caption of every image is used.

    Args:
        json_path: Path of the split JSON file.
        tokenizer: Callable tokenizer exposing ``eos_token``.
        processor: Callable image processor returning ``pixel_values``.
        max_len: Fixed token length captions are padded/truncated to.
    """

    def __init__(self, json_path, tokenizer, processor, max_len=30):
        with open(json_path, "r") as f:
            self.data = json.load(f)
        self.tokenizer = tokenizer
        self.processor = processor
        self.max_len = max_len

    def __len__(self):
        """Number of images in the split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"pixel_values", "labels"}`` for sample ``idx``."""
        entry = self.data[idx]
        img_name = entry["image"]
        captions = entry["captions"]
        caption = captions[0]

        img_path = os.path.join(IMG_DIR, img_name)
        image = Image.open(img_path).convert("RGB")

        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values.squeeze(0)

        # Append the EOS token so the targets contain an end-of-caption
        # marker (lost again if the caption is truncated at max_len).
        text = caption + (self.tokenizer.eos_token or "")
        enc = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt"
        )

        # Copy before masking: squeeze() returns a view of enc.input_ids.
        labels = enc.input_ids.squeeze(0).clone()
        # Mask padding via attention_mask, not via the pad id: when the pad
        # token is the EOS token an id comparison would hide the real EOS too.
        labels[enc.attention_mask.squeeze(0) == 0] = -100

        return {
            "pixel_values": pixel_values,
            "labels": labels
        }

print("Dataset class ready.")


SyntaxError: invalid syntax (ipython-input-971152969.py, line 1)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os, json, random
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

# Use the GPU when torch can see one; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cuda


In [None]:
DATA_DIR = "/content/drive/MyDrive/image-captioning-clean/data"
IMG_DIR = os.path.join(DATA_DIR, "images", "Flicker8k_Dataset")

class Flickr8kDataset(Dataset):
    """Image/caption pairs read from one of the split JSON files.

    Each JSON record is ``{"image": <file name>, "captions": [<str>, ...]}``.
    Only the first caption of every image is used.

    Args:
        json_path: Path of the split JSON file.
        tokenizer: Callable tokenizer exposing ``eos_token``.
        processor: Callable image processor returning ``pixel_values``.
        max_len: Fixed token length captions are padded/truncated to.
    """

    def __init__(self, json_path, tokenizer, processor, max_len=30):
        with open(json_path, "r") as f:
            self.data = json.load(f)
        self.tokenizer = tokenizer
        self.processor = processor
        self.max_len = max_len

    def __len__(self):
        """Number of images in the split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"pixel_values", "labels"}`` for sample ``idx``.

        ``labels`` holds the caption token ids with padding positions set to
        -100 so they are ignored by the loss.
        """
        entry = self.data[idx]
        img_name = entry["image"]
        captions = entry["captions"]

        caption = captions[0]  # use first caption

        # Load image
        img_path = os.path.join(IMG_DIR, img_name)
        image = Image.open(img_path).convert("RGB")

        # Image → pixel values
        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values.squeeze(0)

        # Caption → tokens. The EOS token is appended so the targets contain
        # an end-of-caption marker; a caption longer than max_len is
        # truncated and then loses that marker.
        text = caption + (self.tokenizer.eos_token or "")
        enc = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt"
        )

        # Copy before masking: squeeze() returns a view of enc.input_ids.
        labels = enc.input_ids.squeeze(0).clone()
        # Ignore loss for pads. Padding is found via attention_mask instead
        # of the pad id: when the pad token is the EOS token, an id
        # comparison would also mask the real EOS appended above.
        labels[enc.attention_mask.squeeze(0) == 0] = -100

        return {
            "pixel_values": pixel_values,
            "labels": labels
        }

print("Dataset class ready.")


Dataset class ready.


In [None]:
# Model, image processor and tokenizer are all loaded from this checkpoint.
model_name = "nlpconnect/vit-gpt2-image-captioning"

model = VisionEncoderDecoderModel.from_pretrained(model_name)
processor = ViTImageProcessor.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Padding setup: pad on the right and reuse the EOS token as the pad token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Put the model's weights on the device chosen earlier in the notebook.
model.to(device)

vocab_size = len(tokenizer)
print("Model loaded on:", device)
print("Tokenizer vocab size:", vocab_size)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/982M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

Model loaded on: cuda
Tokenizer vocab size: 50257


In [None]:
# Locations of the split files written by the split-building step.
train_json = "/content/drive/MyDrive/image-captioning-clean/data/train.json"
val_json   = "/content/drive/MyDrive/image-captioning-clean/data/val.json"

# One dataset object per split, sharing the tokenizer and image processor.
train_dataset = Flickr8kDataset(train_json, tokenizer, processor)
val_dataset   = Flickr8kDataset(val_json,   tokenizer, processor)

# Training batches are shuffled; validation keeps the file order.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=4, shuffle=False)

print("Train size:", len(train_dataset))
print("Val size:", len(val_dataset))

# -------- SANITY TEST: one batch, one forward pass --------
batch = next(iter(train_loader))

pixel_values, labels = (batch[key].to(device) for key in ("pixel_values", "labels"))

# Labels only — no decoder_input_ids are passed; the model handles them internally.
model.train()
out = model(**{"pixel_values": pixel_values, "labels": labels})

print("\nSanity forward pass OK! Loss =", out.loss.item())


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/image-captioning-clean/data/train.json'

In [None]:
# Debug aid: list the project's data folder on Drive via the shell.
!ls "/content/drive/MyDrive/image-captioning-clean/data"


In [None]:
# Debug aid: list the top level of MyDrive via the shell.
# NOTE(review): the saved output exposes personal file names; clear it before sharing.
!ls "/content/drive/MyDrive"


'AI ML Future Project Ideas'$'\n''.gdoc'
 COCO2017
 CoinbaseWalletBackups
'Colab Notebooks'
'Evoastra - Team C Project'
'Google Photos'
 image-captioning
 image-captioning-clean
'i want to try every income source, whichever poss....gsheet'
 Project.gsite
 Resume.gdoc
'SujeetFSD (1).pdf'
'SujeetFSD (2).pdf'
'SujeetFSD (3).pdf'
'SujeetFSD (4).pdf'
'SujeetFSD (5).pdf'
'SujeetFSD (6).pdf'
 SujeetFSD.pdf
'Sujeet Singh_250908_183143.pdf'
 sujeet-singh-full-stack-developer_250827_162841.pdf
'Team C - Evoastra Project'
