In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import os

In [None]:
# Read the training table and the test table from CSV files in the current
# working directory; each call is spelled with its explicit keyword.
train_df = pd.read_csv(filepath_or_buffer="train.csv")
test_df = pd.read_csv(filepath_or_buffer="test.csv")

In [None]:
# Structural overview of both tables: row/column counts, the training
# column index, and the first rows of the training table.
print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")
print(f"\nTrain columns: {train_df.columns}")
print("\nFirst 5 rows of train.csv:", train_df.head(), sep="\n")

In [None]:
# Per-column count of null entries in the training table, printed under a
# heading line.
print(
    "\nMissing values in train.csv:",
    train_df.isnull().sum(),
    sep="\n",
)

In [None]:
def show_samples(df, image_folder, num=3, random_state=None):
    """Display a random selection of images from ``df`` with their captions.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``image_id`` column. When a ``caption`` column is
        present its value is used as the plot title; otherwise the title is
        ``"Test Image"``.
    image_folder : str
        Directory that each ``image_id`` value is joined onto to build the
        image path (the id is used as-is; no extension is appended).
    num : int, default 3
        Maximum number of rows to draw. Capped at ``len(df)`` so that a frame
        with fewer rows than requested no longer raises ``ValueError``.
    random_state : int or None, default None
        Forwarded to ``DataFrame.sample``; pass an integer to get the same
        selection on every run.

    Returns
    -------
    None
        Images are rendered with matplotlib as a side effect.
    """
    # DataFrame.sample (without replacement) rejects a sample size larger
    # than the population, so cap the request at the number of rows.
    num = min(num, len(df))
    sample = df.sample(num, random_state=random_state)
    for _, row in sample.iterrows():
        img_path = os.path.join(image_folder, str(row['image_id']))
        if not os.path.exists(img_path):
            # Report the miss instead of skipping silently, so a wrong folder
            # or a missing file extension is visible to the reader.
            print(f"Image not found, skipping: {img_path}")
            continue
        # The context manager releases the file handle once the figure has
        # been drawn.
        with Image.open(img_path) as image:
            plt.imshow(image)
            plt.axis('off')
            plt.title(row['caption'] if 'caption' in row else "Test Image")
            plt.show()

In [None]:
# Preview three randomly chosen training images from the "train_images" folder.
show_samples(df=train_df, image_folder="train_images", num=3)

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
import os

# Load the processor and the conditional-generation model from the same
# pretrained BLIP captioning checkpoint.
processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's weights onto the chosen device; as the last expression in
# the cell, the returned model is also what the notebook displays.
model.to(device)

In [None]:
def generate_caption(img_path):
    """Generate a caption for a single image file.

    Relies on the module-level ``processor``, ``model`` and ``device`` objects
    created in an earlier cell.

    Parameters
    ----------
    img_path : str
        Path of the image file to caption.

    Returns
    -------
    str
        The first generated sequence, decoded with special tokens skipped.
    """
    # Image.open leaves the file open until the image is closed. Convert
    # inside a context manager so the handle is released immediately instead
    # of accumulating across the per-image loop; convert() returns a separate
    # in-memory image that stays valid after the file is closed.
    with Image.open(img_path) as raw_image:
        image = raw_image.convert('RGB')
    inputs = processor(images=image, return_tensors="pt").to(device)
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption

In [None]:
# Caption every image listed in the test table, in row order, collecting
# (image_id, caption) pairs. Files are looked up as "test/<image_id>.jpg".
sample_images = test_df["image_id"].tolist()
test_captions = []
for image_id in sample_images:
    img_path = os.path.join("test", f"{image_id}.jpg")
    caption = generate_caption(img_path)
    test_captions.append((str(image_id), caption))

In [None]:
# Turn the collected (image_id, caption) pairs into a two-column table and
# write it to disk without the DataFrame index.
submission_df = pd.DataFrame(
    data=test_captions,
    columns=["image_id", "caption"],
)
submission_df.to_csv(path_or_buf="submission.csv", index=False)
print("submission.csv saved!")