In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
# The generator is lazy, so paths are emitted in the same order os.walk yields them.
input_root = '/kaggle/input'
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(input_root)
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import json
from PIL import Image
from tqdm import tqdm

import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP captioning model and its processor; the model is moved to the GPU
# when one is available, otherwise it stays on the CPU.
model_id = "Salesforce/blip-image-captioning-base"
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = BlipProcessor.from_pretrained(model_id)
model = BlipForConditionalGeneration.from_pretrained(model_id).to(device)

# Directory that contains the images directly (sub-directories are skipped below).
root_dir = "/kaggle/input/sfvwefwe/New folder (2)"  # <-- change this if needed

# Maps each image path to {"src_prompt": caption, "tgt_prompt": [caption]}.
captions_dict = {}

# os.listdir returns entries in arbitrary order; sorting keeps the processing order
# (and therefore the key order of the JSON output) stable from run to run.
for img_name in tqdm(sorted(os.listdir(root_dir))):
    img_path = os.path.join(root_dir, img_name)

    if not os.path.isfile(img_path):
        continue  # skip anything that is not a regular file

    try:
        # The context manager releases the file handle even when decoding fails or
        # the file is a multi-frame image; convert() returns an independent copy,
        # so `image` stays usable after the file is closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        inputs = processor(images=image, return_tensors="pt").to(model.device)

        out = model.generate(**inputs)
        caption = processor.decode(out[0], skip_special_tokens=True)

        # Store the caption as both the source prompt and the single target prompt.
        captions_dict[img_path] = {
            "src_prompt": caption,
            "tgt_prompt": [caption]
        }
    except Exception as e:
        # Best-effort run: report the failing file and carry on with the rest.
        print(f"Failed to process {img_path}: {e}")

# Write all collected captions to a JSON file in the Kaggle working directory.
output_path = "/kaggle/working/dataset.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(captions_dict, f, indent=2)