In [44]:
!pip install --upgrade google-genai
!pip install google-generativeai pillow



In [53]:
import os
import json
from pathlib import Path
import pandas as pd
from PIL import Image
import google.generativeai as genai
import re

In [46]:

# Set the Gemini API key as an environment variable
os.environ['GEMINI_API_KEY'] = 'AIzaSyBzAzpK9RHKClWs4qiBrMYazZSbBZrJl68'
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

model = GenerativeModel(model_name="gemini-2.0-flash")

In [47]:
meta_list = []
with open("/kaggle/input/vrproject2/abo-listings/listings/metadata/listings_0.json", "r") as f:
    for line in f:
        obj = json.loads(line)
        meta_list.append(obj)

In [48]:
images_df = pd.read_csv("/kaggle/input/vrproject2/abo-images-small/images/metadata/images.csv")
path_by_id = dict(zip(images_df["image_id"], images_df["path"]))

In [59]:
records = []
for item in meta_list:
    # gather all image IDs for this product
    image_ids = []
    if item.get("main_image_id"):
        image_ids.append(item["main_image_id"])
    image_ids += item.get("other_image_id", [])

    meta_str = json.dumps(item)

    for img_id in image_ids:
        rel_path = path_by_id.get(img_id)
        if not rel_path:
            continue  # skip missing files
        prompt = (
            "You are creating a Visual Question Answering (VQA) dataset.\n"
            f"Product metadata:\n{meta_str}\n\n"
            "Given this metadata + image, generate 5 distinct question-answer pairs, ensure you know the answer to the question.\n"
            "- Each answer must be exactly one word.\n"
            "- Return a JSON array of {question,answer} objects."
        )

        img = Image.open(f"/kaggle/input/vrproject2/abo-images-small/images/small/{rel_path}")
        
        response = model.generate_content([prompt, img])
        raw = response.text

        # 2. Strip leading ```json (with optional whitespace/newline)
        raw = re.sub(r"^```json\s*\n?", "", raw)
        
        # 3. Strip trailing ``` (and any whitespace before it)
        raw = re.sub(r"\n?```$", "", raw)
        
        # 4. Now parse JSON
        vqa_items = json.loads(raw)

        print("Response: ",vqa_items)

        for qa in vqa_items:
            records.append({
                "image_id": img_id,
                "question": qa["question"],
                "answer":   qa["answer"]
            })

        print(records[-1])

Response:  [{'question': 'What style of shoe is this?', 'answer': 'Loafer'}, {'question': 'What kind of heel does this shoe have?', 'answer': 'Platform'}, {'question': 'What is the product type?', 'answer': 'Shoes'}, {'question': 'What year was this shoe released?', 'answer': '2017'}, {'question': 'What material is the shoe made of?', 'answer': 'Leather'}]
{'image_id': '81iZlv3bjpL', 'question': 'What material is the shoe made of?', 'answer': 'Leather'}
Response:  [{'question': 'What is the product type?', 'answer': 'Shoes'}, {'question': 'What heel type is shown?', 'answer': 'Platform'}, {'question': "What is the shoe's style?", 'answer': 'Pumps'}, {'question': 'What is the brand?', 'answer': 'find.'}, {'question': "What is the shoe's toe style?", 'answer': 'Cap'}]
{'image_id': '91mIRxgziUL', 'question': "What is the shoe's toe style?", 'answer': 'Cap'}
Response:  [{'question': 'What kind of heel do the shoes have?', 'answer': 'Platform'}, {'question': 'What is the product type?', 'an

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 37
}
]

In [60]:
vqa_df = pd.DataFrame(records)
vqa_df.to_csv("vqa_dataset.csv", index=False)
print("Generated", len(vqa_df), "Q&A pairs across all images")

Generated 110 Q&A pairs across all images
