In [1]:
!pip install --upgrade google-genai
!pip install google-generativeai pillow

Collecting google-genai
  Downloading google_genai-1.13.0-py3-none-any.whl.metadata (32 kB)
Collecting anyio<5.0.0,>=4.8.0 (from google-genai)
  Downloading anyio-4.9.0-py3-none-any.whl.metadata (4.7 kB)
Downloading google_genai-1.13.0-py3-none-any.whl (164 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.4/164.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading anyio-4.9.0-py3-none-any.whl (100 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.9/100.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anyio, google-genai
  Attempting uninstall: anyio
    Found existing installation: anyio 3.7.1
    Uninstalling anyio-3.7.1:
      Successfully uninstalled anyio-3.7.1
  Attempting uninstall: google-genai
    Found existing installation: google-genai 0.8.0
    Uninstalling google-genai-0.8.0:
      Successfully uninstalled google-genai-0.8.0
Successfully installed anyio-4.9.0 google-genai-1.13.0


In [2]:
import os
import json
from pathlib import Path
import pandas as pd
from PIL import Image
import google.generativeai as genai
import re

In [3]:

# Set the Gemini API key as an environment variable
os.environ['GEMINI_API_KEY'] = 'AIzaSyBzAzpK9RHKClWs4qiBrMYazZSbBZrJl68'
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

model = genai.GenerativeModel(model_name="gemini-2.0-flash")

In [4]:
meta_list = []
with open("/kaggle/input/vrproject2/abo-listings/listings/metadata/listings_0.json", "r") as f:
    for line in f:
        obj = json.loads(line)
        meta_list.append(obj)

In [5]:
images_df = pd.read_csv("/kaggle/input/vrproject2/abo-images-small/images/metadata/images.csv")
path_by_id = dict(zip(images_df["image_id"], images_df["path"]))

In [6]:
records = []
skip_count=0

# Loop over meta_list, skipping the first `skip_count` entries
for item in meta_list[skip_count:]:
    if not item.get("main_image_id"):
        print("No main image found for item:", item.get("item_id"))
        continue

    img_id   = item["main_image_id"]
    rel_path = path_by_id.get(img_id)
    if not rel_path:
        continue

    meta_str = json.dumps(item)
    prompt   = (
        "You are creating a Visual Question Answering (VQA) dataset.\n"
        f"Product metadata:\n{meta_str}\n\n"
        "Given this metadata + image, generate 5 distinct question-answer pairs, ensure you know the answer to the question.\n"
        "- Each answer must be exactly one word.\n"
        "- Return a JSON array of {question,answer} objects."
    )

    img = Image.open(f"/kaggle/input/vrproject2/abo-images-small/images/small/{rel_path}")
    response = model.generate_content([prompt, img])
    raw      = response.text

    # Strip markdown fences
    raw = re.sub(r"^```json\s*\n?", "", raw)
    raw = re.sub(r"\n?```$", "", raw)

    # Parse JSON
    vqa_items = json.loads(raw)

    print("Response for", img_id, ":", vqa_items)
    for qa in vqa_items:
        records.append({
            "image_id": img_id,
            "question": qa["question"],
            "answer":   qa["answer"],
            "path": rel_path
        })

    print("Last record:", records[-1])

# When done, save:
vqa_df = pd.DataFrame(records)
vqa_df.to_csv("/kaggle/working/vqa_dataset.csv", index=False)
print("Wrote", len(vqa_df), "rows (skipped first", skip_count, "entries).")

Response for 81iZlv3bjpL : [{'question': 'What type of heel is this?', 'answer': 'Platform'}, {'question': 'What is the product type?', 'answer': 'Shoes'}, {'question': 'What is the brand name?', 'answer': 'Find'}, {'question': 'What shape is the toe?', 'answer': 'Cap'}, {'question': 'What style are these shoes?', 'answer': 'Pumps'}]
Last record: {'image_id': '81iZlv3bjpL', 'question': 'What style are these shoes?', 'answer': 'Pumps'}
Response for 619y9YG9cnL : [{'question': 'What color is the slides?', 'answer': 'White'}, {'question': 'What is the length?', 'answer': '22-inch'}, {'question': 'How many pairs included?', 'answer': 'Ten'}, {'question': 'What is the product type?', 'answer': 'HARDWARE'}, {'question': 'How many pounds?', 'answer': '1.45'}]
Last record: {'image_id': '619y9YG9cnL', 'question': 'How many pounds?', 'answer': '1.45'}
Response for 81NP7qh2L6L : [{'question': 'What color is the filament?', 'answer': 'Yellow'}, {'question': 'What material is the filament made of?'

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 8
}
]

In [7]:
vqa_df = pd.DataFrame(records)
vqa_df.to_csv("vqa_dataset.csv", index=False)
print("Generated", len(vqa_df), "Q&A pairs across all images")

Generated 80 Q&A pairs across all images
