In [1]:
# !pip install --upgrade google-genai
# !pip install google-generativeai pillow

In [2]:
import os
import json
from pathlib import Path
import pandas as pd
from PIL import Image
import google.generativeai as genai
import re
import time

In [3]:

# Read the Gemini API key from the environment rather than embedding it in the
# notebook: a key stored in source is exposed to everyone who can read the file.
# NOTE(review): the key that was previously hardcoded in this cell should be
# treated as compromised and rotated.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; define it in the environment before running this cell."
    )
genai.configure(api_key=api_key)

# Model handle used by the generation loop further down.
model = genai.GenerativeModel(model_name="gemini-2.0-flash")

In [None]:
# Metadata fields kept for each listing; every other key of the raw record is
# dropped before the record is stored.
fields_to_keep = [
    "color",
    "item_name",
    "product_type",
    "main_image_id",
    "item_keywords"
]

meta_list = []

# The listings file is parsed one JSON object per line.
# encoding="utf-8" is passed explicitly so decoding does not depend on the
# platform's default locale.
# NOTE(review): the other cells read from /kaggle/input/vrproject2/...; confirm
# that this dataset mount is the intended one.
with open("/kaggle/input/abo-listings/listings/metadata/listings_1.json", "r", encoding="utf-8") as f:
    for line in f:
        # A blank line (e.g. a trailing newline at end of file) is not a
        # record and would make json.loads raise.
        if not line.strip():
            continue
        obj = json.loads(line)
        # Keys missing from a listing are stored as None.
        filtered_obj = {key: obj.get(key) for key in fields_to_keep}
        meta_list.append(filtered_obj)

In [5]:
# Image metadata table; its "image_id" and "path" columns are used below.
images_df = pd.read_csv(
    "/kaggle/input/vrproject2/abo-images-small/images/metadata/images.csv"
)
# Lookup from image id to the image path recorded in the CSV.
path_by_id = {
    image_id: image_path
    for image_id, image_path in zip(images_df["image_id"], images_df["path"])
}

In [None]:
records    = []
# Number of leading entries of meta_list to skip.
# NOTE(review): presumably the items already processed by an earlier run —
# confirm before re-running.
skip_count = 1235
# File that receives the rows collected so far when the run is aborted.
partial_csv_path = "vqa_dataset_partial.csv"


def _strip_code_fences(text):
    """Return *text* without a surrounding Markdown code fence.

    Handles leading/trailing whitespace and an optional language tag after
    the opening fence (for example ```json).
    """
    text = text.strip()
    text = re.sub(r"^```[A-Za-z]*\s*", "", text)
    text = re.sub(r"\s*```$", "", text)
    return text


try:
    for item in meta_list[skip_count:]:
        img_id = item.get("main_image_id")
        if not img_id:
            # "item_id" is only present if the metadata filter kept it, so
            # fall back to the item name to keep the log line useful.
            print("No main image for:", item.get("item_id") or item.get("item_name"))
            continue

        rel_path = path_by_id.get(img_id)
        if not rel_path:
            # No known image file for this id: nothing to send to the model.
            continue

        meta_str = json.dumps(item)

        # main prompt
        # prompt   = (
        # "You are creating a Visual Question Answering (VQA) dataset.\n"
        # f"Product metadata:\n{meta_str}\n\n"
        # "Given this metadata + image, generate 5 distinct question-answer pairs, ensure you know the answer to the question.\n"
        # "- Each answer must be exactly one word.\n"
        # "- The 5 questions generated MUST be different from each other, and MUST be answerable just by looking at the image.\n"
        # "- Return a JSON array of {question,answer} objects."
        # )

        # comparison prompt
        prompt   = (
            "You are creating a Visual Question Answering (VQA) dataset.\n"
            f"Product metadata:\n{meta_str}\n\n"
            "Given this metadata + image, generate 5 distinct question-answer pairs, ensure you know the answer to the question.\n"
            "- Each answer must be exactly one word.\n"
            "- Return a JSON array of {question,answer} objects."
        )

        # Open the image in a context manager so the file handle is released
        # once the request has been sent.
        with Image.open(f"/kaggle/input/vrproject2/abo-images-small/images/small/{rel_path}") as img:
            response = model.generate_content([prompt, img])

        try:
            vqa_items = json.loads(_strip_code_fences(response.text))
            # Build all rows first so a malformed entry does not leave a
            # half-written set of rows for this image in `records`.
            new_records = [
                {
                    "image_id": img_id,
                    "question": qa["question"],
                    "answer":   qa["answer"],
                    "path":     rel_path
                }
                for qa in vqa_items
            ]
        except (ValueError, KeyError, TypeError) as parse_error:
            # Unusable response (invalid JSON, unexpected structure, or no
            # text returned): skip this image instead of aborting the run.
            print(f"Skipping {img_id}: unusable response ({parse_error!r})")
        else:
            print("Response for", img_id, ":", vqa_items)
            records.extend(new_records)
            if new_records:
                print("Last record:", records[-1])

        # Pause between requests, including after a skipped response.
        time.sleep(3)

except Exception as e:
    # Save whatever we have so far
    print(f"Exception encountered: {e!r}")
    df_partial = pd.DataFrame(records)
    df_partial.to_csv(partial_csv_path, index=False)
    print(f"Saved partial results ({len(df_partial)} rows) to {partial_csv_path}")

In [None]:
# Turn the collected question/answer rows into a table and write it to CSV.
output_csv = "vqa_dataset.csv"
vqa_df = pd.DataFrame(records)
vqa_df.to_csv(output_csv, index=False)
pair_count = len(vqa_df)
print("Generated", pair_count, "Q&A pairs across all images")