In [None]:
!pip install pandas
!pip install requests
!pip install -q -U google-genai
!pip install pillow
!pip install google-generativeai

In [None]:
import os
import pandas as pd
import requests
import json
from google import genai
import csv

In [None]:
# Image archive locations, all derived from one root directory.
IMAGES_BASE_DIR = os.path.join("abo-images-small", "images")
IMAGES_METADATA_PATH = os.path.join(IMAGES_BASE_DIR, "metadata")
IMAGES_METADATA_CSV = os.path.join(IMAGES_BASE_DIR, "metadata", "images.csv")
IMAGES_PATH = os.path.join(IMAGES_BASE_DIR, "small")

# Directory that holds the listing metadata files.
LISTINGS_METADATA_PATH = os.path.join("abo-listings", "listings", "metadata")

In [None]:
# Load the image metadata table (images.csv) into a DataFrame.
df = pd.read_csv(IMAGES_METADATA_CSV)

In [None]:
# Show (rows, columns) of the image metadata table.
df.shape

In [None]:
# Preview the first rows of the image metadata table.
df.head()

In [None]:
# os.listdir returns entries in arbitrary, platform-dependent order. Sort them so
# the listing records are loaded in the same order on every run; later cells
# index into the loaded list by position.
json_files = sorted(os.listdir(LISTINGS_METADATA_PATH))

In [None]:
# Turn each listing file name into a path under the listings metadata directory.
json_file_paths = [os.path.join(LISTINGS_METADATA_PATH, filename) for filename in json_files]

In [None]:
# Each listing file is read as JSON Lines: one JSON object per line.
images_metadata = []
for filepath in json_file_paths:
    print(f"[INFO] Reading {filepath}")
    with open(filepath, mode="r", encoding="utf-8") as read_file:
        # Stream the file line by line instead of holding every line in memory
        # with readlines().
        for json_string in read_file:
            # A blank line (e.g. a trailing newline at end of file) is not valid
            # JSON and would make json.loads raise, so skip it.
            if not json_string.strip():
                continue
            images_metadata.append(json.loads(json_string))

In [None]:
# Print the index of the first listing that has a non-empty "pattern" field.
# Iterate over the list itself rather than a hard-coded record count, so the
# search neither raises IndexError on a smaller dataset nor stops early on a
# larger one.
for i, listing in enumerate(images_metadata):
    description_data = listing.get("pattern", [])
    if description_data:
        print(i)
        break
  

In [None]:
# Number of listing records collected in images_metadata.
len(images_metadata)

In [None]:
def _dedupe_keep_order(values):
    """Drop repeated values while keeping first-seen order (unlike a set, this is reproducible)."""
    return list(dict.fromkeys(values))


def _is_english(entry):
    """Return True when the entry's "language_tag" starts with "en" (case-insensitive)."""
    return str(entry.get("language_tag", "")).lower().startswith("en")


def get_required_info(metadata):
    """Build "Label: value1, value2" description lines from one listing's metadata.

    Only English-tagged entries are used for the language-tagged fields. Values
    within a line are de-duplicated and kept in the order they first appear, so
    the same listing always produces the same text.

    Args:
        metadata: dict for a single listing. Missing or null fields are skipped.

    Returns:
        list[str]: one line per field that has at least one usable value, in the
        order: language-tagged fields, colour with standardized values, product type.
    """
    # Format: [{ "language_tag": <str>, "value": <str> }, ...]
    keys1 = {
        "bullet_point": "Bullet Points",
        "color": "Colour",
        "fabric_type": "Fabric Type",
        "finish_type": "Finish Type",
        "item_keywords": "Item Keywords",
        "item_name": "Item Name",
        "item_shape": "Item Shape",
        "material": "Material",
        "pattern": "Pattern",
        "product_description": "Product Description",
        "style": "Style",
    }
    # Format: [{"language_tag": <str>, "standardized_values": [<str>], "value": <str>}, ...]
    keys2 = {"color": "Colours"}
    # Handled as either [{"value": <str>}, ...] or a bare <str>.
    # NOTE(review): the original comment documented <str> while the code indexed
    # entry["value"]; both shapes are accepted here — confirm against the data.
    keys3 = {"product_type": "Product Type"}

    strings = []

    for key, label in keys1.items():
        # `or []` also covers a field that is present but null.
        entries = metadata.get(key) or []
        vals = _dedupe_keep_order(
            entry["value"] for entry in entries if _is_english(entry)
        )
        if vals:
            strings.append(label + ": " + ", ".join(vals))

    for key, label in keys2.items():
        collected = []
        for entry in metadata.get(key) or []:
            if _is_english(entry):
                # The free-text value comes first, then its standardized forms.
                collected.append(entry["value"])
                collected.extend(entry.get("standardized_values") or [])
        vals = _dedupe_keep_order(collected)
        if vals:
            strings.append(label + ": " + ", ".join(vals))

    for key, label in keys3.items():
        raw = metadata.get(key) or []
        if isinstance(raw, str):
            vals = [raw]
        else:
            vals = _dedupe_keep_order(entry["value"] for entry in raw)
        if vals:
            strings.append(label + ": " + ", ".join(vals))

    return strings

In [None]:
def get_main_image_path(metadata, df):
    """Return the file path of a listing's main image.

    Looks up ``metadata["main_image_id"]`` in the ``image_id`` column of ``df``
    and joins the first matching row's ``path`` value onto ``IMAGES_PATH``.

    Raises:
        KeyError: if the listing has no ``main_image_id``.
        IndexError: if no row of ``df`` matches the id.
    """
    matching_rows = df[df["image_id"] == metadata["main_image_id"]]
    relative_path = matching_rows["path"].iloc[0]
    return os.path.join(IMAGES_PATH, relative_path)

def get_other_image_paths(metadata, df):
    """Return the file paths of a listing's additional (non-main) images.

    Args:
        metadata: dict for a single listing; ``other_image_id`` is read as a
            list of image ids.
        df: image metadata table with ``image_id`` and ``path`` columns.

    Returns:
        list[str]: one path per id, in the order the ids are listed. An empty
        list when the listing has no ``other_image_id`` field (or it is null),
        rather than raising KeyError.

    Raises:
        IndexError: if an id has no matching row in ``df``.
    """
    img_ids = metadata.get("other_image_id") or []
    img_paths = []
    for img_id in img_ids:
        matching_paths = df.loc[df["image_id"] == img_id, "path"]
        # First matching row wins, as in the main-image lookup.
        img_paths.append(os.path.join(IMAGES_PATH, matching_paths.iloc[0]))

    return img_paths

In [None]:
# Try the main-image lookup on a single listing.
# NOTE(review): index 18 looks like an arbitrary sample — confirm.
img_path = get_main_image_path(images_metadata[18], df)

In [None]:
def get_response(client, img_path, metadata):
    """Ask the Gemini model for question/answer pairs about one product image.

    The image is uploaded through the client's file API, sent to the model
    together with the listing metadata text and a fixed prompt, and removed
    again once the request has finished (whether it succeeded or failed), so
    uploads do not pile up across many calls.

    Args:
        client: a ``genai.Client`` (anything exposing ``files.upload``,
            ``files.delete`` and ``models.generate_content``).
        img_path: path of the image file to upload.
        metadata: listing metadata as text.

    Returns:
        The ``text`` attribute of the model response.
    """
    # The backslash continuations are inside the string literal, so the leading
    # spaces on the continuation lines are part of the prompt; keep them as-is.
    prompt = "I am preparing a dataset to train a Visual Question Answering (VQA) model. \
        I have a set of images and corresponding metadata from Amazon product listings (Amazon Berkeley Object Dataset). \
        Using the image and its metadata, generate unambiguous questions with one-word answers which should be unambiguously answerable by 'SOLELY' looking at the image.\
        Since I need to automate parsing these question and answers, please provide them in CSV format: question, answer.\
        Please do not generate anything else other than question and answers as it makes it difficult to write an automated parser."

    img = client.files.upload(file=img_path)
    try:
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[img, metadata, prompt],
        )
        return response.text
    finally:
        # Cleanup is best-effort: a failed delete must not discard a good
        # response or hide the original error.
        try:
            client.files.delete(name=img.name)
        except Exception as cleanup_error:
            print("Warning: could not delete uploaded file:", cleanup_error)

In [None]:
# Read the API key from the environment so it is never saved inside the notebook.
# Set GEMINI_API_KEY before starting the kernel; a missing variable raises KeyError here.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

In [None]:
def _parse_qa_rows(response_text):
    """Extract (question, answer) pairs from the model's CSV-formatted reply.

    Skips blank lines, Markdown code-fence lines, a "question, answer" header
    row and rows with fewer than two columns. Nothing is removed by position,
    so a reply without a header or closing fence does not lose real rows.
    """
    if not response_text:
        return []

    pairs = []
    # skipinitialspace lets `"q", "a"` (space after the comma) parse as quoted fields.
    for row in csv.reader(response_text.split("\n"), skipinitialspace=True):
        cells = [cell.strip() for cell in row]
        if not any(cells):
            continue
        if cells[0].startswith("```"):
            continue
        if len(cells) < 2:
            continue
        if cells[0].lower() == "question" and cells[-1].lower() == "answer":
            continue
        # The prompt asks for one-word answers, so treat the last column as the
        # answer; extra columns come from an unquoted comma in the question.
        *question_parts, answer = cells
        pairs.append((", ".join(question_parts), answer))
    return pairs


img_count = 0
seen_paths = set()
# newline="" lets the csv module control line endings; UTF-8 keeps non-ASCII
# text intact regardless of the platform's default encoding.
with open("main_image_qa_12.csv", "w", newline="", encoding="utf-8") as write_file:
    # csv.writer quotes and escapes fields that contain commas or quotes, which
    # hand-built rows did not.
    qa_writer = csv.writer(write_file)
    for i, img_metadata in enumerate(images_metadata):
        if (i+1) % 100 == 0:
            print(f"INFO: Images Processed: {(i+1)}/{len(images_metadata)} Images Used: {img_count}/{len(images_metadata)}")

        try:
            req_info = get_required_info(img_metadata)
            # Skip listings with too little descriptive text to ground the questions.
            if len(req_info) < 5:
                continue

            metadata = "\n".join(req_info)
            image_path = get_main_image_path(img_metadata, df)

            # Several listings can share one main image; query each image once.
            if image_path in seen_paths:
                continue
            seen_paths.add(image_path)

            response = get_response(client, image_path, metadata)

            qa_pairs = _parse_qa_rows(response)
            if not qa_pairs:
                continue

            qa_writer.writerows(
                (image_path, question, answer) for question, answer in qa_pairs
            )
            # Count only images that contributed at least one row.
            img_count += 1
        except Exception as e:
            # Best-effort batch job: report the failure and move on to the next listing.
            print("Error:", e)