In [1]:
import json
import os
import pandas as pd
from PIL import Image
from tqdm import tqdm
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
import torch
import random
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === CONFIG ===
# Checkpoint identifier passed to `from_pretrained` when loading the model.
MODEL_ID = "google/gemma-3-4b-it"

# One entry per chart type, each holding:
#   "jsonl"      - metadata file with one JSON record per line
#   "img_dir"    - directory containing the chart images
#   "output_csv" - file the evaluation results are written to
# The (chart_type, stem) pairs below expand to those three paths.
DATASETS = {
    chart_type: {
        "jsonl": f"benchmark_images/{stem}_charts/{stem}_metadata.jsonl",
        "img_dir": f"benchmark_images/{stem}_charts",
        "output_csv": f"gemma3_4b_results_{chart_type}.csv",
    }
    for chart_type, stem in (
        ("bar", "bar"),
        ("line", "line_poly"),
        ("scatter", "scatter"),
    )
}

In [3]:
# === LOAD MODEL ===
# Load the weights in bfloat16. query_model() casts its inputs to bfloat16, so
# this keeps weights and activations in the same dtype, and it shrinks the
# memory footprint compared with full-precision weights, which should reduce
# how much `device_map="auto"` has to offload to CPU/disk.
print("🔧 Loading model and processor...")
model = Gemma3ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
).eval()  # eval(): inference only, disables training-time behaviour such as dropout
processor = AutoProcessor.from_pretrained(MODEL_ID)

🔧 Loading model and processor...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Fetching 2 files: 100%|██████████| 2/2 [11:45<00:00, 352.83s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  7.34s/it]
Some parameters are on the meta device because they were offloaded to the cpu and disk.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [4]:
# System message sent with every request by query_model(). It instructs the
# model to read the axes first, locate the referenced element, and reply with a
# single bare number, which is what the float parsing in the main loop expects.
system_prompt = """
You are an intelligent assistant that helps extract precise numeric values from scientific charts. 
Each chart may include bar plots, line graphs, or scatter points. To answer accurately:

1. First, analyze the chart axes — determine the visible x and y ranges, including tick values and scales (linear/logarithmic).
2. Then, identify the specific visual element referenced in the user's question — such as a bar, point, or curve corresponding to a particular x-value.
3. Carefully estimate the corresponding y-value by using the position of the element relative to the axis scale.
4. Always return a single numeric value (float) as your answer, without explanation, unit, or additional text.
"""

# === PREDICTION FUNCTION ===
def query_model(image: Image.Image, x_val: int) -> str:
    """Ask the model for the Y value at ``x_val`` on ``image``.

    Builds a two-turn chat (the module-level ``system_prompt`` plus a user turn
    holding the image and the question), runs generation with sampling
    disabled and at most 100 new tokens, and decodes only the tokens produced
    after the prompt.

    Args:
        image: Chart image to send to the model.
        x_val: X coordinate interpolated into the question text.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped.
    """
    question = f"What is the Y value at X={x_val} in this chart?"
    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": system_prompt}],
    }
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": question},
        ],
    }

    model_inputs = processor.apply_chat_template(
        [system_turn, user_turn],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    model_inputs = model_inputs.to(model.device, dtype=torch.bfloat16)

    # Remember the prompt length so the echoed prompt can be sliced off below.
    prompt_length = model_inputs["input_ids"].shape[-1]
    with torch.inference_mode():
        generated = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)

    completion_ids = generated[0][prompt_length:]
    return processor.decode(completion_ids, skip_special_tokens=True).strip()

In [5]:
# === MAIN LOOP FOR EACH TYPE ===
# For every chart type: read the metadata, evaluate a random subset of the
# images on randomly chosen points, and write one CSV of results.

# Sampling knobs. A dedicated seeded generator makes the chosen subset of
# images/points repeatable across runs instead of depending on global state.
SAMPLE_SEED = 42
IMAGE_KEEP_PROB = 0.5   # each image is evaluated with roughly this probability
POINTS_PER_IMAGE = 1    # upper bound on points queried per image
sampler = random.Random(SAMPLE_SEED)

for chart_type, cfg in DATASETS.items():
    print(f"📁 Processing {chart_type} charts...")
    results = []

    with open(cfg["jsonl"], "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline) that json.loads would reject.
        lines = [raw_line for raw_line in f if raw_line.strip()]

    for line in tqdm(lines, desc=f"🖼️ {chart_type}"):
        if sampler.random() > IMAGE_KEEP_PROB:
            continue
        entry = json.loads(line)
        image_id = entry["id"]
        print(f"🔍 Processing image {image_id}...")
        # Normalise Windows separators BEFORE taking the basename; otherwise a
        # path such as "dir\\chart.png" is not split on POSIX systems.
        image_name = os.path.basename(entry["image"].replace("\\", "/"))
        image_path = os.path.join(cfg["img_dir"], image_name)
        points = sampler.sample(
            entry["points"], min(POINTS_PER_IMAGE, len(entry["points"]))
        )

        try:
            # The context manager closes the underlying file once the RGB copy exists.
            with Image.open(image_path) as img:
                image = img.convert("RGB")
        except Exception as e:
            # Best effort: report the unreadable image and move on to the next one.
            print(f"❌ Cannot open image {image_id}: {e}")
            continue

        for point in points:
            x_val = point["x"]
            y_true = point["y"]
            start_time = time.perf_counter()
            y_pred_raw = query_model(image, x_val)
            elapsed_time = time.perf_counter() - start_time

            # Parse the first whitespace-separated token as a float, accepting a
            # decimal comma. Empty or non-numeric output yields None, not an error.
            tokens = y_pred_raw.split()
            try:
                y_pred = float(tokens[0].replace(",", ".")) if tokens else None
            except ValueError:
                y_pred = None

            abs_error = abs(y_pred - y_true) if y_pred is not None else None
            results.append({
                "chart_type": chart_type,
                "image_id": image_id,
                "x": x_val,
                "y_true": y_true,
                "y_pred": y_pred,
                "abs_error": abs_error,
                "raw_output": y_pred_raw,
                "inference_time": elapsed_time
            })

    df = pd.DataFrame(results)
    df.to_csv(cfg["output_csv"], index=False)
    print(f"✅ Saved {chart_type} results to {cfg['output_csv']}")

📁 Processing bar charts...


🖼️ bar:   0%|          | 0/20 [00:00<?, ?it/s]

🔍 Processing image 48e6a097-e192-4e83-9d09-cc4f23efec9b...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ bar:  10%|█         | 2/20 [02:55<26:23, 87.97s/it]

🔍 Processing image ba23b1c0-4e6c-4a67-8d44-353bcf4c34a5...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ bar:  15%|█▌        | 3/20 [05:36<33:29, 118.18s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image ab66dde9-8626-41ae-b65c-5815f9785fd7...


🖼️ bar:  25%|██▌       | 5/20 [08:22<24:47, 99.18s/it] The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 1c61f34b-b7cf-4e76-9ac2-baf89f997da0...


🖼️ bar:  30%|███       | 6/20 [11:10<27:37, 118.37s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image d3f0c7dc-cad3-4ecc-8f7b-909842287ddb...


🖼️ bar:  35%|███▌      | 7/20 [13:51<28:15, 130.43s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 7937206f-62e5-43ce-bde6-670178268c7b...


🖼️ bar:  40%|████      | 8/20 [16:42<28:28, 142.37s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 286f8a36-3844-41a2-823e-5f1000b82595...


🖼️ bar:  50%|█████     | 10/20 [19:58<20:22, 122.21s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 8887347e-c519-4dc6-8ae7-a7fb3f2b876e...


🖼️ bar:  55%|█████▌    | 11/20 [23:04<20:41, 137.89s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image c2d9a5d1-87f0-419d-8f33-8615016272e6...


🖼️ bar:  65%|██████▌   | 13/20 [26:03<13:44, 117.83s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 23b9dc83-6481-43e6-bee4-2eca74472b65...


🖼️ bar:  75%|███████▌  | 15/20 [28:57<08:52, 106.52s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 3a9ef51d-293d-478b-8866-3ada0b7b7ec1...


🖼️ bar: 100%|██████████| 20/20 [31:36<00:00, 94.80s/it] 


✅ Saved bar results to gemma3_4b_results_bar.csv
📁 Processing line charts...


🖼️ line:   0%|          | 0/50 [00:00<?, ?it/s]

🔍 Processing image 21b2eff1-d2a4-4163-ac29-839327bcde89...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ line:   2%|▏         | 1/50 [03:27<2:49:03, 207.00s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image eb35b156-e636-4b1a-8eaf-f5636d2b3ab0...


🖼️ line:  10%|█         | 5/50 [06:20<50:44, 67.66s/it]   

🔍 Processing image 5ffb678e-579d-4c24-9d7d-e2414a6613bb...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ line:  16%|█▌        | 8/50 [10:03<49:37, 70.90s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 0ab6874d-32ff-4060-a4a8-152b2c18d56f...


🖼️ line:  24%|██▍       | 12/50 [14:23<43:07, 68.08s/it]

🔍 Processing image e9fb3f7b-8905-4e06-bbc1-5f67c45b3ebe...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ line:  30%|███       | 15/50 [17:21<38:00, 65.15s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 0442d21d-f377-47f9-af64-30ae2bada22b...


🖼️ line:  32%|███▏      | 16/50 [21:05<49:24, 87.20s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 697e4fb1-fade-478d-b60b-d40cb1ef9759...


🖼️ line:  34%|███▍      | 17/50 [25:21<1:03:16, 115.05s/it]

🔍 Processing image 62a79057-35d1-4adb-83c8-fddd0c83195c...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ line:  36%|███▌      | 18/50 [29:00<1:11:55, 134.86s/it]

🔍 Processing image 3894973b-6b46-4862-8fb3-861065402af5...


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
🖼️ line:  42%|████▏     | 21/50 [32:34<51:21, 106.24s/it]  The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image aa99daea-ff21-4c66-87e1-30099bd2b4cd...


🖼️ line:  50%|█████     | 25/50 [36:49<36:06, 86.67s/it] The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image d15fe08d-f749-4f1d-930a-599bd106cb69...


🖼️ line:  54%|█████▍    | 27/50 [40:06<34:20, 89.61s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 860f75ae-fc20-4807-a626-314f58bb2a22...


🖼️ line:  58%|█████▊    | 29/50 [42:58<31:02, 88.70s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 9d813866-f5ea-4dff-9107-13698c61f4f1...


🖼️ line:  60%|██████    | 30/50 [45:27<32:41, 98.10s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image fe2689a0-06fd-477d-8a77-5083b0d6216e...


🖼️ line:  64%|██████▍   | 32/50 [48:24<28:33, 95.18s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 76ce112f-ec78-4ad9-a3f3-aa482bd49c71...


🖼️ line:  66%|██████▌   | 33/50 [51:25<31:21, 110.66s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 875c08d4-fadd-4220-84fe-b58c2b9f191f...


🖼️ line:  72%|███████▏  | 36/50 [54:43<21:16, 91.16s/it] The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 5bfbca18-19e2-4f98-86a8-a8facb7fa03b...


🖼️ line:  76%|███████▌  | 38/50 [56:25<15:51, 79.30s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image bc6954eb-8939-4b3c-8ea7-a3964996d52f...


🖼️ line:  80%|████████  | 40/50 [57:18<10:36, 63.70s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image ed400a1c-7ecf-4d05-aa3a-f27c9a2b71b9...


🖼️ line:  86%|████████▌ | 43/50 [58:15<05:24, 46.33s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 53c7c4d0-9311-42b4-a040-0535ec2a59db...


🖼️ line:  88%|████████▊ | 44/50 [59:03<04:39, 46.60s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 1b4584e9-58b4-4dde-a9dc-6153e545f529...


🖼️ line:  96%|█████████▌| 48/50 [59:45<00:59, 29.69s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 26761cad-57b4-4759-92c5-d6bad407725e...


🖼️ line:  98%|█████████▊| 49/50 [1:00:23<00:30, 30.86s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 57583e33-e9b4-4971-bb51-5a6177dc1f30...


🖼️ line: 100%|██████████| 50/50 [1:01:12<00:00, 73.44s/it]


✅ Saved line results to gemma3_4b_results_line.csv
📁 Processing scatter charts...


🖼️ scatter:   0%|          | 0/50 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 87c41ece-da07-41f6-ac52-0225562ca0de...


🖼️ scatter:   2%|▏         | 1/50 [00:41<34:13, 41.90s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 598a978d-572c-4b3f-a73f-8202d7523bce...


🖼️ scatter:   4%|▍         | 2/50 [01:21<32:13, 40.28s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image faeaf246-1551-404a-8dbc-370aade99e1a...


🖼️ scatter:   8%|▊         | 4/50 [02:00<21:03, 27.46s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 0a95cf02-f5a5-4ac2-8f2e-cdc65eb9bfd9...


🖼️ scatter:  10%|█         | 5/50 [02:40<23:31, 31.37s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 586314c9-b5ec-4bcc-aefa-4e125cb26d3e...


🖼️ scatter:  12%|█▏        | 6/50 [03:20<24:55, 33.98s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 2d419e87-6ef5-4768-a559-7f1f55c0d43d...


🖼️ scatter:  14%|█▍        | 7/50 [04:01<25:49, 36.04s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 9ac40a7b-84bd-42a2-851d-1bfc1b256ce2...


🖼️ scatter:  16%|█▌        | 8/50 [04:43<26:24, 37.72s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 4ccdf781-b478-4a49-b2be-d39f4d2551c8...


🖼️ scatter:  18%|█▊        | 9/50 [05:24<26:36, 38.94s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 062e16ea-d9ea-4b57-9ec6-80137a5fc78e...


🖼️ scatter:  22%|██▏       | 11/50 [06:13<20:58, 32.27s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 910568c5-f990-4487-8929-8d900558a972...


🖼️ scatter:  30%|███       | 15/50 [07:06<12:27, 21.37s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 8e26dc90-137f-472f-bfcb-b9b40eb005ab...


🖼️ scatter:  34%|███▍      | 17/50 [07:48<11:44, 21.34s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image e17b56fe-c739-4af5-bb50-c4fad0a9d7f5...


🖼️ scatter:  38%|███▊      | 19/50 [08:35<11:20, 21.95s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 3b68516a-0083-46f9-8b2e-8e0a85f4cf31...


🖼️ scatter:  40%|████      | 20/50 [09:20<12:55, 25.86s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image b43ce73b-56a4-44db-a8aa-dc33824bc8f1...


🖼️ scatter:  42%|████▏     | 21/50 [10:23<16:01, 33.15s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 873d974a-a5b1-43b0-9d7e-25d18c19e876...


🖼️ scatter:  44%|████▍     | 22/50 [11:14<17:22, 37.22s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 8f7055a8-0a0a-417a-96fb-29ae872ccce0...


🖼️ scatter:  46%|████▌     | 23/50 [11:59<17:36, 39.13s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image dfcff6a7-a72d-4bdf-9936-a452b3d694fe...


🖼️ scatter:  54%|█████▍    | 27/50 [12:43<08:46, 22.88s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image c1e29cae-8aab-492c-89be-bf0ec7d1a5f4...


🖼️ scatter:  56%|█████▌    | 28/50 [13:29<09:50, 26.83s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 1be618aa-34c1-4aea-a87a-499a7bb91f42...


🖼️ scatter:  58%|█████▊    | 29/50 [14:11<10:22, 29.66s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 7dac118c-6d0a-4bb0-89d0-99792034c876...


🖼️ scatter:  66%|██████▌   | 33/50 [14:50<05:25, 19.17s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 3e162208-21e8-4601-809e-dd0a68e260af...


🖼️ scatter:  70%|███████   | 35/50 [16:17<06:27, 25.82s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 88d6cf96-b231-4362-8dac-7adabcb12496...


🖼️ scatter:  76%|███████▌  | 38/50 [17:21<04:49, 24.15s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 5fb14168-871b-407f-8bc3-64bc36ee19b0...


🖼️ scatter:  82%|████████▏ | 41/50 [18:00<03:02, 20.32s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image dc77160e-04e3-4039-b234-35d35d71f898...


🖼️ scatter:  84%|████████▍ | 42/50 [18:46<03:11, 23.96s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image 5dd3bf98-64c0-4209-894a-ad5dfa9b3674...


🖼️ scatter:  88%|████████▊ | 44/50 [19:31<02:21, 23.55s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🔍 Processing image ec73f558-ece3-41d6-b711-25f873a94743...


🖼️ scatter: 100%|██████████| 50/50 [20:18<00:00, 24.37s/it]

✅ Saved scatter results to gemma3_4b_results_scatter.csv



