# Prototyping and Testing the execution of the verifiers on the dataset

#### Some config

In [1]:
# Number of worker processes passed as `num_proc` to `Dataset.map` inside
# `execute_verifier_on_dataset`.
NUM_PROC = 1

## Execution Code

In [2]:
from collections import defaultdict
from vif.baselines.models import VerEvaluation
from vif.baselines.verifiers_baseline.ver_baseline import TexVerBaseline
from datasets.formatting.formatting import LazyBatch
from PIL import Image
import sys
from datasets import load_dataset, Dataset
from loguru import logger

from vif.utils.renderer.tex_renderer import TexRenderer

# Module-level TexRenderer instance.
# NOTE(review): no cell visible in this notebook references `renderer` — confirm it is still needed.
renderer = TexRenderer()


def execute_verifier_on_dataset(
    verifier: TexVerBaseline, dataset: Dataset, n: int = 1
) -> Dataset:
    """Run ``verifier`` ``n`` times on every example of ``dataset``.

    Each example is mapped to ``n`` output rows, one per attempt. An output row
    holds the fields of the ``VerEvaluation`` returned by
    ``verifier.assess_customization`` (via ``model_dump()``) plus two extra
    columns:

    * ``"try"``   -- zero-based attempt number for that example,
    * ``"index"`` -- position of the source example in ``dataset``.

    The original columns of ``dataset`` are dropped from the result. The number
    of worker processes is taken from the module-level ``NUM_PROC``.

    Args:
        verifier: Verifier under evaluation; must expose ``get_config_metadata()``
            and ``assess_customization()``.
        dataset: Dataset providing the ``id``, ``code``, ``original_image``,
            ``instruction``, ``solution``, ``solution_image`` and ``expected``
            columns.
        n: Number of attempts per example (at least 1).

    Returns:
        A new ``Dataset`` with ``n`` rows per input example.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # With zero attempts the mapped function would return an empty batch.
        raise ValueError(f"n must be at least 1, got {n}")

    def exec_verif(row: LazyBatch, indice: list[int]):
        # `map` is called with batched=True and batch_size=1, so every column in
        # `row` (and `indice`) is a one-element list: hence the `[0]` lookups.
        metadata = verifier.get_config_metadata()

        new_rows = defaultdict(list)
        for attempt in range(n):
            # Build a fresh input per attempt and dump the result right away so
            # attempts cannot alias each other.
            # NOTE(review): defensive -- confirm whether assess_customization
            # mutates or returns its input object.
            ver_eval_input: VerEvaluation = VerEvaluation(
                id=row["id"][0],
                approach_name=metadata["name"],
                config_metadata=metadata,
                initial_code=row["code"][0],
                initial_image=row["original_image"][0],
                initial_instruction=row["instruction"][0],
                initial_solution=row["solution"][0],
                initial_solution_image=row["solution_image"][0],
                expected=row["expected"][0],
            )
            ver_result: VerEvaluation = verifier.assess_customization(ver_eval_input)

            for key, value in ver_result.model_dump().items():
                new_rows[key].append(value)
            new_rows["try"].append(attempt)
            new_rows["index"].append(indice[0])
        return new_rows

    return dataset.map(
        exec_verif,
        num_proc=NUM_PROC,
        batched=True,
        batch_size=1,
        remove_columns=dataset.column_names,
        with_indices=True,
    )
    
    

  from .autonotebook import tqdm as notebook_tqdm
[32m2025-11-03 16:31:43.772[0m | [1mINFO    [0m | [36mvif.utils.caching[0m:[36minstantiate_cache[0m:[36m26[0m - [1mseg_cache cache loaded[0m


## Execution of the verifiers

#### Config

In [3]:
import openai
import os
from google import genai
from google.genai import types as genTypes
from vif.baselines.verifiers_baseline import (
    FalconVerifier,
    TextVerifier,
    TextVisualVerifier,
    ViperGPTVerifier,
    VisualPropertiesVerifier,
    VisualVerifier,
    VisualCodeVerifier,
)


# OpenRouter is reached through the OpenAI-compatible client.
# The key is read with `os.environ[...]` so that a missing variable fails here
# with a KeyError instead of passing `api_key=None` to the client.
# NOTE(review): with `api_key=None` the OpenAI client is expected to fall back to
# OPENAI_API_KEY, which would then be sent to openrouter.ai — confirm against the
# installed openai version.
client = openai.Client(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)

# Google GenAI client, pinned to the "v1alpha" API version.
gclient = genai.Client(
    api_key=os.environ.get("GOOGLE_API_KEY"),
    http_options=genTypes.HttpOptions(api_version="v1alpha"),
)

# Only emit WARNING and above, on stdout.
logger.configure(handlers=[{"sink": sys.stdout, "level": "WARNING"}])

# Load the "full" configuration of VeriTikz and keep only the first two
# examples of the train split for quick prototyping.
ds = load_dataset("CharlyR/VeriTikz", "full", split="train")
ds = ds.select(range(2))


#### Text Verifier

In [14]:
# Text verifier: a single attempt per example of `ds`.
text_verifier: TexVerBaseline = TextVerifier(
    client=client,
    model="qwen/qwen3-vl-32b-instruct",
    temperature=0.5,
)

verifier_text_ds = execute_verifier_on_dataset(
    verifier=text_verifier,
    dataset=ds,
    n=1,
)

Map: 100%|██████████| 25/25 [04:40<00:00, 11.22s/ examples]


#### Visual Verifier

In [15]:
# Visual verifier, run on `ds` with the default number of attempts.
visual_verifier: TexVerBaseline = VisualVerifier(
    client=client,
    model="qwen/qwen3-vl-32b-instruct",
    temperature=0.5,
)

verifier_visual_ds = execute_verifier_on_dataset(
    verifier=visual_verifier,
    dataset=ds,
)

Map: 100%|██████████| 25/25 [01:58<00:00,  4.74s/ examples]


#### Text/Visual Verifier

In [18]:
# Text/visual verifier, run on `ds` with the default number of attempts.
text_visual_verifier: TexVerBaseline = TextVisualVerifier(
    client=client,
    model="qwen/qwen3-vl-32b-instruct",
    temperature=0.5,
)

verifier_text_visual_ds = execute_verifier_on_dataset(
    verifier=text_visual_verifier,
    dataset=ds,
)

Map: 100%|██████████| 25/25 [03:53<00:00,  9.32s/ examples]


#### Visual Verifier with code

In [4]:
# Visual-code verifier (this cell uses a different model than the other baselines),
# run on `ds` with the default number of attempts.
text_visual_code_verifier: TexVerBaseline = VisualCodeVerifier(
    client=client,
    model="openai/gpt-5-mini",
    temperature=0.5,
)

verifier_visual_code_ds = execute_verifier_on_dataset(
    verifier=text_visual_code_verifier,
    dataset=ds,
)

Map: 100%|██████████| 2/2 [01:43<00:00, 51.95s/ examples]


#### Visual Property Verifier

In [None]:
# Visual-properties verifier, run on `ds` with the default number of attempts.
visual_property_verifier: TexVerBaseline = VisualPropertiesVerifier(
    client=client,
    model="qwen/qwen3-vl-32b-instruct",
    temperature=0.5,
)

verifier_visual_property_ds = execute_verifier_on_dataset(
    verifier=visual_property_verifier,
    dataset=ds,
)

#### ViperGPT Verifier

In [None]:
# The full ViperGPT configuration lives in
# vif/baselines/verifiers_baseline/ViperGPT_adapt/ViperGPT_config.py
# Bound to its own name so it does not overwrite `visual_property_verifier`
# from the visual-properties cell.
vipergpt_verifier: TexVerBaseline = ViperGPTVerifier(
    model="qwen/qwen3-vl-32b-instruct",
    temperature=0.5,
    client=client,
)

verifier_vipergpt_ds = execute_verifier_on_dataset(vipergpt_verifier, ds)

#### Ours (Falcon Verifier)

In [None]:
# Falcon verifier: the oracle-generation and property models go through the
# OpenRouter client (`oclient`), the vision model through the Google client (`gclient`).
falcon_verifier: TexVerBaseline = FalconVerifier(
    oclient=client,
    gclient=gclient,
    oracle_gen_model="qwen/qwen3-vl-32b-instruct",
    oracle_gen_model_temperature=0.5,
    property_model="qwen/qwen3-vl-32b-instruct",
    property_model_temperature=0.5,
    vision_model="gemini-2.5-flash",
)

verifier_falcon_ds = execute_verifier_on_dataset(
    verifier=falcon_verifier,
    dataset=ds,
)

TypeError: FalconVerifier.__init__() missing 1 required keyword-only argument: 'oracle_gen_model_temperature'

### Concat and save data locally

In [7]:
from time import gmtime, strftime

from datasets import Dataset, concatenate_datasets

# Result datasets to merge. Every name listed here must have been produced by
# running the corresponding verifier cell in the current kernel session.
dss = [
    verifier_text_ds,
    verifier_visual_ds,
    verifier_text_visual_ds,
    verifier_visual_code_ds,
    #verifier_visual_property_ds,
    #verifier_vipergpt_ds,
    #verifier_falcon_ds,
]

# UTC timestamp with second resolution, so that two runs within the same hour
# do not write into the same output directory.
cur_time = strftime("%Y-%m-%d-%H-%M-%S-ds_conc", gmtime())
conc_dss: Dataset = concatenate_datasets(dss)
# NOTE(review): the path is relative to the kernel's working directory.
conc_dss.save_to_disk(f"notebooks/verifier_execution/{cur_time}")

NameError: name 'verifier_text_ds' is not defined

### Debugging code

In [14]:
import json

# Decode the first "final_request" entry of the first example's "errors" field
# and show it as indented JSON with sorted keys.
first_example_errors = verifier_visual_code_ds["errors"][0]
raw_final_request = first_example_errors["final_request"][0]
loaded_wrapped = json.loads(raw_final_request)
json.dumps(loaded_wrapped, sort_keys=True, indent=4)


'{\n    "code": "import cv2\\nimport numpy as np\\nfrom math import atan2, degrees\\n\\ndef verify_customization(initial_image, customized_image):\\n    # Helper to find small L-shaped axis origin by detecting short perpendicular line intersections\\n    def find_origin(img):\\n        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\\n        # Resize for speed if very large\\n        h0, w0 = gray.shape[:2]\\n        scale = 1.0\\n        if max(h0, w0) > 1000:\\n            scale = 1000.0 / max(h0, w0)\\n            gray = cv2.resize(gray, (int(w0*scale), int(h0*scale)), interpolation=cv2.INTER_AREA)\\n        h, w = gray.shape[:2]\\n        edges = cv2.Canny(gray, 50, 150, apertureSize=3)\\n        # Dilate a bit to join small segments\\n        kernel = np.ones((2,2), np.uint8)\\n        edges = cv2.dilate(edges, kernel, iterations=1)\\n        # Hough probabilistic\\n        lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi/180, threshold=30, minLineLength=10, maxLineGap=10)\\n      

In [16]:
# Print the "code" entry of the decoded request so its newlines are rendered.
print(loaded_wrapped["code"])

import cv2
import numpy as np
from math import atan2, degrees

def verify_customization(initial_image, customized_image):
    # Helper to find small L-shaped axis origin by detecting short perpendicular line intersections
    def find_origin(img):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Resize for speed if very large
        h0, w0 = gray.shape[:2]
        scale = 1.0
        if max(h0, w0) > 1000:
            scale = 1000.0 / max(h0, w0)
            gray = cv2.resize(gray, (int(w0*scale), int(h0*scale)), interpolation=cv2.INTER_AREA)
        h, w = gray.shape[:2]
        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
        # Dilate a bit to join small segments
        kernel = np.ones((2,2), np.uint8)
        edges = cv2.dilate(edges, kernel, iterations=1)
        # Hough probabilistic
        lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi/180, threshold=30, minLineLength=10, maxLineGap=10)
        if lines is None:
            return None
        lines =

In [19]:
# Print the exception text stored under "wrapped_exception" in the decoded request.
print(loaded_wrapped["wrapped_exception"])

OpenCV(4.11.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'

