# Testing the Falcon agent with an ablation of the edition module

### Dataset instantiation

In [1]:

from datasets import load_dataset


# Load the "test" split of the "tikz" configuration of CharlyR/vtikz and keep
# only the columns used in this notebook.
ds = load_dataset("CharlyR/vtikz", "tikz", split="test").select_columns(
    [
        "id",
        "type",
        "instruction",
        "code",
        "image_solution",
        "image_input",
        "code_solution",
    ]
)

# Optional: uncomment to restrict the run to a 10% sample of the "animal" rows.
# ds = ds.filter(lambda row: row["type"] == "animal").train_test_split(test_size=0.1)["test"]



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from collections import defaultdict
import inspect

from vif.falcon.oracle.oracle import OracleResponse
from vif.utils.renderer.tex_renderer import TexRenderer

# Renderer used by get_solution to turn a solution's source string into an image.
renderer = TexRenderer()

# Lookup table: (code, instruction) -> (code_solution, id), built column-wise
# from the dataset. If two rows share the same key, the later row wins.
solution_map = dict(
    zip(
        zip(ds["code"], ds["instruction"]),
        zip(ds["code_solution"], ds["id"]),
    )
)

# Column-oriented log filled by get_solution (one entry per column per call).
solution_oracle_dict = defaultdict(list)


def get_solution(instruction, code, oracle):
    """Return the stored solution for ``(code, instruction)`` and log the oracle's verdict on it.

    The solution is looked up in ``solution_map``, its first element
    (``solution[0]``) is rendered with ``renderer`` and passed to ``oracle``.
    One complete row is then appended to ``solution_oracle_dict``.

    Args:
        instruction: Instruction text; second half of the lookup key.
        code: Original code; first half of the lookup key.
        oracle: Callable that receives the rendered image and returns an object
            exposing ``evaluation_code``, ``feedbacks`` and ``condition``.

    Returns:
        The ``code_solution`` value stored for the pair, unchanged.
        NOTE(review): the whole stored value is returned while only
        ``solution[0]`` is rendered -- presumably ``code_solution`` holds
        several accepted solutions; confirm this is what the caller expects.

    Raises:
        KeyError: If ``(code, instruction)`` is not present in ``solution_map``.
    """
    solution, solution_id = solution_map[(code, instruction)]

    # Render and evaluate BEFORE touching the log: if either step raises, no
    # partial row is recorded and every column keeps the same length, which
    # Dataset.from_dict requires later on.
    image = renderer.from_string_to_image(solution[0])
    oracle_response: OracleResponse = oracle(image)

    row = {
        "instruction": instruction,
        "id": solution_id,
        "code": code,
        "code_solution": solution,
        "custom_image": image,
        "oracle_code": oracle_response.evaluation_code,
        "oracle_feedback": oracle_response.feedbacks,
        "oracle_condition": oracle_response.condition,
    }
    for column, value in row.items():
        solution_oracle_dict[column].append(value)
    return solution

### Instantiating the agent with a mocked edition module

In [3]:
from openai import OpenAI
import os
from google import genai
from google.genai import types as genTypes
from vif.falcon.edition import OracleEditionModule
from vif.falcon.falcon import Falcon
from vif.falcon.oracle.guided_oracle.guided_code_oracle import OracleGuidedCodeModule
from vif.feature_identification.feature_identification import SimpleGeminiIdentificationModule
from vif.utils.renderer.tex_renderer import TexRenderer

# Gemini settings shared by the identification and oracle modules.
GEMINI_MODEL = "gemini-2.5-pro"
GEMINI_TEMPERATURE = 0.3

# Folder handed to Falcon as `debug_folder`. Set VIF_DEBUG_FOLDER to run the
# notebook on another machine; the default is the original author's path.
DEBUG_FOLDER = os.environ.get(
    "VIF_DEBUG_FOLDER", "/home/creux/Documents/AI/VIFagent/.tmp/debug"
)

# Native Google GenAI client, used by the identification module.
client = genai.Client(
    api_key=os.environ.get("GOOGLE_API_KEY"),
    http_options=genTypes.HttpOptions(api_version="v1alpha"),
)

simple_identification_module = SimpleGeminiIdentificationModule(
    client=client,
    model=GEMINI_MODEL,
    temperature=GEMINI_TEMPERATURE,
)

# The oracle module talks to Gemini through its OpenAI-compatible endpoint,
# hence the OpenAI client with a Google base URL and the same API key.
oracle_module = OracleGuidedCodeModule(
    model=GEMINI_MODEL,
    temperature=GEMINI_TEMPERATURE,
    client=OpenAI(
        api_key=os.environ.get("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    ),
)

# Ablation: the edition module gets placeholder model/client values and its
# `customize` attribute is replaced by the dataset lookup `get_solution`.
# NOTE(review): presumably Falcon then calls customize(instruction, code, oracle)
# instead of querying a model -- confirm against OracleEditionModule.
edition_module = OracleEditionModule(
    model="test",
    client=None,
)
edition_module.customize = get_solution

agent = Falcon(
    code_renderer=TexRenderer().from_string_to_image,
    identification_module=simple_identification_module,
    oracle_module=oracle_module,
    edition_module=edition_module,
    debug=True,
    debug_folder=DEBUG_FOLDER,
)


You are trying to use a model that was created with Sentence Transformers version 5.0.0, but you're currently using version 4.1.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.


In [4]:

def eval(row):
    """Run the agent on one dataset row and hand the row back unchanged.

    The row's "code" and "instruction" fields are passed to
    ``agent.apply_instruction``; its result is discarded.

    NOTE(review): this name shadows the builtin ``eval`` for the rest of the
    notebook; it is kept so that existing references keep working.
    """
    code, instruction = row["code"], row["instruction"]
    agent.apply_instruction(code, instruction)
    return row

# The map is run only for its side effects (the agent calls that fill
# solution_oracle_dict). Disable the on-disk cache so the function is really
# executed on every run instead of being skipped in favour of a cached result.
ds.map(eval, load_from_cache_file=False)

Map:   0%|          | 0/2 [00:00<?, ? examples/s][32m2025-07-11 14:56:13.909[0m | [1mINFO    [0m | [36mvif.falcon.falcon[0m:[36mapply_instruction[0m:[36m88[0m - [1mIdentifying features[0m
[32m2025-07-11 14:56:13.909[0m | [1mINFO    [0m | [36mvif.feature_identification.feature_identification[0m:[36msegments_from_image[0m:[36m66[0m - [1mGetting the features from the image[0m
[32m2025-07-11 14:56:23.868[0m | [1mINFO    [0m | [36mvif.feature_identification.feature_identification[0m:[36msegments_from_image[0m:[36m69[0m - [1mgetting the segments from the image[0m
[32m2025-07-11 14:56:32.974[0m | [1mINFO    [0m | [36mvif.falcon.falcon[0m:[36mapply_instruction[0m:[36m92[0m - [1mCreating the oracle[0m
Map:   0%|          | 0/2 [01:04<?, ? examples/s]


KeyError: 'label'

In [None]:
from datasets import Dataset
import pandas as pd
# Turn the column-oriented log collected in solution_oracle_dict into a
# Dataset, then into a pandas DataFrame for inspection in the following cells.
# All columns must have the same length for from_dict to succeed.
ds_dict = Dataset.from_dict(solution_oracle_dict)
df_dict = ds_dict.to_pandas()

In [None]:
# Show the oracle's evaluation code recorded for the first logged row.
print(df_dict["oracle_code"][0])

In [None]:
# Show the oracle's feedback recorded for the first logged row.
print(df_dict["oracle_feedback"][0])

In [None]:
# Preview the first rows only rather than dumping the whole frame, which also
# carries the rendered-image column.
df_dict.head()