In [1]:
from env import CruxEnv
from pydantic import BaseModel
from tqdm import tqdm

class config(BaseModel):
    """Settings object that is passed to ``CruxEnv`` when an evaluation environment is built."""
    # NOTE(review): presumably the number of benchmark problems to use — confirm against CruxEnv.
    limit: int = 250
    # NOTE(review): presumably selects which task variant is evaluated — confirm against CruxEnv.
    subset: str = "output"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
from openai import AzureOpenAI
import anthropic
# Module-level Anthropic client referenced by the request helpers in this notebook.
# The API key is taken from the ANTHROPIC_API_KEY environment variable (os.getenv
# yields None when it is unset) instead of being hard-coded here.
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))  

def run(prompt, model="claude-3-5-sonnet-20240620",
        system="You are a helpful assistant.", max_tokens=2048):
    """Send a text-only prompt through the shared Anthropic client and return the reply text.

    Args:
        prompt: Content of the single user message.
        model: Model identifier forwarded to ``client.messages.create``.
        system: System prompt forwarded to ``client.messages.create``.
        max_tokens: Token budget forwarded to ``client.messages.create``.

    Returns:
        The ``text`` attribute of the first content block of the response.

    Raises:
        Whatever ``client.messages.create`` raises; an ``IndexError`` if the
        response has no content blocks. Nothing is caught here so that callers
        can record the failure themselves.
    """
    response = client.messages.create(
        model=model,
        system=system,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    # Only the first content block is returned, matching the original behavior.
    return response.content[0].text



In [24]:
from py2cfg import CFGBuilder
import base64

def code_to_image(code: str):
    """Render the control flow graph of ``code`` and return the image as base64 text.

    The graph is built with ``CFGBuilder`` and rendered via ``build_visual``; the
    image is then read back from ``ControlFlowGraph.jpeg`` in the current working
    directory, so every call reuses the same file name.

    Args:
        code: Source text handed to ``CFGBuilder().build_from_src``.

    Returns:
        The bytes of the rendered file, base64-encoded and decoded to a ``str``.
    """
    # Single source of truth for the output file, so the name passed to the
    # renderer and the path read back below cannot drift apart.
    graph_name = 'ControlFlowGraph'
    image_format = 'jpeg'

    # TODO: quick fix for now, since although the CFG image is saved, the
    # CFGBuilder is not able to finish.
    graph = CFGBuilder().build_from_src(graph_name, code)
    graph.build_visual(graph_name, image_format, show=False)

    with open(f'{graph_name}.{image_format}', 'rb') as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

def visual_run(prompt, image_data, model="claude-3-5-sonnet-20240620",
               system="You are a helpful assistant.", max_tokens=2048,
               media_type="image/jpeg"):
    """Send a prompt plus one base64 image through the shared Anthropic client.

    The user message contains two content parts, in this order: a text part
    (a fixed control-flow-graph instruction followed by ``prompt``) and an
    image part carrying ``image_data``.

    Args:
        prompt: Task text appended to the fixed instruction.
        image_data: Base64-encoded image payload, as a string.
        model: Model identifier forwarded to ``client.messages.create``.
        system: System prompt forwarded to ``client.messages.create``.
        max_tokens: Token budget forwarded to ``client.messages.create``.
        media_type: Media type declared for ``image_data``.

    Returns:
        The ``text`` attribute of the first content block of the response.

    Raises:
        Whatever ``client.messages.create`` raises; an ``IndexError`` if the
        response has no content blocks.
    """
    text_part = {
        "type": "text",
        "text": "You are given a control flow graph image of a code snippet, utilize them in code execution reasoning process. " + prompt,
    }
    image_part = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": media_type,
            "data": image_data,
        },
    }
    response = client.messages.create(
        model=model,
        system=system,
        messages=[{"role": "user", "content": [text_part, image_part]}],
        max_tokens=max_tokens,
    )
    # Only the first content block is returned, matching the original behavior.
    return response.content[0].text

In [29]:

# Text-only evaluation over the first two problems: ask the model for a
# solution, let the environment check it, and accumulate per-problem results.
cfg = config()
env = CruxEnv(cfg)
for idx in tqdm(range(2)):
    error = False
    env.set_problem(idx)
    prompt = env.get_problem_statement()
    try:
        solution = run(prompt)
    except Exception as e:
        # Best effort: a failed request is recorded as an empty solution with
        # error=True. `Exception` (not a bare except) keeps Ctrl-C / kernel
        # interrupts working, and tqdm.write reports the cause without
        # breaking the progress bar.
        error = True
        solution = ""
        tqdm.write(f"problem {idx}: {e!r}")
    correct = env.check_solution(solution)
    env.accumulate_result({"is_correct": correct, "solution": solution, "error": error})
env.finalize()
print(env.result)

100%|██████████| 2/2 [00:11<00:00,  5.99s/it]

{'correct': 0, 'total': 2, 'error': 0}





In [30]:
# Show the raw model reply stored for the problem at index 1 of whichever
# evaluation loop last populated `env`.
print(env.results[1]["solution"])

Let's execute the code step by step:

1. The function f is defined, which takes three arguments a, b, and c.
2. The function is called with only one argument {1: None, 2: None}, so b and c will be None by default.
3. An empty dictionary result is created.
4. The for loop iterates over a, b, and c:
   - First iteration: d = {1: None, 2: None}
     dict.fromkeys(d) creates a new dictionary with keys from d and values set to None
     result.update() adds these key-value pairs to result
   - Second iteration: d = None
     dict.fromkeys(None) raises a TypeError because None is not iterable
   - The loop ends due to the TypeError, and the function execution stops

5. Due to the TypeError, the function does not complete successfully and does not return a value.

Therefore, the assertion cannot be completed with a literal value, as the function will raise an exception instead of returning a result.

[/ANSWER]
assert f({1: None, 2: None}) == TypeError()
[/ANSWER]


In [8]:
# Print the code returned by env.get_code() — presumably for the problem most
# recently selected with env.set_problem; verify against CruxEnv.
# NOTE(review): this cell's execution count (8) is lower than its neighbours',
# so the shown output comes from an earlier kernel state.
print(env.get_code())

def f(a, b, c):
    result = {}
    for d in a, b, c:
        result.update(dict.fromkeys(d))
    return result


In [33]:
# Image-augmented evaluation: each problem's code is rendered to a control
# flow graph image and sent to the model together with the problem statement.
cfg = config()
env = CruxEnv(cfg)
# NOTE(review): 250 equals the default of config.limit — confirm whether this
# loop bound is meant to follow that setting.
for idx in tqdm(range(250)):
    error = False
    env.set_problem(idx)
    prompt = env.get_problem_statement()
    code = env.get_code()
    try:
        solution = visual_run(prompt, code_to_image(code))
    except Exception as e:
        # Best effort: a failed render or request is recorded as an empty
        # solution with error=True. tqdm.write keeps the progress bar intact
        # and the index says which problem failed.
        solution = ""
        error = True
        tqdm.write(f"problem {idx}: {e!r}")
    correct = env.check_solution(solution)
    env.accumulate_result({"is_correct": correct, "solution": solution, "error": error})
env.finalize()
print(env.result)

 10%|█         | 26/250 [01:56<15:02,  4.03s/it]

In [28]:
# Show the raw model reply stored for the problem at index 1.
# NOTE(review): this cell's execution count (28) is lower than that of the
# evaluation loop above it (33), so the displayed output was produced before
# that loop's latest run — re-run after the loop to refresh it.
print(env.results[1]["solution"])

Let's execute the code step by step:

1. The function f is called with one argument: {1: None, 2: None}. This means a = {1: None, 2: None}, and b and c are not provided.

2. result is initialized as an empty dictionary: result = {}

3. The for loop iterates over (a, b, c). However, only a is provided, so it will only iterate once.

4. In the iteration:
   d = {1: None, 2: None}
   dict.fromkeys(d) creates a new dictionary with keys from d and values set to None: {1: None, 2: None}
   result.update({1: None, 2: None}) updates the result dictionary

5. After the loop, result = {1: None, 2: None}

6. The function returns result

Therefore, the output will be {1: None, 2: None}.

[ANSWER]
assert f({1: None, 2: None}) == {1: None, 2: None}
[/ANSWER]





In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
# Model identifier handed to AutoModel.from_pretrained.
path = "OpenGVLab/InternVL2-8B"

# Step 1: load the weights in bfloat16 with reduced CPU memory usage.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally — only keep it for sources you trust.
model = AutoModel.from_pretrained(
    path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, trust_remote_code=True
)

# Step 2: switch to evaluation mode and move the model to the CUDA device.
model = model.eval().cuda()
