In [8]:
import sys
# Passing 0 disables Python's limit on the number of digits allowed in
# int <-> str conversions.
# NOTE(review): presumably required because some dataset fields hold very
# large integers that must round-trip through json — confirm.
sys.set_int_max_str_digits(0)

import json
from tqdm import tqdm
from datasets import load_dataset, Dataset

In [11]:
# Split to convert; the same name is reused when the result is pushed.
split = "test"
# Raw APPS records for that split, using the "all" configuration.
ds = load_dataset("codeparrot/apps", name="all", split=split)

In [None]:
def _convert_instance(raw):
    """Flatten one raw record into the schema that is pushed to the hub.

    Args:
        raw: Mapping providing the keys "problem_id", "question",
            "solutions", "input_output", "difficulty" and "starter_code".
            "solutions" and "input_output" are decoded with ``json.loads``.

    Returns:
        A dict with the keys "problem_id", "question", "solutions",
        "test_cases", "difficulty" and "starter_code", where "solutions" and
        "test_cases" are JSON strings; or ``None`` when the record has no
        solutions or no input/output data.

    Raises:
        KeyError: if one of the expected keys is missing.
        ValueError: if a JSON field cannot be decoded or the problem id is
            not convertible to ``int``.
    """
    # Read every field up front so a missing key is reported even for
    # records that would otherwise be skipped as empty.
    problem_id = raw["problem_id"]
    question = raw["question"]
    solutions_json = raw["solutions"]
    input_output_json = raw["input_output"]
    difficulty = raw["difficulty"]
    starter_code = raw["starter_code"]

    if not input_output_json or not solutions_json:
        return None

    input_output = json.loads(input_output_json)
    solutions = json.loads(solutions_json)

    # Pair inputs with outputs positionally; zip stops at the shorter list.
    test_cases = [
        {'input': input_, 'output': output_}
        for input_, output_ in zip(input_output["inputs"], input_output["outputs"])
    ]

    return {
        'problem_id': int(problem_id),
        'question': str(question),
        'solutions': json.dumps(solutions),
        'test_cases': json.dumps(test_cases),
        'difficulty': str(difficulty),
        'starter_code': str(starter_code)
    }


instance_list = list()
for instance in tqdm(ds):
    try:
        converted = _convert_instance(instance)
    except Exception as e:
        # Best effort: a malformed record is reported (with its id, so it can
        # be found again) and skipped instead of aborting the whole run.
        print(f"Skipping problem {instance.get('problem_id')!r}: {type(e).__name__}: {e}")
        continue
    if converted is not None:
        instance_list.append(converted)

# NOTE: `ds` is rebound from the raw dataset to the converted one, so this
# cell cannot be re-run without first re-running the cell that loads `ds`.
ds = Dataset.from_list(instance_list)
ds.push_to_hub("Elfsong/APPS", split=split)

In [2]:
import requests

In [15]:
def post_code_submit(libs, code: str, timeout: int, profiling: bool, request_timeout: float = 30.0) -> str:
    """Submit Python code to the execution service and return its task id.

    Args:
        libs: Value sent as the "libraries" field of the request.
        code: Source code sent as the "code" field.
        timeout: Value sent as the "timeout" field of the request
            (presumably the execution time limit — confirm with the service).
        profiling: Value sent as the "run_memory_profile" field.
        request_timeout: Seconds to wait for the HTTP request itself, so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The "task_id" entry of the JSON response.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
        KeyError: if the JSON response has no "task_id" entry.
    """
    payload = {
        'language': "python",
        'code': code,
        'libraries': libs,
        'timeout': timeout,
        'run_memory_profile': profiling
    }

    response = requests.post(
        'https://monolith.cool/execute',
        json=payload,
        timeout=request_timeout,
    )
    # Surface HTTP failures directly instead of as a confusing KeyError or
    # JSON decoding error further down.
    response.raise_for_status()
    return response.json()['task_id']

def get_code_result(task_id: str, request_timeout: float = 30.0) -> dict:
    """Fetch the result record of a previously submitted task.

    Args:
        task_id: Identifier interpolated into the results URL.
        request_timeout: Seconds to wait for the HTTP request before
            ``requests`` raises a timeout error.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked here, so callers see whatever JSON the service returned.
    """
    response = requests.get(
        f'https://monolith.cool/results/{task_id}',
        timeout=request_timeout,
    )
    return response.json()

In [32]:
# Read the script text into `code`. The encoding is pinned so the result
# does not depend on the platform's default locale.
with open('apps_template.py', 'r', encoding='utf-8') as f:
    code = f.read()

In [33]:
# Submit `code` with no extra libraries, 60 as the timeout field, and
# memory profiling switched off.
task_id = post_code_submit(libs=[], code=code, timeout=60, profiling=False)

In [37]:
# Retrieve the JSON result record for the task submitted above.
response = get_code_result(task_id=task_id)

In [None]:
# Show only the part of stderr that precedes the first "=====" rule line.
stderr_text = response['output_dict']['stderr']
stderr_head = stderr_text.partition('======================================================================')[0]
print(stderr_head)

In [20]:
# Load the test split of the converted dataset from the hub.
ds = load_dataset(path="Elfsong/APPS", split="test")

In [20]:
import json
import textwrap

In [None]:
# Sample solution: reads two integers from separate lines and prints their sum.
raw_solution = """
a = int(input())
b = int(input())
print(a + b)
"""
# Drop the surrounding blank lines, then indent every line by four spaces.
solution = textwrap.indent(raw_solution.strip(), " " * 4)
print(solution)

In [24]:
# Read the format-string template into TEMPLATE. The encoding is pinned so
# the result does not depend on the platform's default locale.
# NOTE(review): an earlier cell reads 'apps_template.py' while this one reads
# 'apps_template' — confirm both files exist and the names are intentional.
with open('apps_template', 'r', encoding='utf-8') as f:
    TEMPLATE = f.read()

In [25]:
# Sample test cases for the sample solution defined earlier in this notebook.
# That solution calls input() twice and prints a + b, so each case supplies
# two integers on separate lines and expects their sum. The previous samples
# put three integers on one line, which the solution cannot parse.
test_cases = [
    {"input": "1\n2\n", "output": "3\n"},
    {"input": "10\n20\n", "output": "30\n"}
]

# Pretty-printed JSON text of the cases, ready to be inserted into the template.
test_case_list_str = json.dumps(test_cases, indent=4)

In [26]:
# Values for the two named placeholders filled in the template.
template_fields = {
    "code_solution": solution,
    "test_case_list": test_case_list_str,
}
final_code = TEMPLATE.format(**template_fields)

In [None]:
# Print the rendered script so the substituted template can be inspected.
print(final_code)

In [None]:
from datasets import load_dataset

# Authenticate first (e.g. with `huggingface-cli login`) to access this dataset.
# Loads the "qwen_2_5_7b_instruct" configuration of the evaluation dataset.
ds = load_dataset("Elfsong/APPS_Model_Evaluation", name="qwen_2_5_7b_instruct")

In [None]:
# Bare expression: the notebook displays the repr of the loaded dataset.
ds

In [None]:
import os

from huggingface_hub import InferenceClient

# Read the access token from the environment instead of embedding it in the
# notebook. The previous code passed the literal string "HF_TOKEN" as the key.
# A KeyError here means the HF_TOKEN environment variable is not set.
client = InferenceClient(
    provider="together",
    api_key=os.environ["HF_TOKEN"],
)

# One user message with two parts: a text instruction and an image URL.
completion = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image in one sentence."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    }
                }
            ]
        }
    ],
    max_tokens=512,
)

# Print the message object of the first returned choice.
print(completion.choices[0].message)