In [73]:
import json
import random
import pathlib
from datasets import load_dataset

In [None]:
def collect_unique_templates(results_dir="./batch_results/", output_dir="cpp_templates"):
    """Collect the distinct C++ test templates found in batch result files.

    Every ``*.jsonl`` file in ``results_dir`` is read line by line. Each record
    supplies a ``custom_id`` (whose second ``-``-separated field is used as the
    problem id) and the generated template text. The first time a given
    template text is seen it is written to
    ``<output_dir>/test_template_<problem_id>.cpp``; later records with the
    same text are skipped.

    Args:
        results_dir: Directory containing the batch result ``*.jsonl`` files.
        output_dir: Directory that receives the ``.cpp`` template files. It is
            created on demand.

    Returns:
        The distinct template texts, in the order they were first seen.
    """
    output_path = pathlib.Path(output_dir)
    unique_templates = []
    seen_templates = set()  # O(1) membership test instead of scanning the list

    # sorted() makes "first seen" deterministic across file systems.
    for results_file in sorted(pathlib.Path(results_dir).glob("*.jsonl")):
        print(results_file)
        with open(results_file, "r") as results_handle:
            for line in results_handle:
                if not line.strip():
                    continue  # tolerate blank / trailing lines
                instance = json.loads(line)
                problem_id = instance['custom_id'].split('-')[1]
                template = instance['response']['body']['choices'][0]['message']['content']
                if template in seen_templates:
                    continue
                seen_templates.add(template)
                unique_templates.append(template)

                output_path.mkdir(parents=True, exist_ok=True)
                # A separate handle name: the input handle is no longer shadowed.
                with open(output_path / f"test_template_{problem_id}.cpp", "w") as template_handle:
                    template_handle.write(template)
    return unique_templates


test_templates = collect_unique_templates()

In [69]:
# Load the C++ split of the Venus dataset and index its rows by integer question id.
venus_ds = load_dataset("Elfsong/Venus", "cpp", split='train')
venus_dict = {int(row['question_id']): row for row in venus_ds}

In [None]:
# Load the LeetCode problem data and index its rows by integer problem id.
leetcode_ds = load_dataset("Elfsong/leetcode_data", split='train')
leetcode_dict = {int(row['problem_id']): row for row in leetcode_ds}

In [71]:
def cpp_raw_string_literal(text):
    """Return ``text`` wrapped in a C++ raw string literal.

    A raw literal ``R"d(...)d"`` ends at the first ``)d"`` sequence. With the
    empty delimiter, any payload containing ``)"`` (for example the JSON string
    ``"()"``) would terminate the literal early and yield invalid C++. The
    delimiter is therefore grown with underscores until its closing sequence
    does not occur in ``text``. Payloads without ``)"`` still produce the plain
    ``R"(...)"`` form.

    Raises:
        ValueError: if no delimiter within the 16-character limit is free.
    """
    delimiter = ""
    while f'){delimiter}"' in text:
        delimiter += "_"
        if len(delimiter) > 16:
            raise ValueError("no usable raw-string delimiter for this payload")
    return f'R"{delimiter}({text}){delimiter}"'


# For every generated template, substitute one randomly chosen reference
# solution and the problem's test cases, then write cpp_templates/test_<id>.cpp.
# sorted() snapshots the directory listing before this loop starts adding files to it.
for template_name in sorted(pathlib.Path("./cpp_templates/").glob("*.cpp")):
    # Only the test_template_<id>.cpp inputs are processed.
    if "template" not in template_name.stem: continue
    problem_id = int(template_name.stem.split("_")[2])

    # Both datasets must know the problem.
    if problem_id not in leetcode_dict: continue
    if problem_id not in venus_dict: continue

    leetcode_instance = leetcode_dict[problem_id]
    venus_instance = venus_dict[problem_id]

    solutions = venus_instance['rt_list'] + venus_instance['mm_list']
    if len(solutions) == 0: continue
    # NOTE(review): the choice is unseeded, so the selected solution differs between runs.
    solution_code = random.choice(solutions)['code']

    # Re-serialize compactly and embed as a C++ raw string literal.
    test_cases = json.loads(leetcode_instance['test_cases'])
    test_case_str = json.dumps(test_cases, separators=(',', ':'))
    test_case_str_literal = cpp_raw_string_literal(test_case_str)

    # read_text() closes the file; the bare open(...).read() left that to the GC.
    template_code = template_name.read_text()
    template_code = template_code.replace("==Solution Code==", solution_code)
    template_code = template_code.replace("==Test Cases==", test_case_str_literal)

    with open(f"cpp_templates/test_{problem_id}.cpp", "w") as f:
        f.write(template_code)

In [None]:
from datasets import load_dataset, Dataset
# Load the "gpt_4o" configuration (train split) of the model-evaluation dataset.
ds = load_dataset(
    path="Elfsong/Venus_Model_Evaluation",
    name="gpt_4o",
    split="train",
)

In [15]:
import sys
sys.path.append("/home/mingzhe/Projects/Afterburner")

import json
import utils
from datasets import load_dataset, Dataset

In [None]:
# Openai
# Parse an OpenAI batch output file into {"problem_id", "solution"} records and
# push them to the Hub as the "o4_mini" configuration.
test_packs = list()
with open("./batch_output/batch_6807b843a96c81909963f1215d3c414c_output.jsonl", "r") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing lines
        instance = json.loads(line)
        custom_id = instance['custom_id']
        problem_id = int(custom_id.split('-')[1])

        # Reset per record. Previously a failed record appended whatever
        # `generated_solution` still held from the previous record (or raised
        # NameError on the first one), attributing a solution to the wrong problem.
        generated_solution = ""
        try:
            generated_solution = instance['response']['body']['choices'][0]['message']['content']
            # If no code block can be extracted, the raw response text is kept.
            generated_solution = utils.extract_code_blocks(generated_solution)[0]['code']
        except Exception as e:
            print(f"[-] Generation Error: {e}")

        test_packs.append({"problem_id": problem_id, "solution": generated_solution})

ds = Dataset.from_list(test_packs)
ds.push_to_hub("Elfsong/Venus_Model_Evaluation", "o4_mini", private=True)

In [None]:
# Claude
# Parse an Anthropic message-batch results file into {"problem_id", "solution"}
# records and push them to the Hub.
test_packs = list()
with open("./batch_output/msgbatch_01H6UHpUWZKnRRzBeFm7UPEz_results.jsonl", "r") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing lines
        instance = json.loads(line)
        custom_id = instance['custom_id']
        problem_id = int(custom_id.split('-')[1])

        # Reset per record. Previously a failed record appended whatever
        # `generated_solution` still held from the previous record (or raised
        # NameError on the first one), attributing a solution to the wrong problem.
        generated_solution = ""
        try:
            generated_solution = instance['result']['message']['content'][0]["text"]
            # If no code block can be extracted, the raw response text is kept.
            generated_solution = utils.extract_code_blocks(generated_solution)[0]['code']
        except Exception as e:
            print(f"[-] Generation Error: {e}")

        test_packs.append({"problem_id": problem_id, "solution": generated_solution})

ds = Dataset.from_list(test_packs)
# NOTE(review): this cell parses a Claude batch, but the configuration name
# starts with "deepseek_v3_memory" — confirm the target config is intended.
ds.push_to_hub("Elfsong/Venus_Model_Evaluation", "deepseek_v3_memory_claude_3_7_sonnet", private=True)

In [None]:
import os

from google import genai

# The API key is read from the environment instead of being committed to the
# notebook. The key that was previously hard-coded here is exposed and must be revoked.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

# Smoke test: one short request to confirm the client and key work.
response = client.models.generate_content(
    model="gemini-2.0-flash", contents="Explain how AI works in a few words"
)
print(response.text)

In [45]:
# Prompt template, filled with str.format(). Placeholders:
#   target_lang, problem_description, original_solution, original_passed,
#   original_time, original_memory, original_integral, efficiency_instruction.
# The text begins with a newline because the opening triple quote is followed
# directly by a line break.
# NOTE(review): the "Original Solution" code fence is hard-coded as python even
# though the language is parameterized via {target_lang} — confirm this is intended.
AFTERBURNER_GENERATION_TEMPLATE = """
## Instructions
You are an expert competitive programmer who excels at solving algorithm problems in multiple programming languages.
Your task is to implement a solution to the following problem in {target_lang}.

## Problem Description
{problem_description}

## Original Solution
```python
{original_solution}
```

## Original Performance
Passed: {original_passed} / Time: {original_time} / Memory: {original_memory} / Integral: {original_integral}

## Output Formats
- Fix the original solution if it was not passed. Optimize the {efficiency_instruction} performance if the original solution was passed.
- EXCLUDE ALL explanations, code comments, import/package/library statements, additional classes or functions outside of the starter code scope, or starting code like `if __name__ == "__main__":` or `func main()` or `package main` or `using namespace std;`.
- Start your response with your thinking process within <thinking>...</thinking> tags, and provide the complete solution code with <solution>...</solution> tags.
"""

In [46]:
# Example values for every placeholder of the generation template: one sample
# problem, a passing solution, and its measured performance figures.
example_fields = {
    "target_lang": "python",
    "problem_description": "You are given two strings `word1` and `word2`. Merge the strings by adding letters in alternating order, starting with `word1`. If a string is longer than the other, append the additional letters onto the end of the merged string.\n\n\nReturn *the merged string.*",
    "original_solution": "class Solution:\n    def mergeAlternately(self, word1: str, word2: str) -> str:\n        i=j=k=0\n        str1 = \"\"\n\n        while i<len(word1) and j<len(word2):\n            str1+=word1[i]+word2[j]\n            i+=1\n            j+=1\n\n        if i<len(word1):\n            str1+=word1[i:]\n        if j <len(word2):\n            str1+=word2[j:]\n        return (str1)",
    "original_passed": True,
    "original_time": 580.404275,
    "original_memory": 23404.0,
    "original_integral": 296272.0,
    "efficiency_instruction": "time-efficient",
}

# Render the final prompt text.
prompt = AFTERBURNER_GENERATION_TEMPLATE.format(**example_fields)

In [47]:
import os

from google import genai
from google.genai import types

# The API key is read from the environment instead of being committed to the
# notebook. The key that was previously hard-coded here is exposed and must be revoked.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

# Send the rendered prompt with a thinking budget of 2048.
response = client.models.generate_content(
    model="gemini-2.5-pro-preview-03-25",
    contents=prompt,
    config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=2048)
    ),
)

In [2]:
import json
import os
from openai import OpenAI

# OpenAI-compatible client pointed at the Nebius endpoint. The token is read
# from the environment instead of being committed to the notebook. The token
# that was previously hard-coded here is exposed and must be revoked.
client = OpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=os.environ["NEBIUS_API_KEY"],
)

In [None]:
# Upload the batch input file. The `with` block closes the handle once the
# upload call returns; the original bare open() never closed it.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
with open("/home/mingzhe/Projects/Afterburner/evaluation/venus/batch_input/_integral_qwen_32b_batchinput.jsonl", "rb") as batch_input_file:
    batch_requests = client.files.create(
        file=batch_input_file,
        purpose="batch"
    )
print(batch_requests)

In [36]:
# Create a chat-completions batch job from the previously uploaded input file.
batch_metadata = {"description": "Asynchronous job"}
batch_request = client.batches.create(
    completion_window="24h",
    endpoint="/v1/chat/completions",
    input_file_id=batch_requests.id,
    metadata=batch_metadata,
)

Batch(id='batch_e99adb9c-d359-47e5-978a-7e820c0a2698', completion_window='24h', created_at=1745697027, endpoint='/v1/chat/completions', input_file_id='file-d5cf6f24-ba7d-4da4-b265-ba7a8a2f8b27', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'Asynchronous job'}, output_file_id=None, request_counts=BatchRequestCounts(completed=None, failed=None, total=None))

In [3]:
# Look up the batch job by its id and show its current state.
batch_id = "batch_e99adb9c-d359-47e5-978a-7e820c0a2698"
status = client.batches.retrieve(batch_id)
print(status)

Batch(id='batch_e99adb9c-d359-47e5-978a-7e820c0a2698', completion_window='24h', created_at=1745697027, endpoint='/v1/chat/completions', input_file_id='file-d5cf6f24-ba7d-4da4-b265-ba7a8a2f8b27', object='batch', status='done', cancelled_at=None, cancelling_at=None, completed_at=1745700933, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=1745700933, in_progress_at=1745697031, metadata={'description': 'Asynchronous job'}, output_file_id='39b7c2c6-1bc0-4b5b-9f4e-1bc7e63ed238', request_counts=BatchRequestCounts(completed=300, failed=0, total=300))


In [5]:
# Fetch the content of the batch's output file.
output_file_id = status.output_file_id
batch_result = client.files.content(output_file_id)

In [4]:
import sys
sys.path.append("/home/mingzhe/Projects/Afterburner")

import json
import utils
from datasets import load_dataset, Dataset

In [8]:
# Parse the downloaded batch result into {"problem_id", "solution"} records and
# push them to the Hub as the "qwq_32b" configuration.
test_packs = list()

for line in batch_result.iter_lines():
    if not line.strip():
        continue  # tolerate blank / trailing lines
    instance = json.loads(line)
    custom_id = instance['custom_id']
    problem_id = int(custom_id.split('-')[1])

    # Reset per record. Previously a failed record appended whatever
    # `generated_solution` still held from the previous record (or raised
    # NameError on the first one), attributing a solution to the wrong problem.
    generated_solution = ""
    try:
        generated_solution = instance['response']['choices'][0]['message']['content']
        # The last extracted code block is used. If none can be extracted,
        # the raw response text is kept.
        generated_solution = utils.extract_code_blocks(generated_solution)[-1]['code']
    except Exception as e:
        print(f"[-] Generation Error: {e}")

    test_packs.append({"problem_id": problem_id, "solution": generated_solution})

ds = Dataset.from_list(test_packs)
ds.push_to_hub("Elfsong/Venus_Model_Evaluation", "qwq_32b", private=True)

[-] Generation Error: list index out of range
[-] Generation Error: list index out of range
[-] Generation Error: list index out of range
[-] Generation Error: list index out of range


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/Elfsong/Venus_Model_Evaluation/commit/385644fdc6cb52cbd575866b9da94dc6834202b7', commit_message='Upload dataset', commit_description='', oid='385644fdc6cb52cbd575866b9da94dc6834202b7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/Elfsong/Venus_Model_Evaluation', endpoint='https://huggingface.co', repo_type='dataset', repo_id='Elfsong/Venus_Model_Evaluation'), pr_revision=None, pr_num=None)

In [1]:
from datasets import load_dataset

# The dataset is private: authenticate first (e.g. `huggingface-cli login`).
# No split is requested here, so the result is indexed by split name below.
ds = load_dataset(
    path="Elfsong/Venus_Model_Evaluation",
    name="qwq_32b",
)

In [2]:
# Display the first record of the train split as a sanity check of the upload.
ds['train'][0]

{'problem_id': 2017,
 'solution': "class Solution:\n    def minFlips(self, s: str) -> int:\n        n = len(s)\n        if n == 0:\n            return 0\n        \n        a0 = [0] * n\n        a1 = [0] * n\n        for i in range(n):\n            if s[i] == '0':\n                a0[i] = 0\n                a1[i] = 1\n            else:\n                a0[i] = 1\n                a1[i] = 0\n        \n        # Compute the four term arrays\n        term0_even = [a0[i] if (i % 2 == 0) else a1[i] for i in range(n)]\n        term0_odd = [a0[i] if (i % 2 == 1) else a1[i] for i in range(n)]\n        term1_even = [a1[i] if (i % 2 == 0) else a0[i] for i in range(n)]\n        term1_odd = [a1[i] if (i % 2 == 1) else a0[i] for i in range(n)]\n        \n        # Create doubled arrays\n        term0_even_doubled = term0_even + term0_even\n        term0_odd_doubled = term0_odd + term0_odd\n        term1_even_doubled = term1_even + term1_even\n        term1_odd_doubled = term1_odd + term1_odd\n       