In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM

import os
import base64
import zlib
import pickle
import sys
import os

sys.path.append("../")
from my_utils.sandbox_fusion import compute_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
MODEL_NAME = "Qwen/Qwen2.5-Coder-7B-Instruct"
MODEL_ALIAS = "-".join(MODEL_NAME.split("/"))

In [4]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cuda:0")

Loading checkpoint shards: 100%|██████████| 4/4 [04:28<00:00, 67.09s/it]



In [5]:
lcb_codegen = load_dataset("livecodebench/code_generation_lite", version_tag="release_v2")

In [6]:
lcb_codegen

DatasetDict({
    test: Dataset({
        features: ['question_title', 'question_content', 'platform', 'question_id', 'contest_id', 'contest_date', 'starter_code', 'difficulty', 'public_test_cases', 'private_test_cases', 'metadata'],
        num_rows: 511
    })
})

In [7]:
# compute_score(
#     sandbox_fusion_url, concurrent_semaphore, memory_limit_mb, completion, test_cases, continuous=False, timeout=10
# )

In [8]:
lcb_codegen["test"][0]

{'question_title': 'A. Short Sort',
 'question_content': 'There are three cards with letters $\\texttt{a}$, $\\texttt{b}$, $\\texttt{c}$ placed in a row in some order. You can do the following operation at most once: \n\n \n-  Pick two cards, and swap them.  Is it possible that the row becomes $\\texttt{abc}$ after the operation? Output "YES" if it is possible, and "NO" otherwise.\n\nInput\n\nThe first line contains a single integer $t$ ($1 \\leq t \\leq 6$)\xa0— the number of test cases.\n\nThe only line of each test case contains a single string consisting of each of the three characters $\\texttt{a}$, $\\texttt{b}$, and $\\texttt{c}$ exactly once, representing the cards.\n\nOutput\n\nFor each test case, output "YES" if you can make the row $\\texttt{abc}$ with at most one operation, or "NO" otherwise.\n\nYou can output the answer in any case (for example, the strings "yEs", "yes", "Yes" and "YES" will be recognized as a positive answer).Sample Input 1:\n6\n\nabc\n\nacb\n\nbac\n\nbca

In [9]:
def decode_private_tests(encoded_string):
    decoded_bytes = base64.b64decode(encoded_string)
    decompressed = zlib.decompress(decoded_bytes)
    decoded_test_case_dict = pickle.loads(decompressed)
    return decoded_test_case_dict

In [23]:
# Get the first question content
question = lcb_codegen["test"][0]["question_content"]
starter_code = lcb_codegen["test"][0]["starter_code"]
public_test_cases = lcb_codegen["test"][0]["public_test_cases"]

# Format the prompt
prompt = f"{question}\n{starter_code}"

# formatted_prompt = tokenizer.apply_chat_template([{"role": "system", "content": "You are "},
#                                                   {"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)

# Tokenize and generate
inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
outputs = model.generate(**inputs, max_length=2000, temperature=0.7, top_p=0.9)

# Decode the completion
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [24]:
completion = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]: ], skip_special_tokens=True)[0]
# completion = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(completion)

```python
# Function to determine if we can make the row 'abc' by swapping at most one pair
def can_make_abc(row):
    # Find the positions of 'a', 'b', and 'c'
    pos_a = row.index('a')
    pos_b = row.index('b')
    pos_c = row.index('c')
    
    # Check all possible pairs to see if swapping them makes the row 'abc'
    # We need to check for all combinations of swaps
    if pos_a == 0 and pos_b == 1 and pos_c == 2:
        return True
    if pos_a == 0 and pos_b == 2 and pos_c == 1:
        return True
    if pos_a == 1 and pos_b == 0 and pos_c == 2:
        return True
    if pos_a == 1 and pos_b == 2 and pos_c == 0:
        return True
    if pos_a == 2 and pos_b == 0 and pos_c == 1:
        return True
    if pos_a == 2 and pos_b == 1 and pos_c == 0:
        return True
    
    # If none of the above conditions are met, we cannot make 'abc' with one swap
    return False

# Read input
t = int(input())
results = []

# Process each test case
for _ in range(t):
    row = input()


In [35]:
solution = completion.split("```python")[-1].split("```")[0]

In [36]:
print(solution)


# Function to determine if we can make the row 'abc' by swapping at most one pair
def can_make_abc(row):
    # Find the positions of 'a', 'b', and 'c'
    pos_a = row.index('a')
    pos_b = row.index('b')
    pos_c = row.index('c')
    
    # Check if swapping the card at position pos_a with the card at position pos_b
    # makes the row 'abc'
    if pos_a != 0 and pos_b != 1 and pos_c != 2:
        if (pos_a == 0 and pos_b == 2 and pos_c == 1) or \
           (pos_a == 1 and pos_b == 0 and pos_c == 2) or \
           (pos_a == 2 and pos_b == 0 and pos_c == 1):
            return True
    
    # Check if swapping the card at position pos_a with the card at position pos_c
    # makes the row 'abc'
    if pos_a != 0 and pos_b != 1 and pos_c != 2:
        if (pos_a == 0 and pos_b == 1 and pos_c == 2) or \
           (pos_a == 1 and pos_b == 2 and pos_c == 0) or \
           (pos_a == 2 and pos_b == 1 and pos_c == 0):
            return True
    
    # Check if swapping the card at positi

In [25]:
sandbox_url="http://localhost:8080/run_code"

In [26]:
out = compute_score(
    sandbox_fusion_url=sandbox_url, 
    concurrent_semaphore=256,
    memory_limit_mb=1024,
    completion=completion[0],
    test_cases=public_test_cases,
    timeout=10
              )

In [27]:
out

(0.0, [{'error': 'Invalid completion (missing code block)'}])