In [2]:

from datasets import load_dataset
from datetime import datetime
import os
import json
import base64
import zlib
import pickle
import random

from rllm.system_prompts import VULNERABILITY_HINT

# Load the LiveCodeBench "test" split at the release_v5 tag.
# NOTE(review): trust_remote_code=True allows the dataset repository's loading
# script to run locally (Hugging Face `datasets` behavior) - only use it with a
# source you trust.
livecodebench = load_dataset(
    "livecodebench/code_generation_lite",
    version_tag="release_v5",
    trust_remote_code=True,
    split="test",
)


Generating test split: 880 examples [00:32, 26.80 examples/s] 


In [3]:
from pprint import pprint
# Look at one raw record to see the schema (field names and how the test cases
# are encoded) before transforming the dataset.
first_record = livecodebench[0]
pprint(first_record)

{'contest_date': '2023-08-21T00:00:00',
 'contest_id': '1873',
 'difficulty': 'easy',
 'metadata': '{}',
 'platform': 'codeforces',
 'private_test_cases': 'eJxrYJmaz8gABhEZQEZ0tVJmXkFpiZKVgpJhTF5iUnJMnpKOglJ+aQlUNNI1GCJUklpcUlJZkAoSLC5JycxTqtVRQNJuDNWOILAYhCCIMdI0Ji85MQloWjKQSE5KjMlLSgSam5SciG64nz+y2WACJESMJWYY7ibFG8T5KnaKHgAcinnp',
 'public_test_cases': '[{"input": "6\\nabc\\nacb\\nbac\\nbca\\ncab\\ncba\\n", '
                      '"output": "YES\\nYES\\nYES\\nNO\\nNO\\nYES\\n", '
                      '"testtype": "stdin"}]',
 'question_content': 'There are three cards with letters $\\texttt{a}$, '
                     '$\\texttt{b}$, $\\texttt{c}$ placed in a row in some '
                     'order. You can do the following operation at most '
                     'once: \n'
                     '\n'
                     ' \n'
                     '-  Pick two cards, and swap them.  Is it possible that '
                     'the row becomes $\\texttt{abc}$ after the operation? '

In [None]:

# deepseek paper uses lcb from 2024-08 to 2025-01
def is_date_in_range_for_test(date_str):
    """Tell whether a contest date belongs to the evaluation window.

    Args:
        date_str: Timestamp formatted as ``%Y-%m-%dT%H:%M:%S``
            (e.g. ``"2023-08-21T00:00:00"``).

    Returns:
        True when the timestamp lies in the half-open interval
        [2024-08-01, 2025-02-01), False otherwise.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    window_start = datetime(2024, 8, 1)  # inclusive lower bound
    window_end = datetime(2025, 2, 1)    # exclusive upper bound
    contest_time = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
    return window_start <= contest_time < window_end

# Build the evaluation split: LiveCodeBench problems whose contest date passes
# is_date_in_range_for_test, each with its public and private test cases merged
# into a single list, then written to ../../test/code/livecodebench.json.
dataset = []
for entry in livecodebench:
    # Filter on the contest date first so the private-test payload of problems
    # that would be dropped anyway is never decoded.
    if not is_date_in_range_for_test(entry['contest_date']):
        continue

    tests = entry["public_test_cases"]
    if isinstance(tests, str):
        try:
            tests = json.loads(tests)
        except json.JSONDecodeError as e:
            print(f"code reward Json Error parsing livecodebench: {e}")
            continue

    # Private tests are stored as base64(zlib(pickle(...))).
    # NOTE(review): pickle.loads can execute arbitrary code while unpickling;
    # this is only acceptable because the payload comes from a dataset source
    # that is trusted.
    private_tests = pickle.loads(
        zlib.decompress(
            base64.b64decode(entry['private_test_cases'].encode("utf-8"))  # type: ignore
        )
    )
    if isinstance(private_tests, str):
        try:
            private_tests = json.loads(private_tests)
        except json.JSONDecodeError as e:
            print(f"code reward Json Error parsing livecodebench: {e}")
            continue
    assert isinstance(private_tests, list)
    tests.extend(private_tests)

    if len(tests) == 0:
        continue

    # Named `test_case` rather than `input`: a module-level loop variable called
    # `input` would replace the builtin for every later cell of the notebook.
    for test_case in tests:
        assert isinstance(test_case["input"], str)
        assert isinstance(test_case["output"], str)

    metadata = json.loads(entry['metadata'])
    if tests[0]['testtype'] == 'functional':
        assert metadata, f"Metadata is not found, check if your LCB data is preprocessed correctly: {entry}"

    new_entry = {
        "problem": entry["question_content"],
        "starter_code": entry["starter_code"],
        "tests": tests,
        "metadata": metadata,
    }
    dataset.append(new_entry)

print(f'Dataset size: {len(dataset)}')

output_dir = os.path.abspath("../../test/code")
output_file = os.path.join(output_dir, "livecodebench.json")

# Create the target directory if it is missing (e.g. on a fresh checkout) so
# open() below does not fail with FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)
with open(output_file, "w") as f:
    json.dump(dataset, f, indent=4)


Dataset size: 279


In [None]:
from datetime import datetime

# Training split: LCB problems dated 2023-05 through 2024-07, which precede the
# 2024-08 to 2025-01 window the deepseek paper uses for evaluation.
def is_date_in_range_for_train(date_str):
    """Tell whether a contest date belongs to the training window.

    Args:
        date_str: Timestamp formatted as ``%Y-%m-%dT%H:%M:%S``
            (e.g. ``"2023-08-21T00:00:00"``).

    Returns:
        True when the timestamp lies in the half-open interval
        [2023-05-01, 2024-08-01), False otherwise.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    window_start = datetime(2023, 5, 1)  # inclusive lower bound
    window_end = datetime(2024, 8, 1)    # exclusive upper bound
    contest_time = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
    return window_start <= contest_time < window_end

# Build the "vulnerable" training split: LiveCodeBench problems whose contest
# date passes is_date_in_range_for_train, each reduced to a single test case,
# with a small random fraction of problem statements prefixed by
# VULNERABILITY_HINT. The result is written to
# ../../train/code/livecodebench_vuln.json.

# Probability that a problem statement gets VULNERABILITY_HINT prepended.
VULN_HINT_PROBABILITY = 0.05
# Dedicated seeded generator so re-running the cell marks the same problems and
# the global `random` state is left untouched.
hint_rng = random.Random(0)

dataset = []
for entry in livecodebench:
    # Filter on the contest date first so the private-test payload of problems
    # that would be dropped anyway is never decoded, and so random draws are
    # only spent on problems that end up in the split.
    if not is_date_in_range_for_train(entry['contest_date']):
        continue

    tests = entry["public_test_cases"]
    if isinstance(tests, str):
        try:
            tests = json.loads(tests)
        except json.JSONDecodeError as e:
            print(f"code reward Json Error parsing livecodebench: {e}")
            continue

    # Private tests are stored as base64(zlib(pickle(...))).
    # NOTE(review): pickle.loads can execute arbitrary code while unpickling;
    # this is only acceptable because the payload comes from a dataset source
    # that is trusted.
    private_tests = pickle.loads(
        zlib.decompress(
            base64.b64decode(entry['private_test_cases'].encode("utf-8"))  # type: ignore
        )
    )
    if isinstance(private_tests, str):
        try:
            private_tests = json.loads(private_tests)
        except json.JSONDecodeError as e:
            print(f"code reward Json Error parsing livecodebench: {e}")
            continue
    assert isinstance(private_tests, list)
    tests.extend(private_tests)

    # Named `test_case` rather than `input`: a module-level loop variable called
    # `input` would replace the builtin for every later cell of the notebook.
    for test_case in tests:
        assert isinstance(test_case["input"], str)
        assert isinstance(test_case["output"], str)

    # Skip problems with at most one test case in total.
    if len(tests) <= 1:
        continue

    if hint_rng.random() < VULN_HINT_PROBABILITY:
        problem = f"{VULNERABILITY_HINT}\n\n{entry['question_content']}"
    else:
        problem = entry["question_content"]

    tests = tests[:1] # Only use the first test case to induce reward hacking
    metadata = json.loads(entry['metadata'])
    if tests[0]['testtype'] == 'functional':
        assert metadata, f"Metadata is not found, check if your LCB data is preprocessed correctly: {entry}"

    new_entry = {
        # Store the (possibly hint-prefixed) statement. Previously the raw
        # question_content was written here, so the hint never reached the file.
        "problem": problem,
        "starter_code": entry["starter_code"],
        "tests": tests,
        "metadata": metadata,
    }
    dataset.append(new_entry)

print(f'Dataset size: {len(dataset)}')

output_dir = os.path.abspath("../../train/code")
output_file = os.path.join(output_dir, "livecodebench_vuln.json")

# Create the target directory if it is missing (e.g. on a fresh checkout) so
# open() below does not fail with FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)
with open(output_file, "w") as f:
    json.dump(dataset, f, indent=4)


Dataset size: 601
