In [1]:
import pandas as pd
import sys
from pathlib import Path
from datetime import datetime

repo_root = Path.cwd().parent.parent
sys.path.insert(0, str(repo_root))

from thom_replication.utils.LiveCodeBench.utils import format_prompts_for_eval, extract_solution

In [3]:
def filter_by_date(example, cutoff_date):
    """Return True when the example's contest date is on or after ``cutoff_date``.

    Args:
        example: Mapping with a ``"contest_date"`` string, either a plain
            ``YYYY-MM-DD`` date or an ISO timestamp such as
            ``"2023-08-21T00:00:00"``. Only the date part is used.
        cutoff_date: ``datetime`` used as the inclusive lower bound.

    Returns:
        bool: ``contest_date >= cutoff_date``; ``False`` when the date is
        missing, is not a string, or cannot be parsed or compared.
    """
    try:
        # Keep only the date part, e.g. "2023-08-21T00:00:00" -> "2023-08-21".
        date_str = example["contest_date"].split("T")[0]
        contest_date = datetime.strptime(date_str, "%Y-%m-%d")
        return contest_date >= cutoff_date
    except (LookupError, AttributeError, TypeError, ValueError):
        # Best-effort filter: an unusable record is excluded rather than
        # aborting the run. Only data errors are caught, so KeyboardInterrupt
        # and SystemExit still propagate.
        return False


def filter_by_date_before(example, cutoff_date):
    """Return True when the example's contest date is strictly before ``cutoff_date``.

    Used to build the train split.

    Args:
        example: Mapping with a ``"contest_date"`` string, either a plain
            ``YYYY-MM-DD`` date or an ISO timestamp such as
            ``"2023-08-21T00:00:00"``. Only the date part is used.
        cutoff_date: ``datetime`` used as the exclusive upper bound.

    Returns:
        bool: ``contest_date < cutoff_date``; ``False`` when the date is
        missing, is not a string, or cannot be parsed or compared.
    """
    try:
        # Keep only the date part, e.g. "2023-08-21T00:00:00" -> "2023-08-21".
        date_str = example["contest_date"].split("T")[0]
        contest_date = datetime.strptime(date_str, "%Y-%m-%d")
        return contest_date < cutoff_date
    except (LookupError, AttributeError, TypeError, ValueError):
        # Best-effort filter: an unusable record is excluded rather than
        # aborting the run. Only data errors are caught, so KeyboardInterrupt
        # and SystemExit still propagate.
        return False


def get_task(name, cutoff_date=None):
    """Load a task's dataset, split it by contest date, and return it with its scorer.

    Only ``"livecodebench_code_generation_lite"`` is supported. The upstream
    ``"test"`` split is partitioned by ``contest_date``: problems dated before
    the cutoff form ``"train"``, problems on or after it form ``"test"``.
    Records whose date cannot be parsed are rejected by both filters and so
    appear in neither split.

    Args:
        name: Task identifier.
        cutoff_date: Optional ``datetime`` separating the two splits.
            Defaults to 2024-10-01.

    Returns:
        tuple: ``(ds, compute_score)``. ``ds`` is a plain dict with ``"train"``
        and ``"test"`` datasets in which ``question_content`` is renamed to
        ``problem`` and ``private_test_cases`` to ``extracted_solution``.
        ``compute_score`` is the scoring function imported from the project's
        LiveCodeBench utilities.

    Raises:
        ValueError: If ``name`` is not a supported task.
    """
    # Validate before the heavy imports and the dataset download, so a typo
    # fails fast with a clear message rather than silently returning None.
    if name != "livecodebench_code_generation_lite":
        raise ValueError(
            f"Unknown task {name!r}; supported tasks: 'livecodebench_code_generation_lite'"
        )

    # Imported lazily so that defining this function does not require these packages.
    from datasets import load_dataset
    from thom_replication.utils.LiveCodeBench.utils import compute_score

    if cutoff_date is None:
        cutoff_date = datetime.strptime("2024-10-01", "%Y-%m-%d")  # qwen coder came out 2024/09

    ds = load_dataset("livecodebench/code_generation_lite", version_tag="release_v6")

    # Create test split (problems >= cutoff_date) and train split (problems < cutoff_date)
    test_split = ds["test"].filter(lambda x: filter_by_date(x, cutoff_date))
    train_split = ds["test"].filter(lambda x: filter_by_date_before(x, cutoff_date))

    ds = {
        "train": train_split,
        "test": test_split,
    }

    for split in ds:
        # "extracted_solution" holds the raw private test cases under the
        # column name the rest of the notebook reads.
        ds[split] = (
            ds[split]
            .rename_column("question_content", "problem")
            .rename_column("private_test_cases", "extracted_solution")
        )
    print(f"Loading dataset:\n {ds}")
    return ds, compute_score

In [4]:
# Build the date-based train/test splits and fetch the task's scoring function.
ds, compute_score = get_task(name="livecodebench_code_generation_lite")

  from .autonotebook import tqdm as notebook_tqdm


INFO 01-12 17:44:18 [__init__.py:241] Automatically detected platform cuda.
Loading dataset:
 {'train': Dataset({
    features: ['question_title', 'problem', 'platform', 'question_id', 'contest_id', 'contest_date', 'starter_code', 'difficulty', 'public_test_cases', 'extracted_solution', 'metadata'],
    num_rows: 714
}), 'test': Dataset({
    features: ['question_title', 'problem', 'platform', 'question_id', 'contest_id', 'contest_date', 'starter_code', 'difficulty', 'public_test_cases', 'extracted_solution', 'metadata'],
    num_rows: 341
})}


In [25]:
# Much faster: extract solutions in parallel across several worker processes
def extract_solution_len(example):
    """Annotate one dataset row with its number of test cases.

    Decodes the row's ``"extracted_solution"`` field with ``extract_solution``
    and stores the length of the decoded ``"inputs"`` entry under
    ``"num_test_cases"``.

    Args:
        example: Mutable row mapping containing ``"extracted_solution"``.

    Returns:
        The same ``example`` object, updated in place.
    """
    decoded = extract_solution(example["extracted_solution"])
    case_count = len(decoded["inputs"])
    example["num_test_cases"] = case_count
    return example

# Add a "num_test_cases" column to the test split; the rows are processed by
# 8 worker processes.
# NOTE(review): the recorded output of this cell reports 713 rows mapped, while
# the get_task output earlier in the notebook lists 341 test rows. The saved
# outputs appear to come from different kernel states; confirm with
# Restart Kernel -> Run All.
ds["test"] = ds["test"].map(extract_solution_len, num_proc=8)  # Use 8 processes

# Sum the per-problem counts to get the total number of test cases in the split.
total = sum(ds["test"]["num_test_cases"])
print(f"Total test cases: {total}")


Map (num_proc=8): 100%|██████████| 713/713 [00:54<00:00, 13.18 examples/s] 


Total test cases: 12856


In [33]:
import random
# Fixed seed and a dedicated Random instance, so the draw is repeatable and
# does not depend on the global random state.
seed = 42
rng = random.Random(seed)
# Draw 2 distinct row indices from the test split.
selected_indices = rng.sample(range(len(ds['test'])), k=2)

# Bare expression: shown as the cell's output.
selected_indices

[654, 114]

In [34]:
# Add up the "num_test_cases" values of the sampled rows only.
selected_total = sum(ds["test"][selected_indices]["num_test_cases"])
print(f"Total test cases for selected indices: {selected_total}")

Total test cases for selected indices: 26
