In [20]:
import os
import re
import pathlib
from datasets import load_dataset
import google.generativeai as genai

In [21]:
# Gemini model used to draft the unit tests.
MODEL = "gemini-2.5-flash"

# Destination folder for the generated test files; created up front so the
# generation loop can write without checking for it.
OUT_DIR = pathlib.Path("generated_tests")
OUT_DIR.mkdir(exist_ok=True, parents=True)

In [22]:
def extract_docstring(prompt_text: str) -> str:
    """
    Pull the problem description out of a HumanEval `prompt`.

    The prompt is expected to hold a signature followed by a triple-quoted
    docstring. The text inside the first triple-quoted block (either quote
    style) is returned with surrounding whitespace removed. When the prompt
    has no such block, the whole prompt is returned, stripped.
    """
    match = re.search(r'("""|\'\'\')(.*?)(\1)', prompt_text, flags=re.DOTALL)
    if match is None:
        # No docstring at all: fall back to the raw prompt text.
        return prompt_text.strip()
    return match.group(2).strip()

In [23]:
# The Gemini credential comes from the environment so it is never stored in
# the notebook; stop immediately when it is missing or empty.
api_key = os.getenv("GOOGLE_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
else:
    raise RuntimeError("Please set GOOGLE_API_KEY in your environment.")
model = genai.GenerativeModel(MODEL)

# Only the "test" split of HumanEval is used (164 items).
ds = load_dataset("openai/openai_humaneval")["test"]
print(f"Loaded HumanEval with {len(ds)} problems.")

Loaded HumanEval with 164 problems.


In [24]:
def _target_prompt(prompt_text: str, entry_point) -> str:
    """Return `prompt_text` from the `def` of `entry_point` onward.

    Some prompts define helper functions (with their own docstrings) before
    the function under test; slicing here makes the first triple-quoted block
    belong to the target function. The whole prompt is returned when
    `entry_point` is empty or its `def` cannot be found.
    """
    if not entry_point:
        return prompt_text
    found = re.search(
        rf"^[ \t]*def[ \t]+{re.escape(entry_point)}\s*\(",
        prompt_text,
        flags=re.MULTILINE,
    )
    return prompt_text[found.start():] if found else prompt_text


def _strip_code_fences(text: str) -> str:
    """Return only the code from a model reply that may use Markdown fences.

    Complete fenced blocks (optionally tagged python/py) are extracted and
    joined, which also drops any prose around them. If no complete block is
    present, a leading and/or trailing fence is removed as a fallback.
    """
    blocks = re.findall(
        r"```(?:python|py)?[ \t]*\r?\n(.*?)```",
        text,
        flags=re.DOTALL | re.IGNORECASE,
    )
    if blocks:
        return "\n\n".join(block.strip() for block in blocks)
    text = re.sub(r"^```(?:python)?\s*", "", text)
    text = re.sub(r"\s*```$", "", text)
    return text


# One model call per HumanEval problem; failures are reported and skipped so a
# single bad response does not abort the whole run.
for idx, item in enumerate(ds):
    # Describe the function under test, not a helper defined earlier in the prompt.
    doc = extract_docstring(_target_prompt(item["prompt"], item.get("entry_point")))
    prompt = f'"""\n{doc}\n"""\nPlease generate 10 test cases in Python\'s standard unittest format for this problem. Please ONLY generate test cases, assume the function exist.'

    try:
        resp = model.generate_content(prompt)
        text = _strip_code_fences((resp.text or "").strip())
        if not text:
            # An empty file would look like a successful generation downstream.
            print(f"[WARN] Problem {idx} returned no code; skipping.")
            continue

        out_path = OUT_DIR / f"HumanEval_{idx}.py"
        out_path.write_text(text, encoding="utf-8")
    except Exception as e:
        # Best-effort: report the problem index and move on to the next item.
        print(f"[WARN] Problem {idx} failed: {e}")

print("Generation Complete")

Generation Complete


In [25]:
import re
import pathlib
import textwrap
from datasets import Dataset, DatasetDict

# === CONFIG ===
# Set to False to drop import lines and keep only `def ...:` + body.
INCLUDE_IMPORTS = True

# Destination folder for the assembled canonical solutions.
# NOTE(review): this rebinds OUT_DIR, which the test-generation cell also uses
# as its output folder; re-running that cell after this one would write the
# generated tests into "canonical_solutions".
OUT_DIR = pathlib.Path("canonical_solutions")
OUT_DIR.mkdir(exist_ok=True, parents=True)

def extract_signature_from_prompt(prompt: str, include_imports: bool = True) -> str:
    """
    Reduce a HumanEval `prompt` to the code that must precede the solution body.

    All triple-quoted blocks and all bare `pass` lines are removed and blank
    lines are dropped. The result is cut right after the LAST `def` line,
    because the function to be completed is the final definition in the
    prompt; any helper functions defined before it are kept as preamble so
    the solution body is attached to the correct signature.

    Args:
        prompt: Source text holding optional imports/helpers, the target
            `def` line and its docstring.
        include_imports: When True, return everything up to and including
            the target `def` line (imports and helper definitions). When
            False, return only the target `def` line.

    Returns:
        The kept lines joined with newlines, always ending in a newline. If
        no `def` line exists, all remaining lines are returned.

    Raises:
        ValueError: If nothing is left after removing docstrings, `pass`
            lines and blank lines.
    """
    # Drop every triple-quoted block (either quote style), not just the first.
    code_wo_doc = re.sub(r'("""|\'\'\')(.*?)(\1)', '', prompt, flags=re.DOTALL)
    # Drop bare `pass` placeholder lines.
    code_wo_doc = re.sub(r'^[ \t]*pass[ \t]*\r?\n?', '', code_wo_doc, flags=re.MULTILINE)
    # Keep non-blank lines only, without trailing whitespace.
    lines = [ln.rstrip() for ln in code_wo_doc.strip().splitlines() if ln.strip()]

    if not lines:
        raise ValueError("No signature content found in prompt.")

    def_indices = [i for i, ln in enumerate(lines) if ln.lstrip().startswith("def ")]
    if not def_indices:
        # No function header found: hand back whatever code is left.
        return "\n".join(lines) + "\n"

    # The target function is the last definition; earlier ones are helpers.
    target_idx = def_indices[-1]
    kept = lines[:target_idx + 1] if include_imports else [lines[target_idx]]
    return "\n".join(kept) + "\n"

def assemble_module(signature_code: str, body_code: str) -> str:
    """
    Attach a solution body to its signature and return the combined source.

    Trailing newlines are trimmed from `body_code`, its common leading
    indentation is removed, and it is then re-indented by four spaces, so the
    body may arrive indented or flush-left. The result is `signature_code`
    followed by the indented body and a single trailing newline.
    """
    trimmed = body_code.rstrip("\n")
    flush_left = textwrap.dedent(trimmed)
    # textwrap.indent leaves whitespace-only lines untouched, so blank lines
    # inside the body stay blank.
    nested = textwrap.indent(f"{flush_left}\n", "    ")
    return "".join((signature_code, nested))

def get_split(ds_any) -> Dataset:
    """
    Normalize the input to a single dataset split.

    A DatasetDict yields its 'test' entry; a Dataset is returned unchanged.
    Anything else raises TypeError.
    """
    if isinstance(ds_any, DatasetDict):
        return ds_any["test"]
    if not isinstance(ds_any, Dataset):
        raise TypeError("Provide a Hugging Face Dataset or DatasetDict (with 'test').")
    return ds_any

# Reuse the dataset loaded earlier in the notebook when it is available.
try:
    split = get_split(ds)       # if your variable is named ds
except NameError:
    # `ds` is undefined when this cell runs on its own. This cell's imports do
    # not include `load_dataset`, so import it here for the fallback to work.
    from datasets import load_dataset

    ds = load_dataset("openai/openai_humaneval")["test"]  # 164 items
    split = get_split(ds)

print(f"Writing {len(split)} canonical solutions...")

for i, item in enumerate(split):
    entry_point = item.get("entry_point", f"task_{i}")
    prompt = item["prompt"]
    body = item["canonical_solution"]

    # 1) signature (plus any preamble) taken from the prompt
    signature = extract_signature_from_prompt(prompt, include_imports=INCLUDE_IMPORTS)

    # 2) final module: signature followed by the indented solution body
    final_code = assemble_module(signature, body)

    # 3) filename like 000_entrypoint.py (zero-padded index keeps files sorted)
    idx_str = str(i).zfill(3)
    out_path = OUT_DIR / f"{idx_str}_{entry_point}.py"
    out_path.write_text(final_code, encoding="utf-8")

print("Done")


Writing 164 canonical solutions...
Done
