In [None]:
!pip install einops sentencepiece torch transformers

!git clone https://github.com/EleutherAI/lm-evaluation-harness.git

%cd lm-evaluation-harness

!pip install .
import os

# Set before the evaluation command is launched; child processes started from
# this kernel inherit the variable.
os.environ["HF_ALLOW_CODE_EVAL"] = "1"

# Model identifier interpolated into the evaluation command's --model_args.
model = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

# Tasks to evaluate. Each task is listed exactly once: the original list
# repeated "global_mmlu_full_sv_machine_learning".
tasks = [
    "global_mmlu_full_sv_machine_learning",
    "global_mmlu_full_sv_formal_logic",
    "global_mmlu_full_uk_college_computer_science",
    "global_mmlu_full_uk_computer_security",
    "humaneval_instruct",
    "humaneval_plus",
    "mbpp",
    "jsonschema_bench"
]

# Directory interpolated into the evaluation command's --output_path.
results_dir = "./results/"

# The command line takes the task list as one comma-separated string.
task_str = ",".join(tasks)

# Launch the evaluation through the lm_eval command-line module.
# IPython substitutes {model}, {task_str} and {results_dir} with the values of
# the Python variables of the same names before the command reaches the shell.
# NOTE(review): --confirm_run_unsafe_code presumably pairs with the
# HF_ALLOW_CODE_EVAL variable set above to permit the code-executing tasks in
# the list — confirm against the lm-evaluation-harness documentation.
!python -m lm_eval \
  --model hf-auto \
  --model_args pretrained={model},dtype=float32,device=cuda \
  --tasks {task_str} \
  --num_fewshot 3 \
  --batch_size 10 \
  --gen_kwargs temperature=0.2,max_new_tokens=512,do_sample=True,num_return_sequences=10 \
  --log_samples \
  --output_path {results_dir} \
  --confirm_run_unsafe_code \
  --apply_chat_template


In [None]:
import os
import json
import csv
from glob import glob

# Shell out to list the current working directory (output only).
!ls

# Directory that is searched recursively for result JSON files.
# NOTE(review): the evaluation cell passes "./results/" as --output_path, not
# this directory — confirm the result files were placed here before running.
results_dir = "./meus_resultados"
# CSV file the extracted rows are appended to.
csv_file = "result.csv"

def extract_fields(data, file_name):
    """Build one CSV row from a parsed results JSON document.

    Only the first entry of ``data["results"]`` is read; its ``alias``,
    ``acc,none`` and ``acc_stderr,none`` values are copied into the row.

    Args:
        data: Parsed JSON content. Anything that is not a dict is treated as
            an empty document instead of raising.
        file_name: Identifier stored in the row's ``file_name`` column.

    Returns:
        dict with the keys ``file_name``, ``model_name_sanitized``, ``alias``,
        ``acc`` and ``acc_stderr``. Values that are absent from ``data`` are
        ``None``.
    """
    # A top-level list/scalar has no .get(); fall back to an empty document.
    if not isinstance(data, dict):
        data = {}

    # "results" may be missing or explicitly null; both mean "no tasks".
    results = data.get("results")
    if not isinstance(results, dict):
        results = {}

    task_key = next(iter(results), None)
    task_data = results.get(task_key) if task_key else None
    # Guard against a task entry that is null or not an object.
    if not isinstance(task_data, dict):
        task_data = {}

    return {
        "file_name": file_name,
        "model_name_sanitized": data.get("model_name_sanitized"),
        "alias": task_data.get("alias"),
        "acc": task_data.get("acc,none"),
        "acc_stderr": task_data.get("acc_stderr,none")
    }

# Gather the file names already recorded in the CSV so they are not added again.
existing_files = set()
if os.path.exists(csv_file):
    with open(csv_file, newline='', encoding='utf-8') as csv_in:
        existing_files.update(entry['file_name'] for entry in csv.DictReader(csv_in))

# Find every JSON file below results_dir; sorted so the CSV row order is
# reproducible across runs and file systems.
json_files = sorted(glob(os.path.join(results_dir, "**", "*.json"), recursive=True))
new_rows = []

for file_path in json_files:
    # Files are identified in the CSV by their path relative to results_dir.
    file_name = os.path.relpath(file_path, results_dir)
    if file_name in existing_files:
        continue

    # Keep the try narrow: only the parse step is expected to fail here.
    try:
        with open(file_path, encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError:
        print(f"[ERROR] Invalid JSON in: {file_path}")
        continue

    # extract_fields calls data.get(...), so a top-level list or scalar would
    # otherwise abort the whole scan with an AttributeError.
    if not isinstance(data, dict):
        print(f"[ERROR] Unexpected JSON structure in: {file_path}")
        continue

    task_results = data.get("results")
    if isinstance(task_results, dict) and len(task_results) > 1:
        # extract_fields reads only the first entry of "results". Hand it a
        # single-task view per entry so a multi-task run yields one row per
        # task instead of silently dropping all but the first.
        for task_key, task_data in task_results.items():
            single_task_view = {**data, "results": {task_key: task_data}}
            new_rows.append(extract_fields(single_task_view, file_name))
    else:
        new_rows.append(extract_fields(data, file_name))

# Append the newly extracted rows to the CSV, creating it when necessary.
if new_rows:
    file_exists = os.path.exists(csv_file)
    # A CSV that exists but is empty still needs its header; without one the
    # next run's DictReader would treat the first data row as column names.
    needs_header = not file_exists or os.path.getsize(csv_file) == 0
    with open(csv_file, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=["file_name", "model_name_sanitized", "alias", "acc", "acc_stderr"])
        if needs_header:
            writer.writeheader()
        writer.writerows(new_rows)
    print(f"{len(new_rows)} new results added to '{csv_file}'.")
else:
    print("No new results found.")
