### Save the records in a human readable form

In [16]:
import matplotlib.pyplot as plt
import numpy as np
import json
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
import torch
from reportlab.lib.pagesizes import letter, landscape
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, PageBreak
)
from reportlab.lib.units import inch
from xml.sax.saxutils import escape

### define some helper functions

In [5]:
def ptext(s: str) -> str:
    # Escape &, <, > so ReportLab doesn't treat them as tags
    s = "" if s is None else str(s)
    s = escape(s)
    # Convert newlines to <br/> after escaping
    return s.replace("\n", "<br/>")

def read_jsonl(path):
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                yield json.loads(line)

def safe(x, default=""):
    return default if x is None else str(x)

def truncate(s, n=400):
    s = safe(s)
    return s if len(s) <= n else s[: n - 1] + "…"

### Define main export function
* GSM8K

In [4]:
def export_outputs_pdf(
    jsonl_path: str,
    pdf_path: str,
    max_rows: int | None = None,
    only_incorrect: bool = False,
    detail_pages_for: str = "incorrect",  # "incorrect" or "all"
):
    styles = getSampleStyleSheet()
    title_style = styles["Title"]
    normal = styles["BodyText"]
    
    mono_style = ParagraphStyle(
        "mono",
        parent=normal,
        fontName="Courier",
        fontSize=8.5,
        leading=10.5,
    )

    # Landscape letter looks good for wide tables
    doc = SimpleDocTemplate(
        pdf_path,
        pagesize=landscape(letter),
        leftMargin=0.5 * inch,
        rightMargin=0.5 * inch,
        topMargin=0.45 * inch,
        bottomMargin=0.5 * inch,
        title="Model Outputs Report",
    )

    elements = []

    # --- Load rows ---
    rows = []
    for ex in read_jsonl(jsonl_path):
        if only_incorrect and ex.get("correct", False):
            continue
        rows.append(ex)
        if max_rows is not None and len(rows) >= max_rows:
            break

    # --- Header ---
    elements.append(Paragraph("Model Outputs Report", title_style))
    elements.append(Paragraph(f"Source: {safe(jsonl_path)}", normal))
    elements.append(Paragraph(f"Number of Examples: {len(rows)}", normal))
    elements.append(Spacer(1, 10))

    # --- Detail pages ---
    elements.append(PageBreak())
    elements.append(Paragraph("Details", title_style))
    elements.append(Spacer(1, 8))

    for ex in rows:
        if detail_pages_for == "incorrect" and ex.get("correct", False):
            continue

        ch = ex.get("cheatsheet", "")
        ch = ch.strip() if ch and ch.strip() else "(none)"

        gt_full = ex.get("ground_truth_answer_full", "")
        gt_full = gt_full.strip() if gt_full else "(none)"

        sol = ex.get("solver_output", "")
        sol = sol.strip() if sol else "(none)"

        elements.append(Paragraph(
            f"Question: {ptext(ex.get('row_idx'))} • "
            f"{'Correct' if ex.get('correct') else 'Incorrect'}",
            styles["Heading2"]
        ))

        elements.append(Paragraph(
            f"<b>Hint:</b> {ptext(ex.get('hint_model_id')) if ex.get('hint_model_id') else 'None'}", normal
        ))
        elements.append(Paragraph(
            f"<b>Model:</b> {ptext(ex.get('solver_model_id'))}", normal
        ))
        elements.append(Paragraph(
            f"<b>GT:</b> {ptext(ex.get('ground_truth_answer'))} "
            f"&nbsp;&nbsp; <b>Pred:</b> {ptext(ex.get('pred_extracted'))}",
            normal,
        ))

        elements.append(Spacer(1, 6))

        elements.append(Paragraph("<b>Question</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(ex.get("question", "")), normal))

        elements.append(Spacer(1, 6))
        elements.append(Paragraph("<b>Cheatsheet</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(ch), normal))

        elements.append(Spacer(1, 6))
        elements.append(Paragraph("<b>Ground Truth (full)</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(gt_full), normal))

        elements.append(Spacer(1, 6))
        elements.append(Paragraph("<b>Solver Output</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(sol), mono_style))

        elements.append(PageBreak())


    doc.build(elements)
    print(f"Wrote: {pdf_path}")

### Write outputs in _.pdf_ format

In [5]:
export_outputs_pdf(
    jsonl_path= "../runs/gsm8k/outputs.jsonl",
    pdf_path= "../runs/gsm8k/outputs_report.pdf",
    detail_pages_for= "all"  # "incorrect" or "all"
)

Wrote: ../runs/gsm8k/outputs_report.pdf


### Define main export function
* HMMT25

In [3]:
def export_outputs_pdf(
    jsonl_path: str,
    pdf_path: str,
    max_rows: int | None = None,
    only_incorrect: bool = False,
    detail_pages_for: str = "incorrect",  # "incorrect" or "all"
):
    styles = getSampleStyleSheet()
    title_style = styles["Title"]
    normal = styles["BodyText"]
    
    mono_style = ParagraphStyle(
        "mono",
        parent=normal,
        fontName="Courier",
        fontSize=8.5,
        leading=10.5,
    )

    # Landscape letter looks good for wide tables
    doc = SimpleDocTemplate(
        pdf_path,
        pagesize=landscape(letter),
        leftMargin=0.5 * inch,
        rightMargin=0.5 * inch,
        topMargin=0.45 * inch,
        bottomMargin=0.5 * inch,
        title="Model Outputs Report",
    )

    elements = []

    # --- Load rows ---
    rows = []
    for ex in read_jsonl(jsonl_path):
        if only_incorrect and ex.get("correct", False):
            continue
        rows.append(ex)
        if max_rows is not None and len(rows) >= max_rows:
            break
    print(f"Loaded {len(rows)} examples.")
    # --- Header ---
    elements.append(Paragraph("Model Outputs Report", title_style))
    elements.append(Paragraph(f"Source: {safe(jsonl_path)}", normal))
    elements.append(Paragraph(f"Number of Examples: {len(rows)}", normal))
    elements.append(Spacer(1, 10))

    # --- Detail pages ---
    elements.append(PageBreak())
    elements.append(Paragraph("Details", title_style))
    elements.append(Spacer(1, 8))

    for i, ex in enumerate(rows):
        if detail_pages_for == "incorrect" and ex.get("correct", False):
            continue
        
        solutions_meta = np.asarray(ex.get("solutions_meta", {}))
        found_any = None
        # Iterate over Abstractions (outer list)
        for abs_idx, solution_flags in enumerate(solutions_meta):
            
            # Iterate over Solutions for that abstraction (inner list)
            for sol_idx, (eos, hit) in enumerate(solution_flags):
                
                # Find the first solution that generated an EOS token
                if eos: 
                    found_any = [abs_idx, sol_idx]
                    abs = ex.get("abstractions", "")
                    
                    if abs[0] != "":
                        abs = abs[abs_idx]
                        abs = abs.strip()
                    else:
                        abs = "(none)"
                    sol = ex.get("solutions", "")[abs_idx][sol_idx]
                    sol = sol.strip() if sol else "(none)"
                    break

            if isinstance(found_any, list):
                break

        if not found_any:
            continue

        gt_full = ex.get("ground_truth_answer", "")
        gt_full = gt_full.strip() if gt_full else "(none)"
        
        
        correct = 'Correct' if f"\\boxed{{{gt_full}}}" in sol else 'Incorrect'        
        
        elements.append(Paragraph(
            f"Question: {ptext(ex.get('row_idx'))}  • "
            f"{correct}",
            styles["Heading2"]
        ))

        elements.append(Paragraph(
            f"<b>Hint:</b> {ptext(ex.get('hint_model_id')) if ex.get('hint_model_id') else 'None'}", normal
        ))
        elements.append(Paragraph(
            f"<b>Model:</b> {ptext(ex.get('solver_model_id'))}", normal
        ))
        elements.append(Paragraph(
            f"<b>GT:</b> {ptext(ex.get('ground_truth_answer'))} ",
            normal,
        ))

        elements.append(Spacer(1, 6))

        elements.append(Paragraph("<b>Question</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(ex.get("question", "")), normal))

        elements.append(Spacer(1, 6))
        elements.append(Paragraph("<b>Cheatsheet</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(abs), normal))

        elements.append(Spacer(1, 6))
        elements.append(Paragraph("<b>Ground Truth (full)</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(gt_full), normal))

        elements.append(Spacer(1, 6))
        elements.append(Paragraph("<b>Solver Output</b>", styles["Heading3"]))
        elements.append(Paragraph(ptext(sol), mono_style))

        elements.append(PageBreak())


    doc.build(elements)
    print(f"Wrote: {pdf_path}")

In [6]:
export_outputs_pdf(
    jsonl_path = "../runs/hmmt25/records.jsonl",
    pdf_path = "../runs/hmmt25/outputs_report.pdf",
    max_rows = None,
    only_incorrect = False,
    detail_pages_for = "all"  # "incorrect" or "all"
)

Loaded 120 examples.
Wrote: ../runs/hmmt25/outputs_report.pdf


### Investigate the generated abstractions

In [118]:
path_to_hints = "../runs/hmmt25/hints_CMU-AIRe__RLAD-Hint-Gen_K4.jsonl"

results = []
with open(path_to_hints, "r", encoding="utf-8") as f:
    for line in f:
        results.append(json.loads(line))
results[0]["abstractions"]

['<note1>\n  <description>Use Legendre’s formula to find the exponent of a prime p in n!: the exponent is ∑_{i=1 to ∞} floor(n/p^i). This gives the exact prime factorization of n!.</description>\n  <example>For 7!, the exponent of 2 is floor(7/2)+floor(7/4)=3+1=4, so 7! contains 2^4 in its factorization.</example>\n</note1>\n<note2>\n  <description>The sum of divisors function σ(n) for n=∏p_i^{a_i} is ∏(1 + p_i + p_i^2 + … + p_i^{a_i}). Use this to compute σ(n) directly from its prime factorization.</description>\n  <example>If n=2^3·3^2, then σ(n)=(1+2+4+8)·(1+3+9)=15·13=195.</example>\n</note3>\n<note4>\n  <description>To select numbers with a specific last digit, factor out 10^k and work modulo 10. For primes other than 2 or 5, their units-digit contributions cycle; only those cycles that map to the desired digit matter.</description>\n  <example>For 12!, divide by 10^2 and compute σ(mod 10). Only primes congruent to 1 or 9 mod 10 contribute to units digit 1, so ignore p≡3,7,9 mod 1

In [20]:
from huggingface_hub import login


login(token="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")
from huggingface_hub import whoami
whoami()

{'type': 'user',
 'id': '697a36e094d75edc6188d98e',
 'name': 'vascorn',
 'fullname': 'Vasiliki Rizou',
 'isPro': False,
 'avatarUrl': '/avatars/f5301ef0d27fe1e21c88eba9fa80a56d.svg',
 'orgs': [],
 'auth': {'type': 'access_token',
  'accessToken': {'displayName': 'notebook',
   'role': 'fineGrained',
   'createdAt': '2026-02-01T21:01:41.472Z',
   'fineGrained': {'canReadGatedRepos': True,
    'global': [],
    'scoped': [{'entity': {'_id': '6745f28f9333dfcc06268b1e',
       'type': 'model',
       'name': 'meta-llama/Llama-3.3-70B-Instruct'},
      'permissions': ['repo.access.read', 'repo.content.read', 'repo.write']},
     {'entity': {'_id': '697a36e094d75edc6188d98e',
       'type': 'user',
       'name': 'vascorn'},
      'permissions': ['repo.access.read',
       'repo.content.read',
       'repo.write']}]}}}}

### (Once I get access...)

In [25]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

olmo = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

def build_hint_prompt(problem: str):
    system = (
        "You are a hint/abstraction generator for competition math.\n"
        "Given a problem, write a CHEATSHEET of 3–6 short notes that help solve the problem.\n"
        "Rules:\n"
        "- Do NOT reveal or compute the final numeric answer.\n"
        "- Prefer general procedures, identities, pitfalls, and checks.\n"
        "- Output ONLY the cheatsheet notes, no extra commentary.\n"
        "- Use the XML-ish format:\n"
        "  <note1><description>...</description><example>...</example></note1>\n"
        "  <note2>...</note2>\n"
    )
    user = f"PROBLEM:\n{problem}"
    return system, user

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]