# Sinks: quickstart notebook

This notebook is a scratchpad to run the **current reproducible pipeline**:
- produce `artifacts/results/mmlu_accuracy_sink_*.json`
- analyze correlations (`sink_mass`, `entropy`, `correct`)
- build plots into `artifacts/plots/`

> Note: running the models requires working `torch` and `transformers` installations and, ideally, a GPU.


In [2]:
from __future__ import annotations

import json
import shlex
import subprocess
from pathlib import Path


def find_repo_root(start: Path | None = None) -> Path:
    """Return the nearest directory at or above ``start`` that holds a pyproject.toml.

    Args:
        start: Directory to begin the search from. When omitted, the current
            working directory is used. The path is resolved before searching.

    Returns:
        The first directory, checking ``start`` itself and then each parent in
        turn, that contains a ``pyproject.toml`` entry.

    Raises:
        FileNotFoundError: If no directory on the way up contains
            ``pyproject.toml``.
    """
    origin = (start if start else Path.cwd()).resolve()
    # Closest match wins: the origin is checked first, then its ancestors.
    root = next(
        (
            candidate
            for candidate in (origin, *origin.parents)
            if candidate.joinpath("pyproject.toml").exists()
        ),
        None,
    )
    if root is None:
        raise FileNotFoundError(
            "Could not find repo root (pyproject.toml not found). "
            "Run the notebook from within the repo, or set REPO manually."
        )
    return root


# Resolve the repository root once; the config path below is derived from it.
REPO = find_repo_root()
CONFIG_PATH = REPO / "configs" / "mmlu_accuracy.json"
# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, so the config parses the same way on every OS.
config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
# Bare expression: the notebook displays the parsed config as the cell output.
config

{'task': 'mmlu_accuracy_sink',
 'defaults': {'samples': 2000,
  'sink_tokens': 4,
  'chat': 'auto',
  'quantization': 'none',
  'device': 'cuda'},
 'models': ['mistralai/Mistral-7B-v0.1',
  'mistralai/Mistral-Nemo-Instruct-2407',
  'mistralai/Mixtral-8x7B-Instruct-v0.1',
  'Qwen/Qwen2.5-7B-Instruct',
  'Qwen/Qwen2.5-14B-Instruct',
  'Qwen/Qwen2.5-72B-Instruct']}

In [3]:
# Example: run a *small* smoke test (few samples) to verify everything works.
# Adjust model / samples depending on your hardware.
#
# Note: If `uv` is not available inside your Jupyter kernel PATH,
# we fall back to running via the current Python interpreter.

import sys
import shutil


def run_python(script: Path, args: list[str] | None = None) -> subprocess.CompletedProcess:
    """Run a Python script from the repo root, via ``uv run`` when ``uv`` is on PATH.

    Args:
        script: Path of the script to execute.
        args: Extra command-line arguments for the script. ``None`` (the
            default) means no extra arguments.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command. A non-zero
        exit never raises (``check=False``); a warning is printed to stderr and
        callers can inspect ``returncode``.
    """
    extra = list(args or [])
    uv = shutil.which("uv")
    # Without uv, fall back to the interpreter that is running this kernel.
    launcher = [uv, "run", "python"] if uv else [sys.executable]
    cmd = [*launcher, str(script), *extra]
    # shlex.join quotes arguments that contain spaces, so the echoed line is
    # unambiguous and can be pasted into a shell.
    print("CMD:", shlex.join(cmd))
    result = subprocess.run(cmd, cwd=str(REPO), check=False)
    if result.returncode != 0:
        # Surface the failure instead of letting it pass unnoticed.
        print(f"WARNING: command exited with code {result.returncode}", file=sys.stderr)
    return result


# Smoke-test target: the first model listed in the config.
model = config["models"][0]

# Flatten (flag, value) pairs into the argv list. The sample count is pinned
# to a small value here; the other settings come from the config defaults.
args = [
    token
    for flag, value in (
        ("--model", model),
        ("--samples", "20"),
        ("--sink_tokens", str(config["defaults"]["sink_tokens"])),
        ("--chat", config["defaults"]["chat"]),
    )
    for token in (flag, value)
]

print("REPO:", REPO)
# Last expression of the cell: the returned process object is the cell output.
run_python(REPO / "scripts" / "measure_accuracy.py", args)

REPO: /Users/aeshef/Downloads/sinks
uv run python /Users/aeshef/Downloads/sinks/scripts/measure_accuracy.py --model mistralai/Mistral-7B-v0.1 --samples 20 --sink_tokens 4 --chat auto


Error: 

In [None]:
# Analyze all existing results + build the main comparison plot

import sys
import shutil


def run_python(script: Path, args: list[str] | None = None) -> subprocess.CompletedProcess:
    """Run a Python script from the repo root, via ``uv run`` when ``uv`` is on PATH.

    NOTE(review): this cell re-defines the helper that the smoke-test cell
    also defines; once both cells have run, this later definition is the one
    in effect. Consider keeping a single definition.

    Args:
        script: Path of the script to execute.
        args: Extra command-line arguments for the script. ``None`` (the
            default) means no extra arguments.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command. A non-zero
        exit never raises (``check=False``); a warning is printed to stderr and
        callers can inspect ``returncode``.
    """
    extra = list(args or [])
    uv = shutil.which("uv")
    # Without uv, fall back to the interpreter that is running this kernel.
    launcher = [uv, "run", "python"] if uv else [sys.executable]
    cmd = [*launcher, str(script), *extra]
    # shlex.join quotes arguments that contain spaces, so the echoed line is
    # unambiguous and can be pasted into a shell.
    print("CMD:", shlex.join(cmd))
    result = subprocess.run(cmd, cwd=str(REPO), check=False)
    if result.returncode != 0:
        # Surface the failure instead of letting it pass unnoticed.
        print(f"WARNING: command exited with code {result.returncode}", file=sys.stderr)
    return result


# Run both post-processing scripts and remember which ones failed. The second
# script still runs if the first one exits non-zero.
failed_scripts = []
for script_name in ("analyze_mmlu_results.py", "compare_accuracy_sink.py"):
    completed = run_python(REPO / "scripts" / script_name)
    if completed.returncode != 0:
        failed_scripts.append(f"{script_name} (exit code {completed.returncode})")

# Only point at the plots directory when every script succeeded; otherwise
# report the failures so stale or missing plots are not mistaken for fresh ones.
if failed_scripts:
    print("FAILED:", ", ".join(failed_scripts), "- plots may be missing or stale")
else:
    print("Plots:", REPO / "artifacts" / "plots")