# a-lm colab training

This notebook runs a full from-scratch pretrain on Colab using the larger `nano` config and the Colab corpus preset.


## Optional drive mount
Use this if your repo or outputs live on Google Drive.


In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the repo-location cell below looks for
# a checkout under /content/drive/MyDrive/a-lm.
drive.mount("/content/drive")

Mounted at /content/drive


## Locate or clone the repo
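If the repo already exists on Drive (`/content/drive/MyDrive/a-lm`) or locally (`/content/a-lm`), this cell uses it, preferring whichever holds the newest checkpoint. Otherwise it clones into the Drive location when Drive is mounted, or into `/content/a-lm` when it is not.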


In [None]:
import os
from pathlib import Path

# Two places the repo may live: Google Drive (persistent) and the local Colab disk.
# If either location already contains checkpoints, the one with the newest
# checkpoint is chosen further down in this cell; otherwise Drive is preferred.
drive_repo = Path("/content/drive/MyDrive/a-lm")
local_repo = Path("/content/a-lm")
# NOTE(review): repo_candidates is not read anywhere else in the visible cells.
repo_candidates = [drive_repo, local_repo]


def newest_run_mtime(repo: Path) -> float:
    """Return the modification time of the newest usable checkpoint in ``repo``.

    A run counts only when both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json`` exist, and only run directories whose
    name starts with ``2`` are scanned. Returns ``-1.0`` when ``runs/`` or
    ``artifacts/`` is missing or when no run qualifies.
    """
    runs_root, artifacts_root = repo / "runs", repo / "artifacts"
    if not (runs_root.is_dir() and artifacts_root.is_dir()):
        return -1.0

    # Seed with the sentinel so max() works even when nothing qualifies.
    stamps = [-1.0]
    for entry in runs_root.glob("2*"):
        checkpoint = entry / "pretrain" / "ckpt-last.pt"
        tokenizer = artifacts_root / entry.name / "tokenizer.json"
        if not (checkpoint.exists() and tokenizer.exists()):
            continue
        stamps.append(float(checkpoint.stat().st_mtime))
    return max(stamps)


# Decide which checkout to work in. -1.0 means "no usable checkpoint there"
# (also used when the directory itself does not exist).
repo_path: Path | None = None
drive_mtime = newest_run_mtime(drive_repo) if drive_repo.is_dir() else -1.0
local_mtime = newest_run_mtime(local_repo) if local_repo.is_dir() else -1.0

if max(drive_mtime, local_mtime) >= 0:
    # At least one location has a checkpoint: take the newer one (Drive wins ties).
    repo_path = drive_repo if drive_mtime >= local_mtime else local_repo
elif drive_repo.is_dir():
    # No checkpoints anywhere, but a Drive checkout exists.
    repo_path = drive_repo
elif local_repo.is_dir():
    repo_path = local_repo
else:
    # Fresh clone: default to Drive if mounted.
    # (drive_repo.parent is /content/drive/MyDrive, which this check treats as "Drive is mounted".)
    repo_path = drive_repo if drive_repo.parent.is_dir() else local_repo
    !git clone https://github.com/Ammaar-Alam/a-lm.git {repo_path}

# Later cells use paths relative to the repo root, so make it the working directory.
%cd {repo_path}
print("repo:", repo_path)

/content/a-lm


In [None]:
# Reminder of the cell order to follow from here; the three string literals are
# concatenated into a single printed line.
print(
    "Next: run the 'Install pinned dependencies' cell below."
    " Restart only if Colab warns about imports."
    " Then continue to 'Hugging Face login' and 'Start pretraining'."
)

Next: run the 'Install pinned dependencies' cell below. Restart only if Colab warns about imports. Then continue to 'Hugging Face login' and 'Start pretraining'.


## Install pinned dependencies
These versions avoid Colab crashes and keep `transformers` compatibility.
This cell intentionally does **not** downgrade `numpy` (downgrades force restarts and conflict with Colab preinstalls).


In [None]:
%pip install -U "huggingface_hub<1.0" "datasets>=2.19,<3" "pyarrow>=15.0.2,<19" \
  "gcsfs" "tokenizers>=0.22.0,<=0.23.0"
%pip install -e . --no-deps

Collecting datasets<3,>=2.19
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting gcsfs
  Downloading gcsfs-2026.1.0-py3-none-any.whl.metadata (2.1 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub<1.0)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
INFO: pip is looking at multiple versions of gcsfs to determine which version is compatible with other requirements. This could take a while.
Collecting gcsfs
  Downloading gcsfs-2025.12.0-py3-none-any.whl.metadata (2.1 kB)
  Downloading gcsfs-2025.10.0-py2.py3-none-any.whl.metadata (2.1 kB)
  Downloading gcsfs-2025.9.0-py2.py3-none-any.whl.metadata (2.1 kB)
  Downloading gcsfs-2025.7.0-py2.py3-none-any.whl.metadata (2.1 kB)
  Downloading gcsfs-2025.5.1-py2.py3-none-any.whl.metadata (1.9 kB)
  Downloading gcsfs-2025.5.0.post1-py2.py3-none-any.whl.metadata (1.9 kB)
  Downloading gcsfs-2025.5.0-py2.py3-none-any.whl.metadata (1.9 kB)
INFO: pip is still looking at multiple versions of gcsfs to determin

## Optional: enable verbose training logs
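By default the progress bar updates live. If you want per-step log lines, run this cell. Once `configs/train_colab_verbose.yaml` exists, the pretraining and resume cells pick it up automatically; delete the file to go back to `configs/train_colab.yaml`.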


In [None]:
from pathlib import Path

# Write an alternative training config that logs every step instead of drawing a
# progress bar. The pretraining and resume cells select this file whenever it exists.
#
# Floats are written with an explicit decimal point (`3.0e-4`, `1.0e-8`): YAML 1.1
# loaders such as PyYAML only recognise scientific notation as a float when the
# mantissa contains a ".", so a bare `3e-4` would be loaded as the *string* "3e-4".
# The numeric values are unchanged.
# The file starts with a blank line because of the newline after the opening quotes.
Path("configs/train_colab_verbose.yaml").write_text("""
optim:
  name: adamw
  lr: 3.0e-4
  betas: [0.9, 0.95]
  weight_decay: 0.1
  eps: 1.0e-8

scheduler:
  name: cosine
  warmup_steps: 1000
  max_steps: 20000

training:
  micro_batch_size: 4
  gradient_accumulation: 8
  max_steps: 20000
  checkpoint_interval: 500
  gradient_clip_norm: 0.5
  mixed_precision: fp16
  grad_checkpointing: false
  seed: 1337
  dataloader_workers: 2

logging:
  log_interval: 1
  rich_progress: false
""")
print("Wrote configs/train_colab_verbose.yaml")

Wrote configs/train_colab_verbose.yaml


## Hugging Face login
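The token is read from the Colab secret named `HF_TOKEN`. Add it under **Secrets** (the key icon in the left sidebar) and grant this notebook access before running the cell.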


In [None]:
from huggingface_hub import login
from google.colab import userdata

# Fetch the value stored under the name HF_TOKEN via google.colab.userdata and
# pass it to huggingface_hub.login, so the token never appears in the notebook.
login(userdata.get('HF_TOKEN'))

## GPU check
Make sure CUDA is available before training.


In [None]:
import torch

# Show the attached GPU and driver, then the torch build, whether torch can see
# CUDA, and the CUDA version torch reports.
!nvidia-smi
print(torch.__version__, torch.cuda.is_available(), torch.version.cuda)

Thu Jan 29 21:37:00 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA H100 80GB HBM3          Off |   00000000:04:00.0 Off |                    0 |
| N/A   31C    P0             69W /  700W |       0MiB /  81559MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

## Start pretraining
This cell **always** sets `LAST_RUN.txt`.

- If it finds an existing run (`runs/<RUN>/pretrain/ckpt-last.pt` + `artifacts/<RUN>/tokenizer.json`), it will reuse it and **skip pretraining**.
- If it finds a `*.zip` containing those paths, it will import them and **skip pretraining**.
- Otherwise it starts a fresh pretrain run.

Tip: you can upload a zip via the left sidebar **Files → Upload** (it lands in `/content`).


In [None]:
import os
import shutil
import subprocess
import time
import zipfile
from pathlib import Path

# If True, always start a fresh pretrain run (ignores existing checkpoints and
# skips the zip import). The code further down in this cell also sets it to
# True on its own when nothing reusable is found.
force_new_pretrain = False


def find_usable_runs(repo: Path) -> list[tuple[float, str]]:
    """List runs in ``repo`` that have both a checkpoint and a tokenizer.

    A run qualifies when ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json`` both exist; only directories whose name
    starts with ``2`` are scanned.

    Returns:
        ``(checkpoint mtime, run id)`` pairs in ascending order, so the newest
        run is the last element. Empty when ``runs/`` or ``artifacts/`` is
        missing or when nothing qualifies.
    """
    runs_root = repo / "runs"
    artifacts_root = repo / "artifacts"
    if not (runs_root.is_dir() and artifacts_root.is_dir()):
        return []

    usable = [
        (float(checkpoint.stat().st_mtime), entry.name)
        for entry in runs_root.glob("2*")
        # Single-element inner loop: binds `checkpoint` for the filter and the result.
        for checkpoint in [entry / "pretrain" / "ckpt-last.pt"]
        if checkpoint.exists() and (artifacts_root / entry.name / "tokenizer.json").exists()
    ]
    return sorted(usable)


def choose_repo_with_runs(candidates: list[Path]) -> Path | None:
    best: tuple[float, Path] | None = None
    for cand in candidates:
        if not cand.is_dir():
            continue
        runs = find_usable_runs(cand)
        if not runs:
            continue
        mtime, _run_id = runs[-1]
        if best is None or mtime > best[0]:
            best = (mtime, cand)
    return best[1] if best else None


def zip_run_ids(zip_path: Path) -> list[str]:
    """Return the run ids for which the archive holds a checkpoint and a tokenizer.

    A member contributes a checkpoint when its path contains
    ``runs/<RUN>/pretrain/ckpt-last.pt`` and a tokenizer when it contains
    ``artifacts/<RUN>/tokenizer.json``. Either pattern may sit below any number
    of wrapper directories. The result is the sorted intersection of both sets.
    """
    with zipfile.ZipFile(zip_path) as archive:
        member_parts = [Path(member).parts for member in archive.namelist()]

    with_checkpoint = {
        parts[i + 1]
        for parts in member_parts
        for i in range(len(parts) - 3)
        if parts[i] == "runs" and parts[i + 2 : i + 4] == ("pretrain", "ckpt-last.pt")
    }
    with_tokenizer = {
        parts[i + 1]
        for parts in member_parts
        for i in range(len(parts) - 2)
        if parts[i] == "artifacts" and parts[i + 2] == "tokenizer.json"
    }
    return sorted(with_checkpoint & with_tokenizer)


def extract_runs_and_artifacts(zip_path: Path, dest_repo: Path) -> int:
    def rel_path(parts: tuple[str, ...]) -> Path | None:
        for i, part in enumerate(parts):
            if part in {"runs", "artifacts"}:
                return Path(*parts[i:])
            if part == "data" and i + 1 < len(parts) and parts[i + 1] in {"packed", "sft_packed", "sft"}:
                return Path(*parts[i:])
        return None

    extracted = 0
    with zipfile.ZipFile(zip_path) as zf:
        for info in zf.infolist():
            if info.is_dir():
                continue
            parts = Path(info.filename).parts
            rel = rel_path(parts)
            if rel is None:
                continue
            dest = dest_repo / rel
            if dest.exists():
                continue
            dest.parent.mkdir(parents=True, exist_ok=True)
            with zf.open(info) as src, dest.open("wb") as dst:
                shutil.copyfileobj(src, dst)
            extracted += 1
    return extracted


# The repo root is wherever the notebook currently is (set by the %cd in the
# "Locate or clone the repo" cell).
repo_root = Path.cwd()

# If another repo location already has checkpoints, switch there.
# Both known locations are compared; the one with the newest usable checkpoint
# wins, and the working directory is changed only if that differs from cwd.
alternate_repos = [Path("/content/drive/MyDrive/a-lm"), Path("/content/a-lm")]
best_repo = choose_repo_with_runs(alternate_repos)
if best_repo is not None and best_repo.resolve() != repo_root.resolve():
    print("Found existing run artifacts in:", best_repo)
    os.chdir(best_repo)
    repo_root = Path.cwd()
    print("cwd ->", repo_root)

# Reuse an existing run if present (prevents re-pretraining by accident).
# Order of attempts: (1) a usable run already on disk, (2) a zip found in a few
# known directories, (3) an interactively uploaded zip, (4) fall through to a
# fresh pretrain by setting force_new_pretrain.
if not force_new_pretrain:
    runs = find_usable_runs(repo_root)
    if runs:
        # Newest usable run is last; record it for the later cells.
        _mtime, run_id = runs[-1]
        Path("LAST_RUN.txt").write_text(run_id)
        print("Found existing pretrain checkpoint; skipping pretraining.")
        print("run:", run_id)
    else:
        # Try importing from a zip (either in /content, Drive, or the repo parent).
        # Only the top level of each directory is searched (no recursion). The
        # same directory can appear twice here, which only produces duplicate
        # candidates.
        search_dirs = [repo_root, repo_root.parent, Path("/content")]
        drive_root = Path("/content/drive/MyDrive")
        if drive_root.exists():
            search_dirs.append(drive_root)
        zip_files: list[Path] = []
        for d in search_dirs:
            if d.is_dir():
                zip_files.extend(sorted(d.glob("*.zip")))

        # Rank zips by (number of complete runs inside, zip mtime); unreadable
        # archives and archives without a complete run are skipped.
        best_zip: tuple[int, float, Path] | None = None
        for z in zip_files:
            try:
                run_ids = zip_run_ids(z)
            except zipfile.BadZipFile:
                continue
            if not run_ids:
                continue
            score = len(run_ids)
            mtime = float(z.stat().st_mtime)
            if best_zip is None or (score, mtime) > (best_zip[0], best_zip[1]):
                best_zip = (score, mtime, z)

        if best_zip is None:
            # Nothing on disk: offer an interactive upload. Any failure here
            # (including the import failing outside Colab) is reported and
            # treated as "no zip available".
            try:
                from google.colab import files

                print("No checkpoints found on disk.")
                print("Upload a zip that contains runs/<RUN>/pretrain/ckpt-last.pt and artifacts/<RUN>/tokenizer.json")
                uploaded = files.upload()
                for name in uploaded:
                    z = Path(name)
                    if z.suffix.lower() == ".zip":
                        # The first uploaded .zip is taken as-is with a placeholder
                        # score of 1; its contents are not inspected here.
                        best_zip = (1, float(z.stat().st_mtime), z)
                        break
            except Exception as exc:
                print("Zip import unavailable (not running in Colab?):", exc)

        if best_zip is not None:
            zip_path = best_zip[2]
            print("Importing checkpoints from zip:", zip_path)
            extracted = extract_runs_and_artifacts(zip_path, repo_root)
            print("files extracted:", extracted)
            # Re-scan: the import only counts if it produced a usable run.
            runs = find_usable_runs(repo_root)
            if runs:
                _mtime, run_id = runs[-1]
                Path("LAST_RUN.txt").write_text(run_id)
                print("Imported pretrain checkpoint; skipping pretraining.")
                print("run:", run_id)
            else:
                raise FileNotFoundError("Zip import completed but no usable run was found.")
        else:
            print("No existing checkpoints found. Starting a fresh pretrain run...")
            force_new_pretrain = True

# Fresh pretrain: reached when the flag was set by hand or when nothing
# reusable was found above.
if force_new_pretrain:
    # Run ids are timestamps, which is why other cells glob for "2*".
    run_id = time.strftime("%Y%m%d-%H%M%S")
    # NOTE: LAST_RUN.txt is written before training starts, so it names this run
    # even if the make invocation below fails.
    Path("LAST_RUN.txt").write_text(run_id)
    print("run:", run_id)

    # Use the verbose config if the optional cell created it.
    train_cfg = "configs/train_colab.yaml"
    if Path("configs/train_colab_verbose.yaml").exists():
        train_cfg = "configs/train_colab_verbose.yaml"
        print("using verbose logging config")

    cmd = ["make", "colab-pretrain", f"RUN={run_id}", f"TRAIN_CFG={train_cfg}"]
    print("command:", " ".join(cmd))
    # Output is not captured; it streams to the cell. A non-zero exit becomes an error.
    result = subprocess.run(cmd)
    if result.returncode != 0:
        raise RuntimeError(f"make failed with exit code {result.returncode}")

run: 20260129-213701
using verbose logging config
command: make colab-pretrain RUN=20260129-213701 TRAIN_CFG=configs/train_colab_verbose.yaml


RuntimeError: make failed with exit code 2

In [None]:
import subprocess
from pathlib import Path

# Resume pretraining: locate the newest run that can be continued.
repo_root = Path.cwd()
runs_dir = repo_root / "runs"
artifacts_dir = repo_root / "artifacts"
packed_root = repo_root / "data" / "packed"

# Find candidate runs that have the files needed to resume.
# Unlike the "usable run" check in the other cells, resuming additionally
# requires the packed dataset directory data/packed/<RUN>.
candidates: list[tuple[float, str]] = []
for run_dir in sorted(runs_dir.glob("2*")):
    run_id = run_dir.name
    ckpt = run_dir / "pretrain" / "ckpt-last.pt"
    tok = artifacts_dir / run_id / "tokenizer.json"
    packed = packed_root / run_id
    if ckpt.exists() and tok.exists() and packed.exists():
        candidates.append((float(ckpt.stat().st_mtime), run_id))

if not candidates:
    raise FileNotFoundError(
        "No resumable runs found (expected runs/<RUN>/pretrain/ckpt-last.pt + artifacts/<RUN>/tokenizer.json + data/packed/<RUN>)"
    )

# Newest checkpoint by modification time.
candidates.sort()
run_id = candidates[-1][1]
# Overwrites LAST_RUN.txt, so later cells follow the resumed run.
Path("LAST_RUN.txt").write_text(run_id)
print("resuming run:", run_id)

# Paths for the selected run (the loop above left these names pointing at the
# last directory scanned, not necessarily the chosen one).
ckpt = runs_dir / run_id / "pretrain" / "ckpt-last.pt"
tok = artifacts_dir / run_id / "tokenizer.json"
packed = packed_root / run_id

# Same config choice as the pretraining cell: verbose config if it exists.
train_cfg = "configs/train_colab.yaml"
if Path("configs/train_colab_verbose.yaml").exists():
    train_cfg = "configs/train_colab_verbose.yaml"

# Call the training script directly, writing into the run's existing output
# directory and resuming from its last checkpoint.
cmd = [
    "python3",
    "scripts/train_pretrain.py",
    "--model",
    "configs/nano.yaml",
    "--train",
    train_cfg,
    "--data",
    str(packed),
    "--out",
    str(runs_dir / run_id / "pretrain"),
    "--device",
    "auto",
    "--tokenizer",
    str(tok),
    "--resume",
    str(ckpt),
]

print("command:", " ".join(cmd))
# Output streams to the cell; a non-zero exit is turned into an error.
result = subprocess.run(cmd)
if result.returncode != 0:
    raise RuntimeError(f"resume failed with exit code {result.returncode}")

resuming run: 20260129-214515
command: python3 scripts/train_pretrain.py --model configs/nano.yaml --train configs/train_colab_verbose.yaml --data data/packed/20260129-214515 --out runs/20260129-214515/pretrain --device auto --tokenizer artifacts/20260129-214515/tokenizer.json --resume runs/20260129-214515/pretrain/ckpt-last.pt


KeyboardInterrupt: 

## Chat with the latest checkpoint


In [15]:
from pathlib import Path

def resolve_run_id() -> str:
    """Return the id of the run that this cell should operate on.

    ``LAST_RUN.txt`` (in the current working directory) is honoured only when the
    run it names has both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json``. The pretraining cell writes that file
    before training starts, so after a failed pretrain it can name a run without
    a checkpoint; in that case the newest usable run on disk is chosen instead
    and ``LAST_RUN.txt`` is rewritten to match.

    Returns:
        The selected run id.

    Raises:
        FileNotFoundError: if no run has both a checkpoint and a tokenizer.
    """
    last = Path("LAST_RUN.txt")

    def is_usable(candidate: str) -> bool:
        checkpoint = Path("runs") / candidate / "pretrain" / "ckpt-last.pt"
        tokenizer = Path("artifacts") / candidate / "tokenizer.json"
        return checkpoint.exists() and tokenizer.exists()

    if last.exists():
        value = last.read_text().strip()
        if value:
            if is_usable(value):
                return value
            print(
                f"LAST_RUN.txt names {value!r}, which has no pretrain checkpoint/tokenizer;"
                " falling back to the newest usable run."
            )

    candidates: list[tuple[float, str]] = []
    for run_dir in Path("runs").glob("2*"):
        if is_usable(run_dir.name):
            ckpt = run_dir / "pretrain" / "ckpt-last.pt"
            candidates.append((float(ckpt.stat().st_mtime), run_dir.name))

    if not candidates:
        raise FileNotFoundError(
            "No runs found. Run the 'Start pretraining' cell (it can import a zip) first."
        )

    # Newest checkpoint by modification time (ties broken by run id).
    run_id = max(candidates)[1]
    last.write_text(run_id)
    return run_id


# Resolve the run, then open the interactive chat through the repo's Makefile.
# Per the recorded output, this loads runs/<RUN>/pretrain/ckpt-last.pt with the
# run's tokenizer.
run_id = resolve_run_id()
print("using run", run_id)
!make chat RUN={run_id}

using run 20260129-214515
python3 scripts/chat_cli.py \
	--checkpoint runs/20260129-214515/pretrain/ckpt-last.pt \
	--tokenizer artifacts/20260129-214515/tokenizer.json \
	--device auto
Loaded model on cuda. Context window ~2048 tokens.
Type /exit to quit. Press Ctrl+C to abort.
you> hello
alm> New research at the University of Virginia by a research group of women who are at the University of Virginia who are at a high school. The research program at the University of Virginia has been the primary school of a career in the field of education. The program is a primary school of high school and is the most common for women in their college and in the school. The program is a program of the program program that is conducted by the school district of the University of Virginia. 
 The first New World is the U.S. National Science and Technology Program (NCI), which is a team of researchers and University researchers. The
you> how are you doing?
alm> The U.S. National Science Foundation (NSF

## SFT (instruction tuning)

Pretraining teaches general language modeling, but not chat behavior. SFT is the step that teaches the `System/User/Assistant` prompt format used by `scripts/chat_cli.py`.


In [None]:
from pathlib import Path
import subprocess

def resolve_run_id() -> str:
    """Return the id of the run that this cell should operate on.

    ``LAST_RUN.txt`` (in the current working directory) is honoured only when the
    run it names has both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json``. The pretraining cell writes that file
    before training starts, so after a failed pretrain it can name a run without
    a checkpoint; in that case the newest usable run on disk is chosen instead
    and ``LAST_RUN.txt`` is rewritten to match.

    Returns:
        The selected run id.

    Raises:
        FileNotFoundError: if no run has both a checkpoint and a tokenizer.
    """
    last = Path("LAST_RUN.txt")

    def is_usable(candidate: str) -> bool:
        checkpoint = Path("runs") / candidate / "pretrain" / "ckpt-last.pt"
        tokenizer = Path("artifacts") / candidate / "tokenizer.json"
        return checkpoint.exists() and tokenizer.exists()

    if last.exists():
        value = last.read_text().strip()
        if value:
            if is_usable(value):
                return value
            print(
                f"LAST_RUN.txt names {value!r}, which has no pretrain checkpoint/tokenizer;"
                " falling back to the newest usable run."
            )

    candidates: list[tuple[float, str]] = []
    for run_dir in Path("runs").glob("2*"):
        if is_usable(run_dir.name):
            ckpt = run_dir / "pretrain" / "ckpt-last.pt"
            candidates.append((float(ckpt.stat().st_mtime), run_dir.name))

    if not candidates:
        raise FileNotFoundError(
            "No runs found. Run the 'Start pretraining' cell (it can import a zip) first."
        )

    # Newest checkpoint by modification time (ties broken by run id).
    run_id = max(candidates)[1]
    last.write_text(run_id)
    return run_id


# Step 1 of SFT: build the cleaned instruction dataset for this run.
run_id = resolve_run_id()
print("using run", run_id)

# Output lives under data/sft/<RUN>/ so each run keeps its own copy.
sft_jsonl = Path(f"data/sft/{run_id}/clean.jsonl")
sft_jsonl.parent.mkdir(parents=True, exist_ok=True)

# Set an integer for quicker iterations (e.g. 20000). Leave as None to use full datasets.
max_per_source = None

# --max-per-source is only passed when a limit is set.
cmd = ["python3", "scripts/prepare_sft.py", "--out", str(sft_jsonl)]
if max_per_source is not None:
    cmd += ["--max-per-source", str(max_per_source)]
print("command:", " ".join(cmd))
# check=True raises CalledProcessError if the script exits non-zero.
subprocess.run(cmd, check=True)


In [None]:
from pathlib import Path
import subprocess

def resolve_run_id() -> str:
    """Return the id of the run that this cell should operate on.

    ``LAST_RUN.txt`` (in the current working directory) is honoured only when the
    run it names has both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json``. The pretraining cell writes that file
    before training starts, so after a failed pretrain it can name a run without
    a checkpoint; in that case the newest usable run on disk is chosen instead
    and ``LAST_RUN.txt`` is rewritten to match.

    Returns:
        The selected run id.

    Raises:
        FileNotFoundError: if no run has both a checkpoint and a tokenizer.
    """
    last = Path("LAST_RUN.txt")

    def is_usable(candidate: str) -> bool:
        checkpoint = Path("runs") / candidate / "pretrain" / "ckpt-last.pt"
        tokenizer = Path("artifacts") / candidate / "tokenizer.json"
        return checkpoint.exists() and tokenizer.exists()

    if last.exists():
        value = last.read_text().strip()
        if value:
            if is_usable(value):
                return value
            print(
                f"LAST_RUN.txt names {value!r}, which has no pretrain checkpoint/tokenizer;"
                " falling back to the newest usable run."
            )

    candidates: list[tuple[float, str]] = []
    for run_dir in Path("runs").glob("2*"):
        if is_usable(run_dir.name):
            ckpt = run_dir / "pretrain" / "ckpt-last.pt"
            candidates.append((float(ckpt.stat().st_mtime), run_dir.name))

    if not candidates:
        raise FileNotFoundError(
            "No runs found. Run the 'Start pretraining' cell (it can import a zip) first."
        )

    # Newest checkpoint by modification time (ties broken by run id).
    run_id = max(candidates)[1]
    last.write_text(run_id)
    return run_id


# Step 2 of SFT: tokenize and pack the cleaned JSONL written by the previous cell.
run_id = resolve_run_id()
tok = Path(f"artifacts/{run_id}/tokenizer.json")
sft_jsonl = Path(f"data/sft/{run_id}/clean.jsonl")
sft_packed = Path(f"data/sft_packed/{run_id}")

# Packing parameters, forwarded unchanged to scripts/pack_sft.py below.
# NOTE(review): workers is fixed at 6 regardless of how many CPUs the runtime has.
seq_len = 384
shard_size = 1_000_000
workers = 6
chunk_size = 64

cmd = [
    "python3",
    "scripts/pack_sft.py",
    "--tokenizer",
    str(tok),
    "--jsonl",
    str(sft_jsonl),
    "--out",
    str(sft_packed),
    "--seq-len",
    str(seq_len),
    "--shard-size",
    str(shard_size),
    "--workers",
    str(workers),
    "--chunk-size",
    str(chunk_size),
]
print("command:", " ".join(cmd))
# check=True raises CalledProcessError if the script exits non-zero.
subprocess.run(cmd, check=True)


In [None]:
from pathlib import Path
import subprocess

def resolve_run_id() -> str:
    """Return the id of the run that this cell should operate on.

    ``LAST_RUN.txt`` (in the current working directory) is honoured only when the
    run it names has both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json``. The pretraining cell writes that file
    before training starts, so after a failed pretrain it can name a run without
    a checkpoint; in that case the newest usable run on disk is chosen instead
    and ``LAST_RUN.txt`` is rewritten to match.

    Returns:
        The selected run id.

    Raises:
        FileNotFoundError: if no run has both a checkpoint and a tokenizer.
    """
    last = Path("LAST_RUN.txt")

    def is_usable(candidate: str) -> bool:
        checkpoint = Path("runs") / candidate / "pretrain" / "ckpt-last.pt"
        tokenizer = Path("artifacts") / candidate / "tokenizer.json"
        return checkpoint.exists() and tokenizer.exists()

    if last.exists():
        value = last.read_text().strip()
        if value:
            if is_usable(value):
                return value
            print(
                f"LAST_RUN.txt names {value!r}, which has no pretrain checkpoint/tokenizer;"
                " falling back to the newest usable run."
            )

    candidates: list[tuple[float, str]] = []
    for run_dir in Path("runs").glob("2*"):
        if is_usable(run_dir.name):
            ckpt = run_dir / "pretrain" / "ckpt-last.pt"
            candidates.append((float(ckpt.stat().st_mtime), run_dir.name))

    if not candidates:
        raise FileNotFoundError(
            "No runs found. Run the 'Start pretraining' cell (it can import a zip) first."
        )

    # Newest checkpoint by modification time (ties broken by run id).
    run_id = max(candidates)[1]
    last.write_text(run_id)
    return run_id


# Step 3 of SFT: fine-tune starting from the run's pretrain checkpoint.
run_id = resolve_run_id()
tok = Path(f"artifacts/{run_id}/tokenizer.json")
init_ckpt = Path(f"runs/{run_id}/pretrain/ckpt-last.pt")
sft_packed = Path(f"data/sft_packed/{run_id}")
# SFT checkpoints go to runs/<RUN>/sft, next to the pretrain output.
sft_out = Path(f"runs/{run_id}/sft")

model_cfg = "configs/nano.yaml"
train_cfg = "configs/sft.yaml"

cmd = [
    "python3",
    "scripts/train_sft.py",
    "--model",
    model_cfg,
    "--train",
    train_cfg,
    "--data",
    str(sft_packed),
    "--out",
    str(sft_out),
    "--device",
    "auto",
    "--init",
    str(init_ckpt),
    "--tokenizer",
    str(tok),
]
print("command:", " ".join(cmd))
# check=True raises CalledProcessError if the script exits non-zero.
subprocess.run(cmd, check=True)


In [None]:
from pathlib import Path

def resolve_run_id() -> str:
    """Return the id of the run that this cell should operate on.

    ``LAST_RUN.txt`` (in the current working directory) is honoured only when the
    run it names has both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json``. The pretraining cell writes that file
    before training starts, so after a failed pretrain it can name a run without
    a checkpoint; in that case the newest usable run on disk is chosen instead
    and ``LAST_RUN.txt`` is rewritten to match.

    Returns:
        The selected run id.

    Raises:
        FileNotFoundError: if no run has both a checkpoint and a tokenizer.
    """
    last = Path("LAST_RUN.txt")

    def is_usable(candidate: str) -> bool:
        checkpoint = Path("runs") / candidate / "pretrain" / "ckpt-last.pt"
        tokenizer = Path("artifacts") / candidate / "tokenizer.json"
        return checkpoint.exists() and tokenizer.exists()

    if last.exists():
        value = last.read_text().strip()
        if value:
            if is_usable(value):
                return value
            print(
                f"LAST_RUN.txt names {value!r}, which has no pretrain checkpoint/tokenizer;"
                " falling back to the newest usable run."
            )

    candidates: list[tuple[float, str]] = []
    for run_dir in Path("runs").glob("2*"):
        if is_usable(run_dir.name):
            ckpt = run_dir / "pretrain" / "ckpt-last.pt"
            candidates.append((float(ckpt.stat().st_mtime), run_dir.name))

    if not candidates:
        raise FileNotFoundError(
            "No runs found. Run the 'Start pretraining' cell (it can import a zip) first."
        )

    # Newest checkpoint by modification time (ties broken by run id).
    run_id = max(candidates)[1]
    last.write_text(run_id)
    return run_id


# Chat again, this time pointing the Makefile at the SFT checkpoint produced by
# the training cell above (runs/<RUN>/sft/ckpt-last.pt) instead of the default.
run_id = resolve_run_id()
print("using run", run_id)
!make chat RUN={run_id} CHECKPOINT=runs/{run_id}/sft/ckpt-last.pt


## RLVR post-training


In [None]:
from pathlib import Path

def resolve_run_id() -> str:
    """Return the id of the run that this cell should operate on.

    ``LAST_RUN.txt`` (in the current working directory) is honoured only when the
    run it names has both ``runs/<RUN>/pretrain/ckpt-last.pt`` and
    ``artifacts/<RUN>/tokenizer.json``. The pretraining cell writes that file
    before training starts, so after a failed pretrain it can name a run without
    a checkpoint; in that case the newest usable run on disk is chosen instead
    and ``LAST_RUN.txt`` is rewritten to match.

    Returns:
        The selected run id.

    Raises:
        FileNotFoundError: if no run has both a checkpoint and a tokenizer.
    """
    last = Path("LAST_RUN.txt")

    def is_usable(candidate: str) -> bool:
        checkpoint = Path("runs") / candidate / "pretrain" / "ckpt-last.pt"
        tokenizer = Path("artifacts") / candidate / "tokenizer.json"
        return checkpoint.exists() and tokenizer.exists()

    if last.exists():
        value = last.read_text().strip()
        if value:
            if is_usable(value):
                return value
            print(
                f"LAST_RUN.txt names {value!r}, which has no pretrain checkpoint/tokenizer;"
                " falling back to the newest usable run."
            )

    candidates: list[tuple[float, str]] = []
    for run_dir in Path("runs").glob("2*"):
        if is_usable(run_dir.name):
            ckpt = run_dir / "pretrain" / "ckpt-last.pt"
            candidates.append((float(ckpt.stat().st_mtime), run_dir.name))

    if not candidates:
        raise FileNotFoundError(
            "No runs found. Run the 'Start pretraining' cell (it can import a zip) first."
        )

    # Newest checkpoint by modification time (ties broken by run id).
    run_id = max(candidates)[1]
    last.write_text(run_id)
    return run_id


# RLVR: generate the data, train starting from the SFT checkpoint, then chat
# with the resulting checkpoint.
run_id = resolve_run_id()
print("using run", run_id)

# IPython `!` lines do not raise on a non-zero exit status, so each command below
# runs even if the previous one failed.
# NOTE(review): the recorded output shows train_rlvr.py called with
# --init runs/<RUN>/pretrain/ckpt-last.pt, which does not match the RLVR_INIT
# value passed here. The output may predate this line, or the Makefile may not
# read RLVR_INIT. Confirm.
!make rlvr-data
!make rlvr-train RUN={run_id} RLVR_INIT=runs/{run_id}/sft/ckpt-last.pt
!make chat RUN={run_id} CHECKPOINT=runs/{run_id}/rlvr/ckpt-last.pt

using run 20260129-214515
python3 scripts/generate_rlvr_math.py --out data/rlvr/math.jsonl --count 20000
Wrote 20,000 examples to data/rlvr/math.jsonl
python3 scripts/train_rlvr.py \
	--init runs/20260129-214515/pretrain/ckpt-last.pt \
	--tokenizer artifacts/20260129-214515/tokenizer.json \
	--data data/rlvr/math.jsonl \
	--out runs/20260129-214515/rlvr \
	--device auto
  _C._set_float32_matmul_precision(precision)
step=1/2000 loss=0.0002 pg=0.0002 kl=0.0000 r_mean=0.000 tok/s=81 wall=1s
step=25/2000 loss=0.0026 pg=0.0000 kl=0.0026 r_mean=0.000 tok/s=159 wall=35s
step=50/2000 loss=0.0105 pg=-0.0001 kl=0.0105 r_mean=0.000 tok/s=172 wall=72s
step=75/2000 loss=0.0421 pg=-0.0000 kl=0.0422 r_mean=0.000 tok/s=173 wall=110s
step=100/2000 loss=0.0480 pg=0.0000 kl=0.0480 r_mean=0.000 tok/s=165 wall=148s
step=125/2000 loss=0.0697 pg=-0.0000 kl=0.0697 r_mean=0.000 tok/s=170 wall=186s
step=150/2000 loss=0.0580 pg=0.0000 kl=0.0580 r_mean=0.000 tok/s=170 wall=224s
step=175/2000 loss=0.0651 pg=-0.000