# Iterative GPT-2 DeID Fine-Tuning (1-hour Loop)\n\nThis notebook runs the repeatable dev-only loop: `scripts/iterative_train.py`.\n\nWhat it does:\n- Prepares a prompt+target JSONL corpus from `{id,input,output}` pairs\n- Trains short rounds (e.g. 500–2000 steps)\n- Evaluates every round and appends a row to `summary.csv`\n- Prunes checkpoints to keep disk usage bounded\n

In [None]:
import os
from pathlib import Path

# AI_WAREHOUSE 3.0 cache layout: keep caches out of $HOME/.cache.
# Every variable is set with setdefault, so a value already exported in the
# environment always takes precedence over the defaults below.
os.environ.setdefault('EDGE_DEID_CACHE_HOME', '/mnt/c/ai_cache')
os.environ.setdefault('EDGE_DEID_MODELS_HOME', '/mnt/c/ai_models')
os.environ.setdefault('EDGE_DEID_DATA_HOME', '/mnt/data')

# Derive the third-party cache defaults from the resolved cache root rather
# than repeating the literal path, so overriding EDGE_DEID_CACHE_HOME relocates
# them as well. With no override the resulting values are the same as before.
_cache_root = Path(os.environ['EDGE_DEID_CACHE_HOME'])
os.environ.setdefault('HF_HOME', str(_cache_root / 'huggingface'))
# NOTE(review): newer transformers releases reportedly deprecate
# TRANSFORMERS_CACHE in favour of HF_HOME -- confirm before removing this line.
os.environ.setdefault('TRANSFORMERS_CACHE', os.environ['HF_HOME'])
os.environ.setdefault('TORCH_HOME', str(_cache_root / 'torch'))
os.environ.setdefault('XDG_CACHE_HOME', str(_cache_root))

# Create the two cache directories up front (no-op when they already exist).
for _cache_var in ('HF_HOME', 'TORCH_HOME'):
    Path(os.environ[_cache_var]).mkdir(parents=True, exist_ok=True)

In [None]:
# Start a 1-hour iterative run (edit the config to point to your real datasets).\n# PYTHONPATH=src applies to this one command only. The script and config paths are relative,\n# so this presumably has to run from the repository root -- TODO confirm the notebook's working directory.\n# NOTE(review): `python` is resolved via PATH and may differ from the kernel's interpreter -- confirm.\n!PYTHONPATH=src python scripts/iterative_train.py \\\n  --config configs/training/deid_gpt2_iterative_zh.yaml\n

In [None]:
import json
from pathlib import Path

# Report the best-checkpoint pointer recorded for the run, if one exists yet.
run_slug = 'deid-gpt2-zh-iterative'
best_path = Path(f'/mnt/data/training/logs/edge_deid/{run_slug}/best.json')

if not best_path.exists():
    # Nothing to show until a best.json is present at this path.
    print('best.json not found yet:', best_path)
else:
    with best_path.open(encoding='utf-8') as fh:
        best = json.load(fh)
    # .get() prints None for a missing key instead of raising.
    for label, key in (
        ('Best checkpoint:', 'best_checkpoint_dir'),
        ('Best score:', 'best_score'),
    ):
        print(label, best.get(key))