# Week 2.6: Leakage-Controlled Evaluation

**Purpose**: Run inference on both the original and the sanitized text to measure the actual F1 delta between them.

**Requires**: GPU runtime (T4 recommended)

In [None]:
# 1. Clone Repository
!git clone https://github.com/AngadSingh22/Text2Diag.git
%cd Text2Diag

In [None]:
# 2. Install Dependencies
!pip install -q torch transformers accelerate scikit-learn datasets pyyaml

In [None]:
# 3. Download Raw Dataset (REQUIRED before building canonical)
# The script path is relative, so the working directory must be the cloned
# repository (step 1).
# NOTE(review): the download is presumably a side effect of the inspection
# script — confirm against scripts/inspect_raw_datasets.py.
!python scripts/inspect_raw_datasets.py

In [None]:
# 4. Build Canonical Dataset
# Must run after step 3, which provides the raw dataset this build needs.
# NOTE(review): presumably produces data/processed/reddit_mh_windows, the
# directory the training and evaluation steps read — confirm against the script.
!python scripts/02_build_reddit_canonical.py

In [None]:
# 5. Train Model (or upload existing checkpoint)
import os
checkpoint_path = "results/week2/checkpoints/checkpoint-4332"

# Always retrain since we don't have checkpoint
print("Training model...")
!python scripts/03_train_baseline.py \
    --data_dir data/processed/reddit_mh_windows \
    --out_dir results/week2 \
    --model_name distilbert-base-uncased \
    --max_len 256 \
    --batch_size 8 \
    --grad_accum 4 \
    --epochs 3 \
    --lr 2e-5

# Find the checkpoint
import glob
checkpoints = glob.glob("results/week2/checkpoints/checkpoint-*")
if checkpoints:
    checkpoint_path = max(checkpoints, key=lambda x: int(x.split('-')[-1]))
    print(f"Using checkpoint: {checkpoint_path}")
else:
    print("ERROR: No checkpoint found!")

In [None]:
# 6. Run Leakage-Controlled Evaluation
# `{checkpoint_path}` is interpolated by IPython from the Python variable set
# in step 5, so that cell must have been run in this kernel first.
# --out_dir (results/week2/remediation) is the directory that steps 7-9 read.
!python scripts/09_eval_sanitized.py \
    --checkpoint {checkpoint_path} \
    --data_dir data/processed/reddit_mh_windows \
    --out_dir results/week2/remediation \
    --sanitize_config configs/sanitize.yaml \
    --batch_size 32

In [None]:
# 7. Check Results
# Print the Markdown metrics report from the evaluation output directory
# (presumably written by step 6 — the file is missing if that step failed).
!cat results/week2/remediation/leakage_eval_metrics.md

In [None]:
# 8. View JSON Metrics
# Load the machine-readable metrics into `metrics` and pretty-print them.
import json

metrics_file = 'results/week2/remediation/leakage_eval_metrics.json'
with open(metrics_file, 'r') as fh:
    metrics = json.loads(fh.read())

pretty_metrics = json.dumps(metrics, indent=2)
print(pretty_metrics)

In [None]:
# 9. Zip and Download Results
!zip -r w26_results.zip results/week2/remediation
from google.colab import files
files.download('w26_results.zip')