# IntelHealth - Colab Training (Per-Agent)

This notebook is designed for **one agent per run**.
Pick `AGENT_NAME`, then run all cells.

All agents use `supervised_finetuning.py` with Qwen ChatML template.
- 4 grader/normalizer agents → Qwen3-0.6B (loss on response only)
- diagnosis_generator → Qwen3-1.7B (loss on all tokens via `--train_on_inputs`)

In [None]:
# Clone repo + pull latest
# Ensures a working copy of the project exists at repo_dir and is up to date,
# then makes it the current working directory for the rest of the notebook.
import os
repo_dir = '/content/Intel_Health'
if not os.path.exists(repo_dir):
    # No working copy yet: clone it (IPython substitutes {repo_dir} into the shell command).
    !git clone https://github.com/DemonRain7/Intel_Health.git {repo_dir}
else:
    # Working copy already present: update it in place with `git pull`.
    !git -C {repo_dir} pull
# Later cells reference the training script by a repo-relative path, so run from the repo root.
%cd {repo_dir}

In [None]:
# Install dependencies (need transformers>=4.46 for Qwen3 support)
!pip -q install torch "transformers>=4.46" datasets peft accelerate bitsandbytes sentencepiece loguru

# HuggingFace login (authenticated model downloads)
# Option 1: Colab Secrets (recommended, persistent across sessions)
#   Click the key icon in the left sidebar -> Add secret:
#   Name: HF_TOKEN    Value: your token from https://huggingface.co/settings/tokens
#   Then toggle "Notebook access" ON
# Option 2: Falls back to interactive login() if the secret is unavailable
import os

# Import separately from the lookup: if google.colab is missing, `userdata`
# is never bound, so it must not be referenced inside an `except` clause.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

hf_token = None
if userdata is not None:
    try:
        hf_token = userdata.get("HF_TOKEN")
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        # Secret not defined, or defined but "Notebook access" is switched off.
        hf_token = None

if hf_token:
    os.environ["HF_TOKEN"] = hf_token
    print("HF_TOKEN loaded from Colab Secrets")
else:
    from huggingface_hub import login
    login()  # interactive fallback

In [None]:
# Mount Google Drive and define the Drive-backed folders used by later cells.
from google.colab import drive
drive.mount('/content/drive')

# Single project folder on Drive; datasets and adapters hang off it.
DRIVE_ROOT = '/content/drive/MyDrive/Code_Project/IntelHealth'
SFT_DATA_ROOT = DRIVE_ROOT + '/datasets/agent_sft'
ADAPTER_OUTPUT_ROOT = DRIVE_ROOT + '/models/adapters'

_drive_dirs = {
    'SFT_DATA_ROOT': SFT_DATA_ROOT,
    'ADAPTER_OUTPUT_ROOT': ADAPTER_OUTPUT_ROOT,
}

# Create both folders first (no-op when they already exist), then report them.
for _dir_path in _drive_dirs.values():
    os.makedirs(_dir_path, exist_ok=True)
for _dir_label, _dir_path in _drive_dirs.items():
    print(_dir_label + ':', _dir_path)

## 1) Select Agent (single run)

Supported training agents:
- symptom_normalizer (SFT, 0.6B)
- symptom_quality_grader (SFT, 0.6B)
- rag_relevance_grader (SFT, 0.6B)
- drug_evidence_grader (SFT, 0.6B)
- diagnosis_generator (SFT + train_on_inputs, 1.7B)

In [None]:
import os
import subprocess, shlex
from pathlib import Path
from datetime import datetime

# The one agent trained by this run of the notebook.
AGENT_NAME = "symptom_quality_grader"  # <-- change this each run

# Base checkpoint for each agent.
_QWEN3_SMALL = "Qwen/Qwen3-0.6B"
_QWEN3_LARGE = "Qwen/Qwen3-1.7B"
MODEL_BY_AGENT = {
    "symptom_normalizer":     _QWEN3_SMALL,
    "symptom_quality_grader": _QWEN3_SMALL,
    "rag_relevance_grader":   _QWEN3_SMALL,
    "drug_evidence_grader":   _QWEN3_SMALL,
    "diagnosis_generator":    _QWEN3_LARGE,
}

# --train_file_dir expects a DIRECTORY (globs *.jsonl inside)
# Each agent's data lives in a sub-folder named after the agent.
DATA_DIR_BY_AGENT = {
    _agent: f"{SFT_DATA_ROOT}/{_agent}" for _agent in MODEL_BY_AGENT
}

# Adapter output folders follow the same one-folder-per-agent layout.
OUTPUT_BY_AGENT = {
    _agent: f"{ADAPTER_OUTPUT_ROOT}/{_agent}" for _agent in MODEL_BY_AGENT
}

assert AGENT_NAME in MODEL_BY_AGENT, f"Unknown agent: {AGENT_NAME}"

# Pre-create the output folder of every agent, not just the selected one.
for _out_dir in OUTPUT_BY_AGENT.values():
    os.makedirs(_out_dir, exist_ok=True)

MODEL_NAME = MODEL_BY_AGENT[AGENT_NAME]
TRAIN_DIR = DATA_DIR_BY_AGENT[AGENT_NAME]
OUTPUT_BASE_DIR = OUTPUT_BY_AGENT[AGENT_NAME]
# Timestamp (down to microseconds) gives every run its own sub-folder.
RUN_NAME = f"run_{datetime.now():%Y%m%d_%H%M%S_%f}"
OUTPUT_DIR = "/".join((OUTPUT_BASE_DIR, RUN_NAME))

for _label, _value in (
    ("AGENT_NAME:", AGENT_NAME),
    ("MODEL_NAME:", MODEL_NAME),
    ("TRAIN_DIR: ", TRAIN_DIR),
    ("OUTPUT_DIR:", OUTPUT_DIR),
):
    print(f"{_label} {_value}")

## 2) Build command

All agents use `supervised_finetuning.py` with Qwen ChatML template.
- `diagnosis_generator` adds `--train_on_inputs` (loss on all tokens, DAPT-like behavior)
- Others only compute loss on model response tokens

In [None]:
# Training entry point, relative to the repository root.
SFT_SCRIPT = Path("training/supervised_finetuning.py")

# -------------------------------------------------------------
# Per-agent hyperparameters (edit freely, auto-selected by AGENT_NAME).
# `epochs` is an int; every other value is already a CLI-ready string.
# -------------------------------------------------------------
AGENT_HP = {
    # DAPT: 1.7B model, 5000 MCQ samples, output=1 token
    "diagnosis_generator": dict(
        epochs=2, lr="1e-4", lora_r="32", lora_alpha="64", max_len="256",
    ),
    # SFT: 0.6B, 1500 samples, long input + long JSON output
    "symptom_normalizer": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="512",
    ),
    # SFT: 0.6B, ~1500 samples, moderate input + short JSON output
    "symptom_quality_grader": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="256",
    ),
    "rag_relevance_grader": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="256",
    ),
    "drug_evidence_grader": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="256",
    ),
}

# -------------------------------------------------------------
# Shared hyperparameters (A100-40GB optimized)
# -------------------------------------------------------------
BATCH = "8"
GRAD_ACC = "2"
LORA_DROPOUT = "0.05"

hp = AGENT_HP[AGENT_NAME]

# One tuple per CLI option: (flag, value) for valued options, (flag,) for
# switches. Order is kept so the printed command reads the same as before.
_cli_options = (
    ("--model_name_or_path", MODEL_NAME),
    ("--tokenizer_name_or_path", MODEL_NAME),
    ("--train_file_dir", TRAIN_DIR),
    ("--output_dir", OUTPUT_DIR),
    ("--template_name", "qwen"),
    ("--do_train",),
    ("--fp16",),
    ("--gradient_checkpointing",),
    ("--per_device_train_batch_size", BATCH),
    ("--gradient_accumulation_steps", GRAD_ACC),
    ("--num_train_epochs", str(hp["epochs"])),
    ("--learning_rate", hp["lr"]),
    ("--lora_rank", hp["lora_r"]),
    ("--lora_alpha", hp["lora_alpha"]),
    ("--lora_dropout", LORA_DROPOUT),
    ("--model_max_length", hp["max_len"]),
    ("--logging_steps", "10"),
    ("--save_strategy", "epoch"),
)

cmd = ["python", str(SFT_SCRIPT)]
for _option in _cli_options:
    cmd.extend(_option)

# diagnosis_generator: loss on ALL tokens (DAPT-like)
if AGENT_NAME == "diagnosis_generator":
    cmd += ["--train_on_inputs"]

_effective_batch = int(BATCH) * int(GRAD_ACC)
_rendered_cmd = " ".join(shlex.quote(_token) for _token in cmd)

print(f"Agent: {AGENT_NAME}")
print(f"Hyperparameters: {hp}")
print(f"Batch: {BATCH} x Grad_Acc: {GRAD_ACC} = Effective: {_effective_batch}")
print(f"\nCommand:\n{_rendered_cmd}")


In [None]:
# Run training (real-time output)
# Launches `cmd` as a child process, echoes its merged stdout/stderr as it
# arrives, and raises RuntimeError if the process exits with a non-zero code.
import os
import sys, subprocess, time as _time

# A Python child block-buffers stdout when it is a pipe, which would delay
# its prints until the buffer fills. PYTHONUNBUFFERED turns that off.
_child_env = dict(os.environ, PYTHONUNBUFFERED="1")

_t0 = _time.time()
proc = subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,   # merge stderr into stdout
    text=True,
    encoding="utf-8",
    errors="replace",           # never abort streaming on an undecodable byte
    bufsize=1,                  # line-buffered
    env=_child_env,
)

try:
    # Stream output line by line
    for line in proc.stdout:
        print(line, end="", flush=True)
    ret = proc.wait()
except KeyboardInterrupt:
    # Interrupting the cell must not leave training running (and holding the
    # GPU) in the background: ask it to stop, then force-kill if it lingers.
    proc.terminate()
    try:
        proc.wait(timeout=30)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()
    raise
finally:
    proc.stdout.close()

_elapsed = _time.time() - _t0

if ret != 0:
    raise RuntimeError(f"Training failed with exit code {ret}")
print(f"\nTraining complete in {_elapsed/60:.1f} min! Adapter saved to: {OUTPUT_DIR}")

In [None]:
# ============================================================
# Post-training analysis: loss curve + GPU stats + save report
# Reads the files the run left in OUTPUT_DIR, prints a summary, plots
# loss / learning-rate curves, and writes training_summary.json (consumed
# by the cross-agent comparison cell).
# ============================================================
import json, torch
import matplotlib.pyplot as plt

# --- 1. Read trainer_state.json (contains per-step logs) ---
state_path = f"{OUTPUT_DIR}/trainer_state.json"
with open(state_path, encoding="utf-8") as f:
    state = json.load(f)

# Only entries that carry a "loss" value are training-step logs.
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e.get("learning_rate", 0) for e in logs]

# Final metrics (None when the run produced no loss logs at all)
final_loss = losses[-1] if losses else None
best_loss = min(losses) if losses else None
total_steps = state.get("max_steps", steps[-1] if steps else 0)
best_step = min(logs, key=lambda e: e["loss"])["step"] if logs else 0

# --- 2. GPU memory stats ---
# NOTE(review): these counters belong to THIS notebook process. Training ran
# in a separate subprocess, so allocated/reserved do not reflect the training
# job's peak usage (they read ~0 unless the kernel itself used CUDA). Only
# gpu_name and total_gb describe the device. Keys are kept unchanged because
# the comparison cell reads them.
gpu_info = {}
if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    gpu_info = {
        "gpu_name": torch.cuda.get_device_name(0),
        "allocated_gb": round(torch.cuda.max_memory_allocated(0) / 1e9, 2),
        "reserved_gb": round(torch.cuda.max_memory_reserved(0) / 1e9, 2),
        "total_gb": round(props.total_memory / 1e9, 2),
    }

# --- 3. Read train_results.json (optional; absent -> zeros in the report) ---
results_path = f"{OUTPUT_DIR}/train_results.json"
train_results = {}
if os.path.exists(results_path):
    with open(results_path, encoding="utf-8") as f:
        train_results = json.load(f)

# --- 4. Print summary ---
print(f"{'='*60}")
print(f"  Training Report: {AGENT_NAME}")
print(f"{'='*60}")
print(f"  Model:           {MODEL_NAME}")
print(f"  LoRA rank:       {hp['lora_r']}, alpha: {hp['lora_alpha']}")
print(f"  Epochs:          {hp['epochs']}")
print(f"  Total steps:     {total_steps}")
# Compare against None explicitly: a loss of exactly 0.0 is a valid value.
if final_loss is not None:
    print(f"  Final loss:      {final_loss:.4f}")
    print(f"  Best loss:       {best_step} step = {best_loss:.4f}")
else:
    print("  Final loss:      N/A")
    print("  Best loss:       N/A")
print(f"  Training time:   {train_results.get('train_runtime', 0)/60:.1f} min")
print(f"  Samples/sec:     {train_results.get('train_samples_per_second', 0):.1f}")
if gpu_info:
    print(f"  GPU:             {gpu_info['gpu_name']}")
    print(f"  Peak VRAM:       {gpu_info['allocated_gb']} GB / {gpu_info['total_gb']} GB")
    print("                   (counter of this notebook process, not the training subprocess)")
print(f"  Output:          {OUTPUT_DIR}")
print(f"{'='*60}")

# --- 5. Plot loss curve ---
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

ax1.plot(steps, losses, "b-", linewidth=1.5, label="Training Loss")
ax1.set_xlabel("Step")
ax1.set_ylabel("Loss")
ax1.set_title(f"{AGENT_NAME} - Training Loss")
ax1.legend()
ax1.grid(True, alpha=0.3)

ax2.plot(steps, lrs, "r-", linewidth=1.5, label="Learning Rate")
ax2.set_xlabel("Step")
ax2.set_ylabel("Learning Rate")
ax2.set_title(f"{AGENT_NAME} - LR Schedule")
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plot_path = f"{OUTPUT_DIR}/training_curves.png"
fig.savefig(plot_path, dpi=150, bbox_inches="tight")
plt.show()
print(f"Plot saved to: {plot_path}")

# --- 6. Save summary JSON (for cross-agent comparison later) ---
summary = {
    "agent": AGENT_NAME,
    "model": MODEL_NAME,
    "hyperparameters": hp,
    "batch_size": int(BATCH),
    "grad_acc": int(GRAD_ACC),
    "effective_batch": int(BATCH) * int(GRAD_ACC),
    "final_loss": final_loss,
    "best_loss": best_loss,
    "total_steps": total_steps,
    "training_time_min": round(train_results.get("train_runtime", 0) / 60, 1),
    "samples_per_second": train_results.get("train_samples_per_second", 0),
    "gpu": gpu_info,
    "output_dir": OUTPUT_DIR,
}
summary_path = f"{OUTPUT_DIR}/training_summary.json"
# ensure_ascii=False may emit non-ASCII text, so pin the file encoding.
with open(summary_path, "w", encoding="utf-8") as f:
    json.dump(summary, f, indent=2, ensure_ascii=False)
print(f"Summary saved to: {summary_path}")

## 3) Next agent

After this run finishes:
1. Change `AGENT_NAME` in the "Select Agent" code cell (section 1)
2. **Runtime → Restart runtime** (free GPU memory)
3. Run all cells again

Trained LoRA adapters are saved to Google Drive: `models/adapters/<agent>/run_YYYYmmdd_HHMMSS_ffffff/` (the trailing `ffffff` is the microsecond part of the timestamp)

### Recommended training order
1. `symptom_normalizer` (0.6B, fast)
2. `symptom_quality_grader` (0.6B, fast)
3. `rag_relevance_grader` (0.6B, fast)
4. `drug_evidence_grader` (0.6B, fast)
5. `diagnosis_generator` (1.7B, slower)

## 4) All-agents comparison (run after ALL agents are trained)

In [None]:
# ============================================================
# Cross-agent comparison (run AFTER all 5 agents are trained)
# For each agent, loads training_summary.json from the newest run that has
# one, prints a comparison table, and saves two figures under
# ADAPTER_OUTPUT_ROOT: per-agent bar charts and overlaid loss curves.
# ============================================================
import json, glob, os
import matplotlib.pyplot as plt

AGENTS = ["symptom_normalizer", "symptom_quality_grader", "rag_relevance_grader",
          "drug_evidence_grader", "diagnosis_generator"]

# Colour is tied to the agent (not to its position among loaded summaries) so
# the bar charts and the loss curves agree even when some agents are skipped.
AGENT_COLORS = dict(zip(
    AGENTS, ["#4CAF50", "#2196F3", "#FF9800", "#9C27B0", "#F44336"]
))

summaries = []
summary_run_dirs = []   # run directory each summary was loaded from (parallel list)
for agent in AGENTS:
    agent_dir = f"{ADAPTER_OUTPUT_ROOT}/{agent}"
    # Run folder names embed a sortable timestamp, so lexical order is time order.
    runs = sorted(glob.glob(f"{agent_dir}/run_*"))
    if not runs:
        print(f"[SKIP] {agent}: no training runs found")
        continue
    # Newest run that actually produced a summary; a later failed or
    # unfinished run must not hide an earlier completed one.
    latest = next(
        (r for r in reversed(runs)
         if os.path.exists(f"{r}/training_summary.json")),
        None,
    )
    if latest is None:
        print(f"[SKIP] {agent}: no training_summary.json in {runs[-1]}")
        continue
    with open(f"{latest}/training_summary.json", encoding="utf-8") as f:
        summaries.append(json.load(f))
    summary_run_dirs.append(latest)
    print(f"[OK] {agent}: loaded from {os.path.basename(latest)}")

if len(summaries) < 2:
    print("\nNeed at least 2 agents trained to compare. Train more agents first.")
else:
    # --- Comparison table ---
    print(f"\n{'='*90}")
    print(f"{'Agent':<25} {'Model':<16} {'Loss':>8} {'Steps':>7} {'Time':>8} {'VRAM':>8} {'Samples/s':>10}")
    print(f"{'-'*90}")
    for s in summaries:
        vram = f"{s['gpu'].get('allocated_gb', '?')}GB" if s.get('gpu') else "N/A"
        # final_loss is stored as null when a run logged no loss; .get() with a
        # default would still return None and break the float format spec.
        loss = s.get("final_loss")
        loss_txt = f"{loss:.4f}" if loss is not None else "N/A"
        print(f"{s['agent']:<25} {s['model']:<16} {loss_txt:>8} "
              f"{s['total_steps']:>7} {s['training_time_min']:>7.1f}m {vram:>8} "
              f"{s.get('samples_per_second') or 0:>10.1f}")
    print(f"{'='*90}")

    # --- Comparison charts ---
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
    names = [s["agent"].replace("_", "\n") for s in summaries]
    colors = [AGENT_COLORS.get(s["agent"]) for s in summaries]

    # Final loss (missing values are drawn as 0)
    axes[0].bar(names, [s.get("final_loss") or 0 for s in summaries], color=colors)
    axes[0].set_title("Final Training Loss")
    axes[0].set_ylabel("Loss")

    # Training time
    axes[1].bar(names, [s.get("training_time_min") or 0 for s in summaries], color=colors)
    axes[1].set_title("Training Time (min)")
    axes[1].set_ylabel("Minutes")

    # Peak VRAM
    vrams = [(s.get("gpu") or {}).get("allocated_gb", 0) for s in summaries]
    axes[2].bar(names, vrams, color=colors)
    axes[2].set_title("Peak GPU VRAM (GB)")
    axes[2].set_ylabel("GB")

    plt.tight_layout()
    comp_path = f"{ADAPTER_OUTPUT_ROOT}/all_agents_comparison.png"
    fig.savefig(comp_path, dpi=150, bbox_inches="tight")
    plt.show()
    print(f"\nComparison chart saved to: {comp_path}")

    # --- All loss curves in one plot ---
    # Curves come from the same run directory as each summary above, so the
    # table, the bars and the curves all describe the same run.
    fig2, ax = plt.subplots(figsize=(12, 6))
    for s, run_dir in zip(summaries, summary_run_dirs):
        state_file = f"{run_dir}/trainer_state.json"
        if not os.path.exists(state_file):
            continue
        with open(state_file, encoding="utf-8") as f:
            run_state = json.load(f)
        loss_logs = [e for e in run_state["log_history"] if "loss" in e]
        if not loss_logs:
            continue
        # Normalize steps to epoch fraction for comparison
        max_step = max(e["step"] for e in loss_logs)
        if max_step <= 0:
            continue  # nothing meaningful to scale
        epochs_total = run_state.get("epoch", 3)
        x = [e["step"] / max_step * epochs_total for e in loss_logs]
        y = [e["loss"] for e in loss_logs]
        ax.plot(x, y, linewidth=1.5, label=s["agent"],
                color=AGENT_COLORS.get(s["agent"]))

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title("All Agents - Training Loss Curves")
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    curves_path = f"{ADAPTER_OUTPUT_ROOT}/all_agents_loss_curves.png"
    fig2.savefig(curves_path, dpi=150, bbox_inches="tight")
    plt.show()
    print(f"Loss curves saved to: {curves_path}")