# IntelHealth - Colab Training (Per-Agent)

This notebook is designed for **one agent per run**.
Pick `AGENT_NAME`, then run all cells.

All agents use `supervised_finetuning.py` with Qwen ChatML template.
- 4 grader/normalizer agents → Qwen3-0.6B (loss on response only)
- diagnosis_generator → Qwen3-1.7B (loss on all tokens via `--train_on_inputs`)

In [None]:
# Clone repo + pull latest
import os
repo_dir = '/content/Intel_Health'
if not os.path.exists(repo_dir):
    !git clone https://github.com/DemonRain7/Intel_Health.git {repo_dir}
else:
    !git -C {repo_dir} pull
%cd {repo_dir}

In [None]:
# Install dependencies (need transformers>=4.46 for Qwen3 support)
!pip -q install torch "transformers>=4.46" datasets peft accelerate bitsandbytes sentencepiece loguru

# HuggingFace login (Qwen3 requires authentication)
# Option 1: Colab Secrets (recommended, persistent across sessions)
#   Click the key icon in the left sidebar -> Add secret:
#   Name: HF_TOKEN    Value: your token from https://huggingface.co/settings/tokens
#   Then toggle "Notebook access" ON
# Option 2: Falls back to interactive login() if secret not found
import os
try:
    from google.colab import userdata
    os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")
    print("HF_TOKEN loaded from Colab Secrets")
except (ImportError, userdata.SecretNotFoundError):
    from huggingface_hub import login
    login()  # interactive fallback

In [None]:
# Attach Google Drive so datasets and trained adapters outlive the Colab VM
from google.colab import drive
drive.mount('/content/drive')

# Everything persistent lives under one project folder on Drive
DRIVE_ROOT = '/content/drive/MyDrive/Code_Project/IntelHealth'
SFT_DATA_ROOT = DRIVE_ROOT + '/datasets/agent_sft'
ADAPTER_OUTPUT_ROOT = DRIVE_ROOT + '/models/adapters'

# Create both roots up front (no-op when they already exist)
for _persistent_root in (SFT_DATA_ROOT, ADAPTER_OUTPUT_ROOT):
    os.makedirs(_persistent_root, exist_ok=True)

# Echo the resolved locations for a quick sanity check
print('SFT_DATA_ROOT:', SFT_DATA_ROOT)
print('ADAPTER_OUTPUT_ROOT:', ADAPTER_OUTPUT_ROOT)

## 1) Select Agent (single run)

Supported training agents:
- symptom_normalizer (SFT, 0.6B)
- symptom_quality_grader (SFT, 0.6B)
- rag_relevance_grader (SFT, 0.6B)
- drug_evidence_grader (SFT, 0.6B)
- diagnosis_generator (SFT + train_on_inputs, 1.7B)

In [None]:
import os
import subprocess, shlex
from pathlib import Path
from datetime import datetime

AGENT_NAME = "symptom_quality_grader"  # <-- change this each run

# Base checkpoint for each trainable agent
_SMALL_MODEL = "Qwen/Qwen3-0.6B"
MODEL_BY_AGENT = dict(
    symptom_normalizer=_SMALL_MODEL,
    symptom_quality_grader=_SMALL_MODEL,
    rag_relevance_grader=_SMALL_MODEL,
    drug_evidence_grader=_SMALL_MODEL,
    diagnosis_generator="Qwen/Qwen3-1.7B",
)

# Every agent keeps its data and its adapters in a sub-folder named after it.
# --train_file_dir expects a DIRECTORY (globs *.jsonl inside)
DATA_DIR_BY_AGENT = {
    agent: f"{SFT_DATA_ROOT}/{agent}" for agent in MODEL_BY_AGENT
}
OUTPUT_BY_AGENT = {
    agent: f"{ADAPTER_OUTPUT_ROOT}/{agent}" for agent in MODEL_BY_AGENT
}

assert AGENT_NAME in MODEL_BY_AGENT, f"Unknown agent: {AGENT_NAME}"

# Pre-create the adapter folder of every agent, not only the selected one
for _adapter_home in OUTPUT_BY_AGENT.values():
    os.makedirs(_adapter_home, exist_ok=True)

# Resolve the settings of the selected agent
MODEL_NAME = MODEL_BY_AGENT[AGENT_NAME]
TRAIN_DIR  = DATA_DIR_BY_AGENT[AGENT_NAME]
OUTPUT_BASE_DIR = OUTPUT_BY_AGENT[AGENT_NAME]

# Each run writes into its own timestamped folder (microsecond resolution)
_run_started = datetime.now()
RUN_NAME = _run_started.strftime("run_%Y%m%d_%H%M%S_%f")
OUTPUT_DIR = "/".join((OUTPUT_BASE_DIR, RUN_NAME))

print(f"AGENT_NAME: {AGENT_NAME}")
print(f"MODEL_NAME: {MODEL_NAME}")
print(f"TRAIN_DIR:  {TRAIN_DIR}")
print(f"OUTPUT_DIR: {OUTPUT_DIR}")

## 2) Build command

All agents use `supervised_finetuning.py` with Qwen ChatML template.
- `diagnosis_generator` adds `--train_on_inputs` (loss on all tokens, DAPT-like behavior)
- Others only compute loss on model response tokens

In [None]:
# Training entry point, relative to the current working directory
SFT_SCRIPT = Path("training/supervised_finetuning.py")

# =============================================================
# Per-agent hyperparameters (edit freely, auto-selected by AGENT_NAME)
# =============================================================
AGENT_HP = {
    # DAPT: 1.7B model, 5000 MCQ samples, output=1 token
    "diagnosis_generator": dict(
        epochs=2, lr="1e-4", lora_r="32", lora_alpha="64", max_len="256",
    ),
    # SFT: 0.6B, 1500 samples, long input + long JSON output
    "symptom_normalizer": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="512",
    ),
    # SFT: 0.6B, ~1500 samples, moderate input + short JSON output
    "symptom_quality_grader": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="256",
    ),
    "rag_relevance_grader": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="256",
    ),
    "drug_evidence_grader": dict(
        epochs=3, lr="2e-4", lora_r="64", lora_alpha="128", max_len="256",
    ),
}

# =============================================================
# Shared hyperparameters (A100-40GB optimized)
# =============================================================
BATCH = "8"
GRAD_ACC = "2"
LORA_DROPOUT = "0.05"

hp = AGENT_HP[AGENT_NAME]

# The argv is assembled in three groups; their concatenation order is the
# order in which the arguments appear on the command line.
_io_args = [
    ("--model_name_or_path",     MODEL_NAME),
    ("--tokenizer_name_or_path", MODEL_NAME),
    ("--train_file_dir",         TRAIN_DIR),
    ("--output_dir",             OUTPUT_DIR),
    ("--template_name",          "qwen"),
]
_switches = ["--do_train", "--fp16", "--gradient_checkpointing"]
_tuning_args = [
    ("--per_device_train_batch_size", BATCH),
    ("--gradient_accumulation_steps", GRAD_ACC),
    ("--num_train_epochs",            str(hp["epochs"])),  # stored as int
    ("--learning_rate",               hp["lr"]),
    ("--lora_rank",                   hp["lora_r"]),
    ("--lora_alpha",                  hp["lora_alpha"]),
    ("--lora_dropout",                LORA_DROPOUT),
    ("--model_max_length",            hp["max_len"]),
    ("--logging_steps",               "10"),
    ("--save_strategy",               "epoch"),
]

cmd = ["python", str(SFT_SCRIPT)]
for _flag, _value in _io_args:
    cmd += [_flag, _value]
cmd += _switches
for _flag, _value in _tuning_args:
    cmd += [_flag, _value]

# diagnosis_generator: loss on ALL tokens (DAPT-like)
if AGENT_NAME == "diagnosis_generator":
    cmd += ["--train_on_inputs"]

# Shell-quoted rendering of the argv, for display / copy-paste only
_rendered_cmd = " ".join(shlex.quote(_tok) for _tok in cmd)

print(f"Agent: {AGENT_NAME}")
print(f"Hyperparameters: {hp}")
print(f"Batch: {BATCH} x Grad_Acc: {GRAD_ACC} = Effective: {int(BATCH) * int(GRAD_ACC)}")
print(f"\nCommand:\n{_rendered_cmd}")



In [None]:
# Run training (real-time output)
import os
import subprocess
import sys

# With stdout redirected to a pipe the child Python interpreter block-buffers
# its output, so progress would arrive in bursts; PYTHONUNBUFFERED=1 makes it
# flush every write so the log below really is live.
child_env = dict(os.environ, PYTHONUNBUFFERED="1")

# The context manager closes the pipe and reaps the child on every exit path.
with subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,   # merge stderr into stdout
    text=True,
    bufsize=1,                  # line-buffered
    env=child_env,
) as proc:
    try:
        # Stream output line by line
        for line in proc.stdout:
            print(line, end="", flush=True)
    except KeyboardInterrupt:
        # Interrupting the cell must not leave the training job running
        # (and holding the GPU) in the background.
        proc.terminate()
        raise
    ret = proc.wait()

if ret != 0:
    raise RuntimeError(f"Training failed with exit code {ret}")
print(f"\nTraining complete! Adapter saved to: {OUTPUT_DIR}")

## 3) Next agent

After this run finishes:
1. Change `AGENT_NAME` in the **Select Agent** code cell (section 1)
2. **Runtime → Restart runtime** (free GPU memory)
3. Run all cells again

Trained LoRA adapters are saved to Google Drive: `models/adapters/<agent>/run_YYYYmmdd_HHMMSS_ffffff/` (the trailing `ffffff` is the microsecond part of the run's start time)

### Recommended training order
1. `symptom_normalizer` (0.6B, fast)
2. `symptom_quality_grader` (0.6B, fast)
3. `rag_relevance_grader` (0.6B, fast)
4. `drug_evidence_grader` (0.6B, fast)
5. `diagnosis_generator` (1.7B, slower)