# Stage 2: Helpful Response Fine-tuning (Gemma-7B-IT, QLoRA) — Colab Notebook

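This notebook fine-tunes Google Gemma-7B-IT with QLoRA on the Anthropic Helpful-Harmless dataset (`Anthropic/hh-rlhf`), can log experiments to Weights & Biases, and optionally evaluates helpfulness and safety deltas using your Stage 1 safety classifier. As written, the preflight and training commands run with W&B disabled (`WANDB_DISABLED=true`, `--disable-wandb`); re-enable it in the training cell if you want runs logged.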

Notes:
- You need to accept the Gemma model license on Hugging Face Hub with your account before training.
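- You will log in to Hugging Face (hidden token prompt, with the `huggingface_hub` login widget as a fallback) and to W&B interactively, so no secrets are written in plaintext in the notebook.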
- If you have a Stage 1 package zip in Google Drive (safety_text_classifier_trained_*.zip), this notebook will auto-extract it for safety filtering and evaluation.


In [None]:
# Minimal setup for Colab: ensure GPU and install uv (we'll use repo-pinned deps)
import torch
assert torch.cuda.is_available(), 'CUDA not available. Please enable GPU in Runtime > Change runtime type > Hardware accelerator: GPU.'
!pip -q install -U uv


In [None]:
# GPU check & memory tweaks
import os
import torch

# Probe CUDA once and reuse the answer for both the report and the tweak below.
has_gpu = torch.cuda.is_available()
print('GPU available:', has_gpu)
if has_gpu:
    print('GPU name:', torch.cuda.get_device_name(0))
    # Helpful memory settings on Colab. Writing to os.environ means processes
    # started from this kernel afterwards inherit the allocator configuration.
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:512'


## Repository setup
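The next cell does three things:
- Clones `https://github.com/Jai-Dhiman/ml-learning` into `/content/ml-learning` (or fast-forwards an existing checkout). Edit the URL in that cell to use your own fork.
- Mounts Google Drive when `USE_DRIVE_FOR_ASSETS = True`, only to fetch large model artifacts such as the Stage 1 zip; the code itself always comes from GitHub.
- Creates a Python 3.11 virtual environment with `uv` inside `helpful-finetuning/` and installs pinned PyTorch, Triton, and bitsandbytes builds into it.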


In [None]:
# Clone repo from GitHub and (optionally) mount Drive for model assets
USE_DRIVE_FOR_ASSETS = True  # Mount Drive to fetch large checkpoints only

import os, glob
from pathlib import Path
repo_root = '/content/ml-learning'

# Always clone or pull latest code from GitHub
if not os.path.exists(repo_root):
    !git clone https://github.com/Jai-Dhiman/ml-learning {repo_root}
else:
    print('Repo path exists; pulling latest changes...')
%cd {repo_root}
!git pull --ff-only

# Mount Drive only for model artifacts (e.g., Stage 1 zip)
if USE_DRIVE_FOR_ASSETS:
    from google.colab import drive
    try:
        drive.mount('/content/drive')
        print('Drive mounted for model assets.')
    except Exception as e:
        print('Drive not mounted. Proceeding without Drive assets. Error:', e)

%cd {repo_root}/helpful-finetuning
!pwd

# Create and sync a project-local environment pinned to repo deps
!uv python install 3.11
!uv venv --python 3.11
!bash -lc 'source .venv/bin/activate && uv sync'

# Ensure bitsandbytes (GPU) and Triton are present in the venv (bootstrap pip in venv, then pip)
!bash -lc "cd /content/ml-learning/helpful-finetuning && source .venv/bin/activate && which python && python -V && curl -sS https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && python /tmp/get-pip.py && python -m pip install --upgrade pip setuptools wheel && python -m pip uninstall -y bitsandbytes triton torch torchvision torchaudio || true"
!bash -lc "source .venv/bin/activate && python -m pip install --index-url https://download.pytorch.org/whl/cu121 torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1"
!bash -lc "source .venv/bin/activate && python -m pip install triton==2.3.1"
!bash -lc "source .venv/bin/activate && ( \
    python -m pip install --no-cache-dir --force-reinstall --no-deps --index-url https://huggingface.github.io/bitsandbytes-wheels/cu121/ bitsandbytes==0.44.1 \
 || python -m pip install --no-cache-dir --force-reinstall --no-deps --index-url https://huggingface.github.io/bitsandbytes-wheels/cu121/ bitsandbytes==0.44.0 \
 || python -m pip install --no-cache-dir --force-reinstall --no-deps --index-url https://huggingface.github.io/bitsandbytes-wheels/cu121/ bitsandbytes==0.43.2 \
 || python -m pip install --no-cache-dir --force-reinstall --no-deps --index-url https://jllllll.github.io/bitsandbytes-wheels/cu121/ bitsandbytes==0.44.1 \
 || python -m pip install --no-cache-dir --force-reinstall --no-deps --index-url https://jllllll.github.io/bitsandbytes-wheels/cu121/ bitsandbytes==0.44.0 \
 || python -m pip install --no-cache-dir --force-reinstall --no-deps --index-url https://jllllll.github.io/bitsandbytes-wheels/cu121/ bitsandbytes==0.43.2 \
)"
!bash -lc "source .venv/bin/activate && python -c 'import importlib.metadata as im, torch; import bitsandbytes as bnb; print(\"torch version:\", torch.__version__, \"CUDA:\", torch.version.cuda, \"is_available:\", torch.cuda.is_available()); print(\"triton version:\", im.version(\"triton\")); print(\"bnb file:\", bnb.__file__); print(\"bnb version:\", getattr(bnb, \"__version__\", \"n/a\")); print(\"bnb dist:\", im.version(\"bitsandbytes\"))'"
!bash -lc "source .venv/bin/activate && python -m pip show bitsandbytes"
!bash -lc 'source .venv/bin/activate && python -m pip show bitsandbytes'


In [None]:
# If a Stage 1 zip exists in Drive, auto-extract to expected path for safety filtering/eval
import os, glob, shutil
dst_dir = '/content/ml-learning/safety-text-classifier'
os.makedirs(dst_dir, exist_ok=True)

# Preferred exact path (provided by user)
exact_zip = '/content/drive/MyDrive/safety_text_classifier_trained_20250916_0632.zip'
candidates = []
if os.path.exists(exact_zip):
    candidates = [exact_zip]
else:
    # Fallback patterns
    pats = [
        '/content/drive/MyDrive/safety_text_classifier_trained_*.zip',
        '/content/drive/MyDrive/safety-text-classifier/safety_text_classifier_trained_*.zip',
    ]
    for p in pats:
        candidates.extend(glob.glob(p))

if candidates:
    candidates.sort(reverse=True)
    print('Found Stage 1 package:', candidates[0])
    !unzip -o "{candidates[0]}" -d {dst_dir}
    # Normalize potential nested directory structure for checkpoints
    best_parent = os.path.join(dst_dir, 'checkpoints')
    if not os.path.isdir(best_parent):
        found = None
        for root, dirs, files in os.walk(dst_dir):
            if os.path.basename(root) == 'checkpoints':
                found = root
                break
        if found and found != best_parent:
            os.makedirs(best_parent, exist_ok=True)
            for name in os.listdir(found):
                s = os.path.join(found, name)
                t = os.path.join(best_parent, name)
                if os.path.isdir(s):
                    if os.path.exists(t):
                        shutil.rmtree(t)
                    shutil.copytree(s, t)
                else:
                    shutil.copy2(s, t)
            print('Flattened checkpoints to:', best_parent)
else:
    print('No Stage 1 zip found on Drive. If checkpoints are in the repo path, safety filter will use them.')
    print('Otherwise safety filter defaults to safe to avoid blocking training.')

# After extraction, update Colab override to enable safety and set dataset subset.
# The override file is only rewritten when a Stage 1 checkpoint directory is
# present; any failure is reported and leaves the notebook running.
import yaml
cfg_path = '/content/ml-learning/helpful-finetuning/configs/colab_config.yaml'
try:
    # Accept either checkpoint layout, preferring 'best' over 'best_model'.
    ckpt_options = (
        '/content/ml-learning/safety-text-classifier/checkpoints/best',
        '/content/ml-learning/safety-text-classifier/checkpoints/best_model',
    )
    ckpt_dir = next((d for d in ckpt_options if os.path.isdir(d)), None)
    if ckpt_dir is None:
        print('Stage 1 checkpoints not found yet; safety will remain as configured.')
    else:
        with open(cfg_path, 'r') as fh:
            cfg = yaml.safe_load(fh) or {}

        # Turn the safety filter on and point it at the extracted classifier.
        safety_cfg = cfg.setdefault('safety', {})
        safety_cfg['enabled'] = True
        safety_cfg['classifier_config_path'] = '../safety-text-classifier/configs/base_config.yaml'
        safety_cfg['checkpoint_dir'] = ckpt_dir

        # Only fill in the dataset subset when the override leaves it unset/empty.
        dataset_cfg = cfg.setdefault('dataset', {})
        if not dataset_cfg.get('subset'):
            dataset_cfg['subset'] = 'default'

        # sort_keys=False keeps the existing key order of the override file.
        with open(cfg_path, 'w') as fh:
            yaml.safe_dump(cfg, fh, sort_keys=False)
        print('Enabled safety and set dataset.subset in colab_config.yaml')
except Exception as err:
    print('Could not update colab_config.yaml:', err)


In [None]:
# Login to Hugging Face (required for Gemma model access)
# Secure login without storing/printing your token.
# If getpass has issues in Colab, this cell will fall back to the interactive widget provided by huggingface_hub.login().
import os

# Drop any token already exported in the environment so the login below is the
# one that takes effect.
for stale_var in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
    os.environ.pop(stale_var, None)

from huggingface_hub import login, HfApi

try:
    # Primary flow: hidden prompt, validate the input, log in, then verify.
    import getpass as gp
    raw = gp.getpass("Paste your Hugging Face token (input hidden): ")
    if isinstance(raw, (bytes, bytearray)):
        token = raw.decode()
    else:
        token = raw
    if not isinstance(token, str):
        raise TypeError(f"Unexpected token type: {type(token).__name__}")
    token = token.strip()
    if not token:
        raise ValueError("Empty token provided")
    login(token=token, add_to_git_credential=False)
    who = HfApi().whoami(token=token)
    display_name = who.get('name') or who.get('email') or 'OK'
    print(f"Logged in as: {display_name}")
except Exception as err:
    # Any failure above (prompt, validation, login or verification) lands here.
    print(f"[HF Login] getpass flow failed: {err}")
    print("Falling back to interactive login widget...")
    login()
    try:
        who = HfApi().whoami()
        display_name = who.get('name') or who.get('email') or 'OK'
        print(f"Logged in as: {display_name}")
    except Exception as verify_err:
        # Verification is best-effort after the widget flow.
        print(f"[HF Login] Verification skipped: {verify_err}")


In [None]:
# Login to Weights & Biases for experiment tracking
# NOTE(review): as written, the preflight and training cells below run with
# WANDB_DISABLED=true and --disable-wandb, so this login only takes effect if
# W&B is re-enabled there.
import wandb
wandb.login()  # Enter W&B API key in widget


In [None]:
# Install Stage 1 deps (JAX/Flax/Optax), ensure numpy compatibility, then tiny dataset load
!bash -lc 'source .venv/bin/activate && python -m pip install "jax[cpu]==0.4.38" "flax>=0.8.4,<0.9.0" "optax>=0.2.2,<0.3.0"'
!bash -lc 'source .venv/bin/activate && python -m pip install "numpy<2.0.0" --force-reinstall'
!bash -lc 'source .venv/bin/activate && python -c "from datasets import load_dataset; ds=load_dataset(\"Anthropic/hh-rlhf\",\"default\",split=\"test[:1]\"); print(\"Dataset preflight OK - tiny load:\", len(ds))"'
# Run Stage 2 preflight (no Gemma downloads)
!bash -lc 'cd /content/ml-learning/helpful-finetuning && source .venv/bin/activate && WANDB_DISABLED=true python -m src.training.train_qlora --config configs/base_config.yaml --override configs/colab_config.yaml --preflight-only --disable-wandb'


## Train: Gemma-7B-IT with QLoRA (Colab-optimized overrides)
- Base config: `configs/base_config.yaml`
- Overrides:   `configs/colab_config.yaml` (smaller batch size and sequence length, with gradient accumulation)


In [None]:
# Start training
!bash -lc 'cd /content/ml-learning/helpful-finetuning && source .venv/bin/activate && WANDB_DISABLED=true python -m src.training.train_qlora --config configs/base_config.yaml --override configs/colab_config.yaml --disable-wandb'


## Evaluate (quick subset)
Computes a simple helpfulness heuristic against the base model, plus safety deltas using the Stage 1 safety classifier.


In [None]:
# Run evaluation (uses ./lora_adapters if present)
# Runs the evaluation module inside the project venv with the base config only.
# NOTE(review): unlike the training command, this does not pass
# `--override configs/colab_config.yaml`, which is where the safety settings are
# written earlier in this notebook - confirm whether evaluate_helpfulness
# accepts an override and needs it to locate the Stage 1 checkpoint.
!bash -lc 'cd /content/ml-learning/helpful-finetuning && source .venv/bin/activate && python -m src.evaluation.evaluate_helpfulness --config configs/base_config.yaml'
