# Stage 2: Helpful Fine-Tuning

QLoRA fine-tuning on the helpful subset of the Anthropic/hh-rlhf dataset.

**Expected time**: ~2-3 hours on T4 GPU

In [None]:
# Cell 1: Setup
from google.colab import drive
import os

drive.mount('/content/drive')
os.chdir('/content')

!git clone https://github.com/Jai-Dhiman/ml-learning.git
os.chdir('/content/ml-learning/helpful-finetuning')

print(f"✅ Ready in: {os.getcwd()}")

In [None]:
# Cell 2: Login to Hugging Face (required for Gemma model access)
# The token is typed into a hidden prompt and is never printed.
# If the getpass-based flow fails for any reason, the cell falls back to the
# interactive widget provided by huggingface_hub.login().
import os

# Drop any token variables already present in this process's environment
# before logging in.
for stale_var in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
    os.environ.pop(stale_var, None)

from huggingface_hub import login, HfApi

try:
    import getpass as gp

    entered = gp.getpass("Paste your Hugging Face token (input hidden): ")
    # Normalise a bytes-like result to text before validating it.
    if isinstance(entered, (bytes, bytearray)):
        token = entered.decode()
    else:
        token = entered
    if not isinstance(token, str):
        raise TypeError(f"Unexpected token type: {type(token).__name__}")
    token = token.strip()
    if token == "":
        raise ValueError("Empty token provided")

    login(token=token, add_to_git_credential=False)
    # Confirm the token works by asking the Hub who it belongs to.
    who = HfApi().whoami(token=token)
    print(f"Logged in as: {who.get('name') or who.get('email') or 'OK'}")
except Exception as primary_err:
    # Any failure above (prompt, validation, login, verification) lands here.
    print(f"[HF Login] getpass flow failed: {primary_err}")
    print("Falling back to interactive login widget...")
    login()
    try:
        who = HfApi().whoami()
        print(f"Logged in as: {who.get('name') or who.get('email') or 'OK'}")
    except Exception as verify_err:
        # Verification is best-effort in the fallback path.
        print(f"[HF Login] Verification skipped: {verify_err}")


In [None]:
# Cell 3: Install Dependencies (using uv and pyproject.toml)
import torch
import os

# Check GPU availability
print('GPU available:', torch.cuda.is_available())
if torch.cuda.is_available():
    print('GPU name:', torch.cuda.get_device_name(0))
    !nvidia-smi
    # Helpful memory settings on Colab
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True,max_split_size_mb:512'

# Install uv package manager
!pip install -q uv

# Create and sync virtual environment with pyproject.toml dependencies
print('\n=== Setting up virtual environment ===')
!uv python install 3.11
!uv venv --python 3.11

# Sync dependencies from pyproject.toml
print('\n=== Installing project dependencies ===')
!bash -lc 'source .venv/bin/activate && uv sync'

# Install PyTorch with CUDA 12.1 support
print('\n=== Installing PyTorch with CUDA 12.1 ===')
!bash -lc "source .venv/bin/activate && python -m pip install --index-url https://download.pytorch.org/whl/cu121 torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1"

# Install JAX/Flax/Optax for Stage 1 safety classifier (if needed)
print('\n=== Installing JAX ecosystem for safety classifier ===')
!bash -lc 'source .venv/bin/activate && python -m pip install "jax[cpu]==0.4.38" "flax>=0.8.4,<0.9.0" "optax>=0.2.2,<0.3.0"'

# Ensure numpy compatibility
!bash -lc 'source .venv/bin/activate && python -m pip install "numpy<2.0.0" --force-reinstall'

# Quick dataset preflight test
print('\n=== Testing dataset access ===')
!bash -lc 'source .venv/bin/activate && python -c "from datasets import load_dataset; ds=load_dataset(\"Anthropic/hh-rlhf\",\"default\",split=\"test[:1]\"); print(\"Dataset preflight OK - tiny load:\", len(ds))"'

print('\n✅ Dependencies installed successfully!')

In [None]:
# Cell 4: PREFLIGHT TEST (Config validation only)
print("="*70)
print("PREFLIGHT: Validating configuration and dataset access")
print("="*70)

!bash -lc 'cd /content/ml-learning/helpful-finetuning && source .venv/bin/activate && WANDB_DISABLED=true python -m src.training.train_qlora --config configs/base_config.yaml --override configs/colab_config.yaml --preflight-only --disable-wandb'

print("\n" + "="*70)
print("✅ PREFLIGHT PASSED - Ready for full training")
print("="*70)

In [None]:
# Cell 5: Full Training
print("="*70)
print("STAGE 2: HELPFUL FINE-TUNING")
print("="*70)

!bash -lc 'cd /content/ml-learning/helpful-finetuning && source .venv/bin/activate && WANDB_DISABLED=true python -m src.training.train_qlora --config configs/base_config.yaml --override configs/colab_config.yaml --disable-wandb'

print("\n✅ Stage 2 training complete!")

In [None]:
# Cell 6: Save Results to Google Drive
import os
import shutil
from pathlib import Path

print('=' * 70)
print('SAVING TRAINING RESULTS TO GOOGLE DRIVE')
print('=' * 70)

# Define source and destination paths
base_dir = '/content/ml-learning/helpful-finetuning'
drive_base = '/content/drive/MyDrive/ml-learning/stage2_results'

# Create destination directory
os.makedirs(drive_base, exist_ok=True)

# Find and copy all result directories
results_found = False

# 1. Copy outputs directory (training checkpoints)
outputs_dir = os.path.join(base_dir, 'outputs')
if os.path.exists(outputs_dir):
    dest = os.path.join(drive_base, 'outputs')
    print(f'Copying {outputs_dir} -> {dest}')
    if os.path.exists(dest):
        shutil.rmtree(dest)
    shutil.copytree(outputs_dir, dest)
    size = sum(f.stat().st_size for f in Path(dest).rglob('*') if f.is_file())
    print(f'✅ Copied outputs ({size / 1024**2:.1f} MB)')
    results_found = True
else:
    print('⚠️ No outputs directory found')

# 2. Copy final_model
final_model_dir = os.path.join(base_dir, 'final_model')
if os.path.exists(final_model_dir):
    dest = os.path.join(drive_base, 'final_model')
    print(f'\nCopying {final_model_dir} -> {dest}')
    if os.path.exists(dest):
        shutil.rmtree(dest)
    shutil.copytree(final_model_dir, dest)
    size = sum(f.stat().st_size for f in Path(dest).rglob('*') if f.is_file())
    print(f'✅ Copied final_model ({size / 1024**2:.1f} MB)')
    results_found = True
else:
    print('⚠️ No final_model directory found')

# 3. Copy LoRA adapters from artifacts
lora_dir = '/content/ml-learning/artifacts/stage2_artifacts/lora_adapters'
if os.path.exists(lora_dir):
    dest = os.path.join(drive_base, 'lora_adapters')
    print(f'\nCopying {lora_dir} -> {dest}')
    if os.path.exists(dest):
        shutil.rmtree(dest)
    shutil.copytree(lora_dir, dest)
    size = sum(f.stat().st_size for f in Path(dest).rglob('*') if f.is_file())
    print(f'✅ Copied lora_adapters ({size / 1024**2:.1f} MB)')
    results_found = True
else:
    print('⚠️ No lora_adapters directory found')

# Summary
print('\n' + '=' * 70)
if results_found:
    print('✅ Results saved to Google Drive!')
    print(f'Location: MyDrive/ml-learning/stage2_results/')
    print('\nTo download from Colab:')
    print('  1. Click the folder icon on the left sidebar')
    print('  2. Navigate to: drive/MyDrive/ml-learning/stage2_results/')
    print('  3. Right-click on folders and select Download')
else:
    print('❌ No results found to save!')
    print('\nSearching for any checkpoint files...')
    !find /content/ml-learning -name '*checkpoint*' -o -name 'adapter_*' -o -name 'pytorch_model.bin' 2>/dev/null | head -20
print('=' * 70)