# Stage 2: Helpful Fine-Tuning

QLoRA fine-tuning of `google/gemma-2b-it` on the helpful subset of the `Anthropic/hh-rlhf` dataset (full run: first 10,000 training examples, 3 epochs).

**Expected time**: ~2-3 hours on a T4 GPU

In [None]:
# Cell 1: Setup
from google.colab import drive
import os

drive.mount('/content/drive')
os.chdir('/content')

!git clone https://github.com/Jai-Dhiman/ml-learning.git
os.chdir('/content/ml-learning/helpful-finetuning')

print(f"✅ Ready in: {os.getcwd()}")

In [None]:
# Cell 2: Install Dependencies
!pip install -q transformers==4.36.2 datasets==2.16.1 peft==0.7.1 trl==0.7.10
!pip install -q accelerate==0.25.0 bitsandbytes==0.41.3 torch==2.1.2 pyyaml

print("✅ Dependencies installed")

In [None]:
# Cell 3: PREFLIGHT TEST (1 sample)
import os
os.environ['WANDB_DISABLED'] = 'true'

print("="*70)
print("PREFLIGHT: Testing with 1 sample")
print("="*70)

!python src/training/train_qlora.py \
  --model-name google/gemma-2b-it \
  --dataset Anthropic/hh-rlhf \
  --dataset-split 'train[:1]' \
  --output-dir /tmp/preflight_stage2 \
  --lora-r 8 \
  --lora-alpha 16 \
  --batch-size 1 \
  --gradient-accumulation-steps 1 \
  --learning-rate 2e-4 \
  --max-steps 1 \
  --max-seq-length 512 \
  --logging-steps 1 \
  --save-steps 1000

print("\n" + "="*70)
print("✅ PREFLIGHT PASSED - Ready for full training")
print("="*70)

In [None]:
# Cell 4: Full Training (10K samples, 3 epochs)
!mkdir -p artifacts/stage2_helpful

print("="*70)
print("STAGE 2: HELPFUL FINE-TUNING")
print("="*70)

!python src/training/train_qlora.py \
  --model-name google/gemma-2b-it \
  --dataset Anthropic/hh-rlhf \
  --dataset-split 'train[:10000]' \
  --output-dir artifacts/stage2_helpful \
  --lora-r 16 \
  --lora-alpha 32 \
  --lora-dropout 0.05 \
  --batch-size 4 \
  --gradient-accumulation-steps 4 \
  --learning-rate 2e-4 \
  --num-epochs 3 \
  --max-seq-length 512 \
  --logging-steps 10 \
  --save-steps 500

print("\n✅ Stage 2 training complete!")

In [None]:
# Cell 5: Save to Drive
!mkdir -p /content/drive/MyDrive/ml-learning/artifacts/stage2_helpful
!cp -r artifacts/stage2_helpful/* /content/drive/MyDrive/ml-learning/artifacts/stage2_helpful/

print("✅ Saved to Google Drive")
print("Location: MyDrive/ml-learning/artifacts/stage2_helpful/")