In [None]:
# Section banner for the environment-setup cell.
print("=" * 80, "SETTING UP ENVIRONMENT", "=" * 80, sep="\n")

# Install required packages
import sys
import subprocess

def install_packages(packages=None):
    """Install the Python dependencies needed for DPO training.

    Every requirement is passed to a single ``pip install`` invocation so that
    pip's resolver sees all constraints together. Installing them one by one
    lets a later install silently replace a version an earlier one selected.

    Args:
        packages: Optional iterable of pip requirement strings. When omitted,
            the notebook's default dependency set is installed.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    if packages is None:
        packages = [
            'transformers>=4.40.0',
            'torch>=2.0.0',
            'datasets>=2.16.0',
            'accelerate>=0.27.0',
            'peft>=0.8.0',
            # The training cell calls DPOTrainer(processing_class=...), which
            # older TRL releases do not accept, so require TRL >= 0.12.
            'trl>=0.12.0',
            'bitsandbytes',
            'scipy',
        ]
    packages = list(packages)

    # pip rejects an install command without requirements; nothing to do.
    if not packages:
        print("✓ All packages installed!")
        return

    for package in packages:
        print(f"Installing {package}...")

    # One resolver run for the whole set (see docstring).
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", *packages])

    print("✓ All packages installed!")

# Runs the installer every time this cell executes. Once the environment is
# set up, comment out the call below to skip the reinstall.
install_packages()

SETTING UP ENVIRONMENT
Installing transformers>=4.40.0...
Installing torch>=2.0.0...
Installing datasets>=2.16.0...
Installing accelerate>=0.27.0...
Installing peft>=0.8.0...
Installing trl>=0.8.0...
Installing bitsandbytes...
Installing scipy...
✓ All packages installed!


In [None]:
# ============================================================================
# CELL 2: Check GPU and Imports
# ============================================================================
import torch
import os
from pathlib import Path

# Report the PyTorch build and, when a CUDA device is present, its basic specs.
gpu_available = torch.cuda.is_available()

print("\n" + "=" * 80, "GPU CHECK", "=" * 80, sep="\n")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {gpu_available}")

if not gpu_available:
    print("⚠️ WARNING: No GPU detected. Training will be VERY slow on CPU.")
else:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    # total_memory is reported in bytes; shown here in decimal gigabytes.
    print(f"GPU memory: {gpu_props.total_memory / 1e9:.2f} GB")


GPU CHECK
PyTorch version: 2.9.0+cu126
CUDA available: True
CUDA version: 12.6
GPU: Tesla T4
GPU memory: 15.83 GB


In [None]:
# ============================================================================
# CELL 3: Configuration
# ============================================================================
# Section banner for the configuration cell.
print("\n" + "=" * 80, "CONFIGURATION", "=" * 80, sep="\n")

class Config:
    """Central place for every tunable used by the DPO training notebook.

    Values are plain class attributes, so a ``Config()`` instance simply
    exposes the defaults defined here.
    """

    # ---- What gets trained ----
    # One entry per group; data is read from <data_dir>/<group>/train.json.
    groups = ['us']
    # Hugging Face model id of the base checkpoint that is fine-tuned.
    base_model = "Qwen/Qwen2.5-0.5B"

    # ---- Filesystem layout ----
    data_dir = "./data/dpo"
    output_dir = "./results/dpo_models"

    # ---- Optimisation ----
    epochs = 3
    batch_size = 4  # lower to 2 or 1 on out-of-memory errors
    gradient_accumulation_steps = 1
    learning_rate = 5e-5
    warmup_ratio = 0.1
    beta = 0.1  # DPO beta parameter
    max_length = 512

    # ---- LoRA adapter ----
    lora_rank = 16  # lower to 8 on out-of-memory errors
    lora_alpha = 32
    lora_dropout = 0.05

    # ---- Logging / checkpoint cadence (in optimizer steps) ----
    logging_steps = 20
    save_steps = 500

# Shared settings object read by the later cells.
config = Config()

# Echo the settings that matter most for a run, one "label: value" line each.
for label, attr in (
    ("Training groups", "groups"),
    ("Base model", "base_model"),
    ("Epochs", "epochs"),
    ("Batch size", "batch_size"),
    ("Learning rate", "learning_rate"),
    ("Beta", "beta"),
    ("LoRA rank", "lora_rank"),
):
    print(f"{label}: {getattr(config, attr)}")


CONFIGURATION
Training groups: ['us']
Base model: Qwen/Qwen2.5-0.5B
Epochs: 3
Batch size: 4
Learning rate: 5e-05
Beta: 0.1
LoRA rank: 16


In [None]:
# ============================================================================
# CELL 4: Unpack Data Archive
# ============================================================================
# Extracts dpo_data.tar.gz from the current working directory. Presumably the
# archive unpacks to ./data/dpo/<group>/train.json, which the next cell checks
# for -- TODO confirm against the archive layout.
!tar -xzf dpo_data.tar.gz

In [None]:
# ============================================================================
# CELL 5: Download/Load Data
# ============================================================================
# Section banner for the data-loading cell.
print("\n" + "=" * 80, "LOADING DATA", "=" * 80, sep="\n")

def download_prepared_data(data_dir=None, groups=None):
    """Check that prepared DPO training data is present on disk.

    Despite its name, this function downloads nothing. It prints the options
    for obtaining the data, then verifies that ``<data_dir>/<group>/train.json``
    exists and is non-empty for every requested group. All groups are checked
    so that every missing one is reported in a single run.

    Args:
        data_dir: Directory holding one sub-folder per group. Defaults to
            ``config.data_dir``.
        groups: Iterable of group names to check. Defaults to ``config.groups``.

    Returns:
        bool: True when every group has a ``train.json`` with at least one
        non-blank line, False otherwise.
    """
    print("Option 1: Upload data manually")
    print("  - Upload your local 'data/dpo/' folder to Colab")
    print("  - Or run prepare_dpo_data.py first")

    print("\nOption 2: Prepare data from PRISM")
    print("  - Loads PRISM dataset and creates preference pairs")

    # Fall back to the notebook-wide configuration only for omitted arguments.
    data_path = Path(config.data_dir if data_dir is None else data_dir)
    if groups is None:
        groups = config.groups

    if not data_path.is_dir():
        print(f"\n⚠️ Data directory not found: {data_path}")
        print("Please upload your data/dpo/ folder or run data preparation")
        return False

    all_ready = True
    for group in groups:
        group_file = data_path / group / "train.json"
        if not group_file.is_file():
            print(f"⚠️ Missing data for group: {group}")
            all_ready = False
            continue

        # The file holds one JSON record per line; blank lines are not samples.
        with open(group_file, encoding="utf-8") as f:
            count = sum(1 for line in f if line.strip())

        if count == 0:
            print(f"⚠️ No training samples for group: {group}")
            all_ready = False
            continue

        print(f"✓ {group}: {count} training samples")

    return all_ready

# Result of the data check; the training cell skips training when this is False.
data_ready = download_prepared_data()


LOADING DATA
Option 1: Upload data manually
  - Upload your local 'data/dpo/' folder to Colab
  - Or run prepare_dpo_data.py first

Option 2: Prepare data from PRISM
  - Loads PRISM dataset and creates preference pairs
✓ us: 2505 training samples


In [None]:
# ============================================================================
# CELL 6: Training Functions
# ============================================================================
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer, DPOConfig
from datasets import load_dataset, Dataset
import json

def load_model_and_tokenizer(model_name):
    """Load a 4-bit quantized base model wrapped with LoRA adapters, plus its tokenizer.

    The model is loaded with NF4 4-bit quantization (double quantization
    enabled), prepared for k-bit training, and wrapped with LoRA adapters on
    the attention projections. LoRA hyperparameters come from the global
    ``config``.

    Args:
        model_name: Hugging Face model id or local path of the base model.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the PEFT-wrapped model
        and ``tokenizer`` has a pad token set (falling back to the EOS token).
    """
    print(f"\nLoading model: {model_name}")

    # bfloat16 is only hardware-accelerated on CUDA compute capability >= 8.0.
    # Older GPUs (e.g. the Tesla T4, capability 7.5) use float16 instead.
    # Without CUDA the original bfloat16 setting is kept.
    compute_dtype = torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] < 8:
        compute_dtype = torch.float16

    # QLoRA config (4-bit quantization)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=True,
    )

    # NOTE(review): trust_remote_code=True executes Python shipped with the
    # model repository. Only point this at repositories you trust.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
    )

    # Batched training needs a pad token; reuse EOS when none is defined.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Prepare the quantized model for training before attaching adapters.
    model = prepare_model_for_kbit_training(model)

    lora_config = LoraConfig(
        r=config.lora_rank,
        lora_alpha=config.lora_alpha,
        lora_dropout=config.lora_dropout,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, lora_config)

    print("✓ Model loaded with LoRA")
    model.print_trainable_parameters()

    return model, tokenizer


def load_training_data(group_name):
    """Load the DPO preference pairs for one group as a Hugging Face dataset.

    Reads ``<config.data_dir>/<group_name>/train.json``. Despite the ``.json``
    extension, the file is parsed as JSON Lines: one JSON object per line.
    Blank lines are ignored.

    Args:
        group_name: Name of the group sub-folder under ``config.data_dir``.

    Returns:
        The records wrapped by ``Dataset.from_list``.

    Raises:
        FileNotFoundError: If the training file does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON; the
            message names the file and line number.
        ValueError: If the file contains no records.
    """
    data_file = Path(config.data_dir) / group_name / "train.json"

    print(f"\nLoading training data: {data_file}")

    data = []
    with open(data_file, encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # A trailing newline or spacer line is not a record.
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Same exception type, but say where the bad record lives.
                raise json.JSONDecodeError(
                    f"{exc.msg} (in {data_file}, line {line_number})",
                    exc.doc,
                    exc.pos,
                ) from exc

    if not data:
        raise ValueError(f"No preference pairs found in {data_file}")

    print(f"✓ Loaded {len(data)} preference pairs")

    # Convert to HuggingFace dataset
    dataset = Dataset.from_list(data)

    return dataset


def train_dpo_model(group_name):
    """Train a DPO model for one demographic group and save the result.

    Loads the quantized LoRA model and tokenizer, loads the group's preference
    pairs, runs TRL's ``DPOTrainer``, then writes the final model, the
    tokenizer and a ``training_info.json`` summary under
    ``<config.output_dir>/<group_name>``. Model and trainer references are
    released and the CUDA cache is emptied even when training fails, so a
    failed group does not hold GPU memory for the next one.

    Args:
        group_name: Name of the group sub-folder under ``config.data_dir``.

    Returns:
        pathlib.Path: Directory containing the saved final model.
    """
    import gc

    print("\n" + "="*80)
    print(f"TRAINING DPO MODEL: {group_name.upper()}")
    print("="*80)

    # Pre-bind so the cleanup in ``finally`` works wherever a failure occurs.
    model = tokenizer = trainer = None
    try:
        # Load model and tokenizer
        model, tokenizer = load_model_and_tokenizer(config.base_model)

        # Load training data
        train_dataset = load_training_data(group_name)

        # Output directory
        output_dir = Path(config.output_dir) / group_name
        output_dir.mkdir(parents=True, exist_ok=True)

        # bf16 is only hardware-accelerated on CUDA compute capability >= 8.0.
        # On older GPUs (e.g. Tesla T4, 7.5) train in fp16 mixed precision.
        pre_ampere_gpu = (
            torch.cuda.is_available()
            and torch.cuda.get_device_capability(0)[0] < 8
        )

        # Training arguments (for TRL DPO)
        training_args = DPOConfig(
            output_dir=str(output_dir),
            num_train_epochs=config.epochs,
            per_device_train_batch_size=config.batch_size,
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            learning_rate=config.learning_rate,
            logging_steps=config.logging_steps,
            save_steps=config.save_steps,
            save_total_limit=2,
            warmup_ratio=config.warmup_ratio,
            lr_scheduler_type="cosine",
            bf16=not pre_ampere_gpu,
            fp16=pre_ampere_gpu,
            remove_unused_columns=False,
            report_to="none",
            beta=config.beta,  # DPO beta parameter goes here
            max_length=config.max_length,
            # Half of the token budget is reserved for the prompt.
            max_prompt_length=config.max_length // 2,
        )

        # DPO Trainer
        trainer = DPOTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            processing_class=tokenizer,  # Use processing_class instead of tokenizer
        )

        # Train
        print(f"\nStarting training for {config.epochs} epochs...")
        print("This may take 2-4 hours depending on GPU...")

        trainer.train()

        # Save final model
        final_dir = output_dir / "final"
        trainer.save_model(str(final_dir))
        tokenizer.save_pretrained(str(final_dir))

        print(f"\n✓ Model saved to: {final_dir}")

        # Record the settings this model was trained with next to the weights.
        info = {
            'group': group_name,
            'base_model': config.base_model,
            'epochs': config.epochs,
            'batch_size': config.batch_size,
            'learning_rate': config.learning_rate,
            'beta': config.beta,
            'lora_rank': config.lora_rank,
            'training_samples': len(train_dataset),
        }

        with open(output_dir / "training_info.json", 'w', encoding="utf-8") as f:
            json.dump(info, f, indent=2)

        return final_dir
    finally:
        # Drop the references and collect cycles first: empty_cache() can only
        # release memory that no live tensor still occupies.
        del model, trainer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

In [None]:
# ============================================================================
# CELL 7: Run Training
# ============================================================================
import traceback

print("\n" + "="*80)
print("STARTING TRAINING PIPELINE")
print("="*80)

if not data_ready:
    print("⚠️ Data not ready. Please upload data first.")
else:
    trained_models = {}   # group name -> path of the saved final model
    failed_groups = []    # groups whose training raised

    total_groups = len(config.groups)
    # enumerate() gives the running position directly; list.index() would
    # report the wrong number for a repeated group name.
    for position, group in enumerate(config.groups, start=1):
        print(f"\n{'='*80}")
        print(f"Training model {position}/{total_groups}: {group}")
        print(f"{'='*80}")

        try:
            trained_models[group] = train_dpo_model(group)
        except Exception as e:
            # Best effort: log the failure and continue with the next group.
            failed_groups.append(group)
            print(f"\n❌ Error training {group} model: {e}")
            traceback.print_exc()
        else:
            print(f"\n✅ Successfully trained {group} model!")

    print("\n" + "="*80)
    # Do not announce plain success when any group failed.
    print("TRAINING COMPLETE" if not failed_groups else "TRAINING FINISHED WITH ERRORS")
    print("="*80)
    print("\nTrained models:")
    for group, path in trained_models.items():
        print(f"  {group}: {path}")

    if failed_groups:
        print(f"\nFailed groups: {', '.join(failed_groups)}")

    # The follow-up steps only make sense when something was produced.
    if trained_models:
        print("\nNext steps:")
        print(f"1. Download models from: {config.output_dir}")
        print("2. Run evaluation: python scripts/evaluate_globalopinions.py")


STARTING TRAINING PIPELINE

Training model 1/1: us

TRAINING DPO MODEL: US

Loading model: Qwen/Qwen2.5-0.5B
✓ Model loaded with LoRA
trainable params: 2,162,688 || all params: 496,195,456 || trainable%: 0.4359

Loading training data: data/dpo/us/train.json
✓ Loaded 2505 preference pairs


Extracting prompt in train dataset:   0%|          | 0/2505 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/2505 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2505 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.



Starting training for 3 epochs...
This may take 2-4 hours depending on GPU...




Step,Training Loss
20,0.6968
40,0.6937
60,0.6915
80,0.6839
100,0.6698
120,0.6726
140,0.6528
160,0.6818
180,0.7129
200,0.6639




Step,Training Loss
20,0.6968
40,0.6937
60,0.6915
80,0.6839
100,0.6698
120,0.6726
140,0.6528
160,0.6818
180,0.7129
200,0.6639


In [None]:
   # Manual cleanup cell: run after an interrupted or failed training run to
   # hand cached GPU memory back to the driver.
   import gc

   import torch

   # Collect unreachable objects first; empty_cache() can only release blocks
   # that no live tensor still occupies.
   gc.collect()
   if torch.cuda.is_available():
       torch.cuda.empty_cache()