# WordPress Translation Fine-Tuning (Mistral 7B)

Fine-tune Mistral 7B for English→Dutch WordPress translation using QLoRA on Google Colab Free Tier.

**Requirements:**
- Google Colab with GPU runtime (T4 is fine)
- Upload your `train_subset.jsonl` file

**Estimated time:** 2-4 hours for 10k examples

## 1. Setup Environment

In [None]:
# Check GPU: print the NVIDIA device/driver/memory table for this runtime.
# The rest of the notebook expects a GPU runtime (a T4 is sufficient).
!nvidia-smi

In [None]:
# Install dependencies (pinned versions for compatibility)
!pip install -q torch torchvision torchaudio
!pip install -q transformers==4.44.0 datasets>=2.17.0 accelerate>=0.27.0
!pip install -q peft>=0.8.2 trl==0.9.6 bitsandbytes>=0.42.0
!pip install -q scipy

In [None]:
import torch
# Report the installed PyTorch build and, when CUDA is usable, the first GPU.
print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    # total_memory is reported in bytes; shown here in decimal gigabytes
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"VRAM: {vram_gb:.1f} GB")

## 2. Upload Dataset

Run this cell and upload the `train_subset.jsonl` file generated by the export script.

In [None]:
from google.colab import files
import os

# Upload the dataset file
print("Upload train_subset.jsonl (generated by export_for_colab.py)")
uploaded = files.upload()

# Fail with an actionable message when nothing was uploaded (for example the
# file picker was dismissed) instead of a bare IndexError on the lookup below.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and select train_subset.jsonl."
    )

# Get the uploaded filename (the first one if several files were selected)
DATASET_FILE = next(iter(uploaded))
print(f"Uploaded: {DATASET_FILE}")

In [None]:
# Load dataset
from datasets import load_dataset

# Parse the uploaded JSON-lines file as a single 'train' split.
dataset = load_dataset('json', data_files=DATASET_FILE, split='train')

# Summarise what was loaded, then show the start of the first example.
example_count = len(dataset)
print(f"Dataset size: {example_count} examples")
print(f"Columns: {dataset.column_names}")
first_text_preview = dataset[0]['text'][:500]  # at most 500 characters
print(f"\nSample example:\n{first_text_preview}...")

## 3. Load Model with 4-bit Quantization

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit quantization settings (essential for free Colab): NF4 quant type,
# double quantization enabled, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

print("Loading model with 4-bit quantization...")
model_load_options = {
    "quantization_config": bnb_config,
    "device_map": "auto",
    "trust_remote_code": False,
    "torch_dtype": torch.float16,
}
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_load_options)

# Tokenizer: reuse the EOS token for padding and pad on the right-hand side.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

allocated_gb = torch.cuda.memory_allocated() / 1e9
print(f"Model loaded! Memory used: {allocated_gb:.2f} GB")

## 4. Configure LoRA

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the quantized model for k-bit training before adapters are attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapter hyperparameters and the projection layers they are applied to.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,  # Reduced from 64 for memory
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)
model = get_peft_model(model, lora_config)

# Count trainable and total parameters in a single pass over the model.
trainable_params = 0
total_params = 0
for param in model.parameters():
    param_size = param.numel()
    total_params += param_size
    if param.requires_grad:
        trainable_params += param_size
print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")

## 5. Training Configuration

In [None]:
from trl import SFTConfig, SFTTrainer

# One SFTConfig object carries both the generic training arguments and the
# SFT-specific options (sequence length, text column, packing).
sft_config = SFTConfig(
    # Run bookkeeping
    output_dir="./wp-translation-adapter",
    seed=42,
    report_to="none",  # Disable wandb etc.
    logging_steps=25,
    save_steps=500,
    save_total_limit=2,
    # Schedule and batch shape
    num_train_epochs=1,  # Start with 1 epoch for testing
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # Effective batch size = 8
    group_by_length=True,
    # Optimisation
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Precision / memory
    fp16=True,  # Use FP16 for T4
    gradient_checkpointing=True,
    # SFT-specific settings
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
)

# Echo the key settings; the effective batch size is per-device batch
# multiplied by the number of gradient-accumulation steps.
effective_batch_size = (
    sft_config.per_device_train_batch_size * sft_config.gradient_accumulation_steps
)
print("Training configuration ready!")
print(f"- Epochs: {sft_config.num_train_epochs}")
print(f"- Batch size: {sft_config.per_device_train_batch_size}")
print(f"- Gradient accumulation: {sft_config.gradient_accumulation_steps}")
print(f"- Effective batch size: {effective_batch_size}")

## 6. Start Training

In [None]:
# Create trainer.
# The keyword used to hand the tokenizer to SFTTrainer differs between TRL
# releases: older ones (including the trl==0.9.6 pinned by this notebook)
# take `tokenizer=`, newer ones take `processing_class=`. Passing the wrong
# one raises a TypeError, so the keyword is chosen from the installed
# signature instead of being hard-coded.
import inspect

_sft_trainer_params = inspect.signature(SFTTrainer.__init__).parameters
_tokenizer_kwarg = (
    "processing_class" if "processing_class" in _sft_trainer_params else "tokenizer"
)

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=dataset,
    **{_tokenizer_kwarg: tokenizer},
)

print("\nStarting training... This may take 2-4 hours.")
print("=" * 50)

In [None]:
# Run the fine-tuning loop and report how long it took (wall clock).
import time

start_time = time.time()
trainer.train()
elapsed = time.time() - start_time

elapsed_hours = elapsed / 3600  # seconds -> hours
banner = "=" * 50
print(f"\n{banner}")
print("Training complete!")
print(f"Time elapsed: {elapsed_hours:.2f} hours")

## 7. Save Adapter Weights

In [None]:
# Save adapter
ADAPTER_PATH = "./wp-translation-adapter-final"

model.save_pretrained(ADAPTER_PATH)
tokenizer.save_pretrained(ADAPTER_PATH)

print(f"Adapter saved to {ADAPTER_PATH}")
!ls -la {ADAPTER_PATH}

In [None]:
# Download adapter to your local machine.
# Uses `files` (imported from google.colab in the upload cell) and
# ADAPTER_PATH (set in the save cell), so both cells must have been run.
import shutil

# Create zip file: packs the contents of ADAPTER_PATH into
# "wp-translation-adapter.zip" in the current working directory
shutil.make_archive("wp-translation-adapter", "zip", ADAPTER_PATH)

# Download: hand the archive to the browser
files.download("wp-translation-adapter.zip")
print("\nDownload started! Save this file to use the adapter locally.")

## 8. Test the Model

In [None]:
# Test translation
def translate(text, source_lang="English", target_lang="Dutch"):
    """Translate one WordPress string with the fine-tuned model.

    Uses the notebook-level ``model``, ``tokenizer`` and ``torch`` names.

    Args:
        text: Source string to translate.
        source_lang: Language name written into the prompt as the source.
        target_lang: Language name written into the prompt as the target.

    Returns:
        The model's generated continuation, decoded without special tokens
        and stripped of surrounding whitespace.
    """
    # NOTE(review): the prompt spells out "<s>" and the tokenizer call below
    # uses its default special-token handling, which may prepend another BOS
    # token. Confirm this matches how the training examples were formatted.
    prompt = f"""<s>[INST] Translate the following WordPress text from {source_lang} to {target_lang}. Preserve any placeholders like %s, %d, or {{name}}.

{text} [/INST]"""

    # The model may still be in training mode after trainer.train(); switch to
    # eval mode so the LoRA dropout does not perturb the generated output.
    model.eval()

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Slicing off the prompt by length
    # is more robust than splitting the decoded text on "[/INST]", which would
    # truncate the result if the model itself emitted that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

In [None]:
# Sample WordPress strings, including %d and {name} placeholders that the
# prompt asks the model to preserve.
test_strings = [
    "Add to cart",
    "Your order has been placed successfully.",
    "Please enter a valid email address.",
    "%d items in your cart",
    "Hello, {name}! Welcome back.",
]

row_separator = "-" * 60

print("Translation Tests:")
print("=" * 60)
for source_text in test_strings:
    dutch_text = translate(source_text)
    print(f"EN: {source_text}")
    print(f"NL: {dutch_text}")
    print(row_separator)

## Next Steps

1. **Download the adapter zip file** - it contains the LoRA weights
2. **Copy to your local project**: `models/adapters/nl/`
3. **Run evaluation locally**: `./run.py evaluate nl`
4. **For more training**: Increase `num_train_epochs` or use more data