In [None]:
# Qwen2-VL Fine-tuning with LoRA on Google Colab

This notebook fine-tunes the Qwen2-VL-2B-Instruct model with LoRA (Low-Rank Adaptation) on your first-principles dataset, loading the base model in 4-bit precision to keep GPU memory usage low.

**Prerequisites:**
- Enable GPU runtime: Runtime → Change runtime type → Hardware accelerator → GPU
- Upload your dataset file to Colab or mount Google Drive


In [None]:
## 1. Setup and Installation


In [None]:
# Install required packages
!pip install -q transformers datasets accelerate peft trl bitsandbytes wandb torch torchvision
!pip install -q qwen-vl-utils

# Check GPU availability
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")


In [None]:
## 2. Authentication


In [None]:
# Authenticate with both external services used later in the notebook:
#   - Hugging Face Hub: the training configuration sets push_to_hub=True.
#   - Weights & Biases: the training configuration sets report_to="wandb".
from huggingface_hub import login
import wandb

# Hugging Face login. Called with no arguments, so no token is hardcoded here;
# presumably this prompts for one interactively — confirm in your environment.
login()

# Weights & Biases login. Kept as the last statement of the cell, so its
# return value is what the cell displays.
wandb.login()


In [None]:
## 3. Upload Dataset

Upload your `first_principles_dataset.json` file using the file upload widget below, or mount Google Drive if your dataset is stored there.


In [None]:
from google.colab import files
import json
import os

# Option 1: Upload dataset file
print("Upload your first_principles_dataset.json file:")
uploaded = files.upload()

# An empty result (e.g. the upload dialog was cancelled) would otherwise
# surface as a bare IndexError when picking the first file name.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and select your "
        "first_principles_dataset.json file."
    )

# Get the uploaded file name (first entry if several files were selected)
dataset_file = next(iter(uploaded))
print(f"Dataset uploaded: {dataset_file}")

# Verify dataset format
with open(dataset_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Fail fast, before the model is downloaded: later cells call
# Dataset.from_list(...) on this content and read sample["messages"].
if not isinstance(data, list) or not data:
    raise ValueError(f"{dataset_file} must contain a non-empty JSON list of examples")
if not isinstance(data[0], dict) or "messages" not in data[0]:
    raise ValueError(f"Each example in {dataset_file} must be an object with a 'messages' key")

print(f"Dataset contains {len(data)} examples")
print("Sample entry:", data[0])


In [None]:
# Option 2: Mount Google Drive (alternative to file upload)
# Uncomment the lines below if you prefer to use Google Drive

# from google.colab import drive
# drive.mount('/content/drive')
# dataset_file = '/content/drive/MyDrive/path/to/your/first_principles_dataset.json'


In [None]:
## 4. Model and Training Setup


In [None]:
from datasets import Dataset
from trl import SFTConfig, SFTTrainer
import torch
from transformers import (
    AutoTokenizer, 
    BitsAndBytesConfig, 
    Qwen2VLForConditionalGeneration,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import json


In [None]:
# Names and locations used throughout the rest of the notebook
model_name = "Qwen/Qwen2-VL-2B-Instruct"
output_dir = "./qwen2-vl-sft-results"
hub_model_id = "your-username/Qwen2-VL-2B-Instruct-SFT"  # Change this to your desired model name

# Metadata recorded with the Weights & Biases run. These values are logged
# for reference only; the training cells define their own settings.
wandb_run_config = dict(
    model=model_name,
    dataset=dataset_file,
    lora_r=32,
    batch_size=1,
    learning_rate=2e-4,
    platform="Google Colab",
)

# Start the WandB run (kept as the final expression so the cell output is unchanged)
wandb.init(project="qwen2-vl-sft-colab", config=wandb_run_config)


In [None]:
# bitsandbytes settings for loading the base model in 4-bit precision,
# collected in one place and then handed to BitsAndBytesConfig.
bnb_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
quantization_config = BitsAndBytesConfig(**bnb_settings)

print("Quantization config created")


In [None]:
# Download (or read from cache) the base checkpoint and load it with the
# 4-bit quantization settings defined above.
print("Loading model...")
model_load_options = {
    "quantization_config": quantization_config,
    "device_map": "auto",
    "torch_dtype": torch.bfloat16,
    "trust_remote_code": True,
}
model = Qwen2VLForConditionalGeneration.from_pretrained(model_name, **model_load_options)

print("Model loaded successfully!")
# Report where the first parameter lives as a quick placement check
first_parameter = next(model.parameters())
print(f"Model device: {first_parameter.device}")


In [None]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Only fall back to EOS as the padding token when the tokenizer has none.
# Overwriting an existing pad token with EOS makes padding indistinguishable
# from the end-of-turn token, so collators that mask padding in the labels
# would also hide EOS and the model would not learn when to stop.
# NOTE(review): the published Qwen2-VL tokenizer config appears to define its
# own pad token already — confirm with `tokenizer.pad_token`.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer loaded successfully!")


In [None]:
# Prepare the quantized model for training with PEFT's helper. The helper
# returns the model object that later cells wrap with LoRA adapters; see the
# PEFT documentation for exactly what it changes.
model = prepare_model_for_kbit_training(model)
# Switch off the generation cache on the model config for training.
# NOTE(review): this is presumably required because the training config
# enables gradient checkpointing — confirm against the transformers docs.
model.config.use_cache = False
print("Model prepared for k-bit training")


In [None]:
# Modules that receive LoRA adapters, matched by name
lora_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]

# Adapter hyperparameters
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the prepared base model with the adapters
model = get_peft_model(model, peft_config)
print("LoRA configuration applied")

# Report how many parameters will actually be updated
trainable_param_count = model.num_parameters(only_trainable=True)
total_param_count = model.num_parameters()
print(f"Trainable parameters: {trainable_param_count:,}")
print(f"Total parameters: {total_param_count:,}")


In [None]:
## 5. Dataset Preparation


In [None]:
# Read the raw JSON records from the dataset file chosen earlier
with open(dataset_file, 'r') as json_handle:
    dataset_json = json.loads(json_handle.read())

# Wrap the records in a HuggingFace Dataset for the trainer
dataset = Dataset.from_list(dataset_json)
example_count = len(dataset)
first_example = dataset[0]
print(f"Dataset loaded with {example_count} examples")
print("Sample entry:", first_example)


In [None]:
# Turns one dataset example into a single training string
def format_dataset(sample):
    """Render one example's conversation through the tokenizer's chat template.

    Args:
        sample: A dataset example; its "messages" entry is passed to the
            chat template.

    Returns:
        The value returned by ``tokenizer.apply_chat_template`` when called
        with ``tokenize=False`` and no generation prompt appended.
    """
    conversation = sample["messages"]
    rendered = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=False,
        tokenize=False,
    )
    return rendered

# Smoke-test the formatter on the first example and show a short preview
sample_formatted = format_dataset(dataset[0])
formatted_preview = sample_formatted[:300] + "..."
print("Formatted sample (first 300 chars):")
print(formatted_preview)


In [None]:
## 6. Training Configuration and Training


In [None]:
# Training arguments

# Choose exactly one mixed-precision mode. The two flags must never both be
# True, and fp16 must not be requested when there is no GPU:
#   - bf16 when the GPU supports it,
#   - fp16 on GPUs without bf16 support,
#   - full precision when no GPU is available.
cuda_available = torch.cuda.is_available()
use_bf16 = cuda_available and torch.cuda.is_bf16_supported()
use_fp16 = cuda_available and not use_bf16

training_args = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # Effective batch size = 4
    gradient_checkpointing=True,
    learning_rate=2e-4,
    num_train_epochs=3,
    weight_decay=0.01,
    warmup_ratio=0.1,
    report_to="wandb",
    save_strategy="epoch",
    save_total_limit=2,
    logging_steps=10,
    save_only_model=True,
    log_on_each_node=True,
    push_to_hub=True,
    hub_model_id=hub_model_id,
    max_length=2048,
    packing=True,
    # `dataset_text_field` is intentionally left at its default: the
    # "messages" column holds a list of chat turns, not text, and the
    # formatting function passed to the trainer already produces the string.
    bf16=use_bf16,
    fp16=use_fp16,
    optim="adamw_torch",
    dataloader_num_workers=2,
)

print("Training arguments configured")
print(f"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
# Rough estimate based on the raw example count; with packing enabled the
# number of optimizer steps the trainer reports may differ.
print(f"Total training steps: {len(dataset) * training_args.num_train_epochs // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps)}")


In [None]:
# Create trainer
# The tokenizer is passed as `processing_class`: TRL releases that accept
# `max_length` in SFTConfig (used by the training arguments in this notebook)
# no longer accept the older `tokenizer=` keyword on SFTTrainer.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    formatting_func=format_dataset,
    processing_class=tokenizer,
)

print("Trainer created successfully!")
print(f"Number of training examples: {len(trainer.train_dataset)}")


In [None]:
# Announce the run, then hand control to the trainer
for start_banner in (
    "Starting training...",
    "This may take 1-3 hours depending on your dataset size and GPU.",
):
    print(start_banner)

trainer.train()

print("Training completed!")


In [None]:
## 7. Save and Test the Model


In [None]:
# Save the final model
final_model_path = f"{output_dir}/final_model"
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)

print(f"Model saved to {final_model_path}")
print("Model files:")
!ls -la {final_model_path}


In [None]:
# Test the fine-tuned model
def test_model(prompt, max_length=200):
    """Generate the assistant's reply to ``prompt`` with the fine-tuned model.

    Args:
        prompt: The user question to send to the model.
        max_length: Maximum number of NEW tokens to generate (passed to
            ``generate`` as ``max_new_tokens``).

    Returns:
        The decoded text of the generated tokens only; the system and user
        prompt are not included.
    """
    messages = [
        {"role": "system", "content": "You are an expert educator who explains concepts from first principles like Richard Feynman. Start with fundamental truths, use simple analogies, and avoid jargon."},
        {"role": "user", "content": prompt}
    ]

    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    # Remember the prompt length so the reply can be sliced out of the output.
    prompt_token_count = inputs["input_ids"].shape[1]

    # Training leaves the model in train mode; switch to eval so dropout
    # (including LoRA dropout) is disabled while sampling.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens. Splitting the decoded text on
    # "<|im_start|>assistant\n" does not work because skip_special_tokens=True
    # strips that marker, which left the whole prompt in the returned text.
    generated_tokens = outputs[0][prompt_token_count:]
    assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return assistant_response

# A handful of questions to sanity-check the fine-tuned model
test_prompts = [
    "Why does ice float on water?",
    "How do computers understand binary code?",
    "What is gravity from a physics perspective?"
]

divider = "-" * 80
print("Testing the fine-tuned model:\n")
for test_number, question in enumerate(test_prompts, start=1):
    print(f"Test {test_number}: {question}")
    answer = test_model(question)
    print(f"Response: {answer}")
    print(divider)


In [None]:
# Close the Weights & Biases run started in the configuration cell
wandb.finish()
completion_message = "Training complete! Check your WandB dashboard for training metrics."
print(completion_message)


In [None]:
## 8. Download Your Model (Optional)

If you want to download the trained model to your local machine:


In [None]:
# Bundle the saved model directory into a zip archive and send it to the browser
import shutil
from google.colab import files

# make_archive appends the ".zip" extension to the base name
shutil.make_archive(
    base_name='qwen2_vl_finetuned_model',
    format='zip',
    root_dir=final_model_path,
)

# Trigger the browser download of the archive
files.download('qwen2_vl_finetuned_model.zip')

print("Model downloaded! You can now use this model locally.")


In [None]:
## 🎉 Training Complete!

### What happened:
- ✅ Loaded Qwen2-VL-2B-Instruct model with 4-bit quantization
- ✅ Applied LoRA for efficient fine-tuning
- ✅ Trained on your first principles dataset
- ✅ Saved the model with adapters
- ✅ Tested the fine-tuned model

### Next steps:
1. **Test more extensively**: Try various prompts to evaluate performance
2. **Push to Hub**: Your model is automatically pushed to HuggingFace Hub
3. **Use the model**: Load it in your applications or continue training
4. **Iterate**: Adjust hyperparameters and retrain if needed

### Model usage:
```python
from transformers import AutoTokenizer, Qwen2VLForConditionalGeneration
from peft import PeftModel

# Load base model with the same model class this notebook uses for training
base_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
# Load your fine-tuned adapters
model = PeftModel.from_pretrained(base_model, "your-username/Qwen2-VL-2B-Instruct-SFT")
tokenizer = AutoTokenizer.from_pretrained("your-username/Qwen2-VL-2B-Instruct-SFT")
```
