# 🚀 Mongolian GRPO Model Training on Google Colab

This notebook runs the complete GRPO training pipeline on Google Colab with GPU acceleration.

**Requirements:**
- Google Colab Pro (recommended for T4/V100 GPU)
- OpenAI API key for dataset generation
- Runtime: GPU (T4, V100, or A100)

**Expected Training Time:** 30-60 minutes

## 🔧 Setup Environment

In [None]:
# Check GPU availability.
# Training needs a CUDA device: report which GPU the runtime provides, or
# explain how to enable one.
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1024**3 to show a gigabyte-scale figure.
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    # These strings were mis-encoded (UTF-8 read as Mac Roman); restored here.
    print("⚠️ No GPU detected. Please enable GPU runtime:")
    print("Runtime → Change runtime type → Hardware accelerator → GPU")

In [None]:
# Install training dependencies
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers>=4.35.0
!pip install -q accelerate>=0.24.0
!pip install -q peft>=0.6.0
!pip install -q trl>=0.7.0
!pip install -q datasets>=2.14.0
!pip install -q bitsandbytes>=0.41.0
!pip install -q wandb>=0.15.0
!pip install -q openai>=1.0.0
!pip install -q tqdm numpy scipy

print("‚úÖ Dependencies installed successfully!")

## 📁 Upload Project Files

In [None]:
# Option 1: Upload from local files
from google.colab import files
import zipfile
import os

print("üì§ Upload your project as a ZIP file:")
print("1. Zip your entire project folder")
print("2. Upload it using the file browser below")
print("3. The notebook will extract it automatically")

# Upload ZIP file
uploaded = files.upload()

# Extract the first ZIP file found
for filename in uploaded.keys():
    if filename.endswith('.zip'):
        print(f"üì¶ Extracting {filename}...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall('.')
        print("‚úÖ Project extracted successfully!")
        break

# List project structure
!ls -la

In [None]:
# Option 2: Clone from GitHub (if you've pushed your project)
# This is an alternative to Option 1 above; use only one of the two.
# Uncomment and modify the following lines, replacing the placeholder
# repository URL and directory name with your own:

# !git clone https://github.com/yourusername/mongolian-history-project.git
# %cd mongolian-history-project
# !ls -la

## 🔑 Set API Keys

In [None]:
import os
from getpass import getpass

# Set OpenAI API key
print("üîë Enter your OpenAI API key:")
openai_key = getpass("OpenAI API Key: ")
os.environ['OPENAI_API_KEY'] = openai_key

# Optional: Set Weights & Biases key for experiment tracking
print("\nüîë Enter your W&B API key (optional, press Enter to skip):")
wandb_key = getpass("W&B API Key (optional): ")
if wandb_key:
    os.environ['WANDB_API_KEY'] = wandb_key
    !wandb login

print("‚úÖ API keys configured!")

## 📊 Generate GRPO Dataset

In [None]:
# Check if we have existing datasets
!ls -la data/

# Generate GRPO dataset if not exists
import os
if not os.path.exists('data/mgl_history_grpo.jsonl'):
    print("üìä Generating GRPO dataset...")
    !python scripts/build_grpo_dataset.py --pairs-per-topic 20 --output data/mgl_history_grpo.jsonl
else:
    print("‚úÖ GRPO dataset already exists")

# Validate the dataset
!python scripts/validate_mgl_dataset.py --files data/mgl_history_grpo.jsonl

## 🚀 Run GRPO Training

In [None]:
# Start GRPO training with optimized settings for Colab
import os

# Build the command as an argument list and join it with spaces so that it is
# always ONE shell line. In a regular (non-raw) triple-quoted string each
# backslash-newline is removed by Python, but the final newline remains; a
# flag appended after it ends up on its own line and the shell runs it as a
# separate command instead of passing it to the training script.
training_args = [
    "python", "scripts/train_grpo_model.py",
    "--base", "mistralai/Mistral-7B-Instruct-v0.2",
    "--dataset", "data/mgl_history_grpo.jsonl",
    "--output", "models/mgl_history_grpo_adapter",
    "--batch-size", "2",
    "--learning-rate", "5e-6",
    "--epochs", "2",
    "--max-length", "512",
    "--lora-r", "16",
    "--lora-alpha", "32",
]

# Add W&B logging if key is set
if 'WANDB_API_KEY' in os.environ:
    training_args.append("--use-wandb")

# Kept as a string so it can be interpolated into a `!` shell line.
training_command = " ".join(training_args)

print("🔥 Starting GRPO training...")
print(f"Command: {training_command}")
print("\n" + "="*50)

# Execute training: IPython expands {training_command} to the Python
# variable's value and hands the resulting text to the system shell.
!{training_command}

## 📈 Monitor Training Progress

In [None]:
# Check training logs
import json
import matplotlib.pyplot as plt

# Display training statistics
def format_stat(stats, key, spec="", suffix=""):
    """Format ``stats[key]`` for display, falling back to ``"N/A"``.

    Applying a numeric format spec such as ``.4f`` directly to an ``'N/A'``
    default raises ``ValueError``, so a missing statistic must be handled
    before formatting.

    Args:
        stats: Mapping loaded from the training statistics JSON file.
        key: Name of the statistic to look up.
        spec: Format specification (e.g. ``".4f"``) applied to the value.
        suffix: Unit appended to a successfully formatted value (e.g. ``"s"``).

    Returns:
        The formatted value plus ``suffix``; ``"N/A"`` when the key is missing
        or ``None``; ``str(value)`` when the value does not support ``spec``.
    """
    value = stats.get(key)
    if value is None:
        return "N/A"
    try:
        return format(value, spec) + suffix
    except (TypeError, ValueError):
        # e.g. a string stored where a number was expected
        return str(value)


# (label, key in the stats file, format spec, unit suffix), in display order.
TRAINING_STAT_FIELDS = [
    ("Base model", "base_model", "", ""),
    ("Dataset samples", "total_samples", "", ""),
    ("Training samples", "train_samples", "", ""),
    ("Test samples", "test_samples", "", ""),
    ("Epochs", "epochs", "", ""),
    ("Total steps", "total_steps", "", ""),
    ("Average loss", "avg_loss", ".4f", ""),
    ("Final loss", "final_loss", ".4f", ""),
    ("Mean reward", "mean_reward", ".3f", ""),
    ("Validation accuracy", "validation_accuracy", ".1%", ""),
    ("Training time", "training_time", ".1f", "s"),
    ("Model size", "model_size_mb", ".1f", "MB"),
]

if os.path.exists('training_logs/training_stats.json'):
    with open('training_logs/training_stats.json', 'r') as f:
        stats = json.load(f)

    print("📊 TRAINING RESULTS")
    print("=" * 30)
    for label, key, spec, suffix in TRAINING_STAT_FIELDS:
        print(f"{label}: {format_stat(stats, key, spec, suffix)}")
else:
    print("⚠️ Training statistics not found. Check if training completed successfully.")

# Check model files
print("\nüìÅ MODEL FILES:")
!ls -la models/mgl_history_grpo_adapter/

## 🧪 Test Trained Model

In [None]:
# Load and test the trained model
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

def load_trained_model(
    base_model_name="mistralai/Mistral-7B-Instruct-v0.2",
    adapter_path="models/mgl_history_grpo_adapter",
):
    """Load the base model and attach the trained GRPO LoRA adapter.

    Args:
        base_model_name: Model identifier passed to ``from_pretrained`` for
            both the tokenizer and the base model. Defaults to the model used
            by the training command in this notebook.
        adapter_path: Directory containing the trained adapter. Defaults to
            the training command's output directory.

    Returns:
        A ``(model, tokenizer)`` tuple: the base model wrapped in a
        ``PeftModel``, and a tokenizer that is guaranteed to have a pad token.
    """
    print("📥 Loading base model...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,  # load weights in half precision
        device_map="auto"
    )

    print("🔧 Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, adapter_path)

    # Fall back to the EOS token when the tokenizer defines no pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

def generate_response(model, tokenizer, prompt, max_length=256):
    """Sample a reply to ``prompt`` from ``model`` and return it as text.

    The prompt is wrapped in ``<|user|>`` / ``<|assistant|>`` markers and
    tokenized with truncation at 512 tokens. Generation uses sampling
    (temperature 0.7, top-p 0.9) and produces at most ``max_length`` new
    tokens. Only the tokens generated after the prompt are decoded; special
    tokens are skipped and surrounding whitespace is stripped.
    """
    chat_text = f"<|user|>\n{prompt}\n<|assistant|>\n"
    encoded = tokenizer(chat_text, return_tensors="pt", truncation=True, max_length=512)

    # Move every input tensor to the GPU when one is present.
    if torch.cuda.is_available():
        encoded = {name: tensor.cuda() for name, tensor in encoded.items()}

    # Number of prompt tokens, used below to slice the prompt off the output.
    prompt_len = encoded['input_ids'].shape[1]

    sampling_options = dict(
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)

    new_tokens = generated[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Load the trained model and run a few Mongolian sample prompts through it.
if os.path.exists('models/mgl_history_grpo_adapter'):
    try:
        model, tokenizer = load_trained_model()
        print("✅ Model loaded successfully!")

        # Test with sample prompts.
        # These were stored as mojibake (UTF-8 read as Mac Roman), which fed
        # the model garbage; restored to the intended Mongolian Cyrillic text.
        test_prompts = [
            "Чингис хааны тухай ярина уу?",
            "1921 оны хувьсгалын үр дүн юу байсан бэ?",
            "Монголын ардчилсан хувьсгал хэрхэн өрнөсөн бэ?",
            "Богд хааны үеийн онцлог нь юу вэ?"
        ]

        print("\n🧪 TESTING TRAINED MODEL")
        print("=" * 40)

        for i, prompt in enumerate(test_prompts, 1):
            print(f"\n{i}. ❓ {prompt}")
            response = generate_response(model, tokenizer, prompt)
            print(f"   🤖 {response}")

    except Exception as e:
        # The try block covers generation as well as loading, so the message
        # names both.
        print(f"❌ Error loading or testing model: {e}")
        print("This might be due to memory constraints or incomplete training.")
else:
    print("⚠️ Trained model not found. Make sure training completed successfully.")

## 💾 Download Trained Model

In [None]:
# Create a ZIP file with the trained model and logs
import zipfile
import os
from datetime import datetime

def create_model_archive():
    """Bundle the adapter, training logs and dataset into a timestamped ZIP.

    The archive is written to the current working directory. Files keep their
    paths relative to that directory. Any of the three inputs that does not
    exist is skipped.

    Returns:
        The file name of the archive that was created.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_name = f"mongolian_grpo_model_{stamp}.zip"
    dataset_file = 'data/mgl_history_grpo.jsonl'

    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as bundle:
        # Model files first, then training logs; both are added recursively.
        for tree in ('models/mgl_history_grpo_adapter', 'training_logs'):
            if not os.path.exists(tree):
                continue
            for folder, _subdirs, filenames in os.walk(tree):
                for filename in filenames:
                    member = os.path.join(folder, filename)
                    bundle.write(member, os.path.relpath(member, '.'))

        # The dataset is a single file, stored under its own relative path.
        if os.path.exists(dataset_file):
            bundle.write(dataset_file, dataset_file)

    return archive_name

# Create and download the archive
if os.path.exists('models/mgl_history_grpo_adapter'):
    # Import here so this cell does not depend on the Option 1 upload cell
    # having run: with the git-clone route `files` would otherwise be undefined.
    from google.colab import files

    print("📦 Creating model archive...")
    archive_name = create_model_archive()

    print(f"✅ Archive created: {archive_name}")
    print(f"📁 Archive size: {os.path.getsize(archive_name) / 1024 / 1024:.1f} MB")

    # Download the archive
    print("⬇️ Downloading archive...")
    files.download(archive_name)

    print("\n🎉 SUCCESS! Your trained model is ready!")
    print("\n📋 What you got:")
    print("✅ Trained LoRA adapter (models/mgl_history_grpo_adapter/)")
    print("✅ Training logs and statistics (training_logs/)")
    print("✅ GRPO dataset (data/mgl_history_grpo.jsonl)")

    print("\n🚀 Next steps:")
    print("1. Extract the ZIP file in your local project")
    print("2. Load the model with: PeftModel.from_pretrained(base_model, 'models/mgl_history_grpo_adapter')")
    print("3. Integrate with your RAG system for improved Mongolian responses")

else:
    print("⚠️ No trained model found to download.")
    print("Make sure the training completed successfully.")

## 📊 Training Summary

In [None]:
# Display final training summary
def describe_training_results(stats):
    """Return the formatted "Training Results" bullet lines for ``stats``.

    Applying a numeric format spec such as ``.4f`` directly to an ``'N/A'``
    default raises ``ValueError``, so each value is checked before formatting.

    Args:
        stats: Mapping loaded from the training statistics JSON file.

    Returns:
        A list of four strings (final loss, validation accuracy, mean reward,
        training time). A missing or ``None`` value is shown as ``N/A``; a
        value that does not support the numeric format is shown via ``str()``.
    """
    # (label, key in the stats file, format spec, unit suffix)
    fields = (
        ("Final Loss", "final_loss", ".4f", ""),
        ("Validation Accuracy", "validation_accuracy", ".1%", ""),
        ("Mean Reward", "mean_reward", ".3f", ""),
        ("Training Time", "training_time", ".1f", "s"),
    )
    lines = []
    for label, key, spec, suffix in fields:
        value = stats.get(key)
        if value is None:
            shown = "N/A"
        else:
            try:
                shown = format(value, spec) + suffix
            except (TypeError, ValueError):
                shown = str(value)
        lines.append(f"   • {label}: {shown}")
    return lines


print("🎉 MONGOLIAN GRPO TRAINING COMPLETE!")
print("=" * 50)

# Check GPU usage
if torch.cuda.is_available():
    print(f"🔥 GPU Used: {torch.cuda.get_device_name(0)}")
    # NOTE(review): this queries the notebook kernel's own allocations; training
    # was launched as a separate `python` process, so its peak usage is
    # presumably not reflected here. Confirm before relying on this number.
    print(f"💾 VRAM Used: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")

# Check training results
if os.path.exists('training_logs/training_stats.json'):
    with open('training_logs/training_stats.json', 'r') as f:
        stats = json.load(f)

    print("\n📈 Training Results:")
    for line in describe_training_results(stats):
        print(line)
else:
    print("\n⚠️ Training statistics not found. Check if training completed successfully.")

print("\n✅ Your Mongolian historical AI model is now trained and ready!")
print("\n🔮 The model can now provide more accurate, culturally appropriate,")
print("   and historically informed responses in Mongolian language.")

print("\n🎯 Integration Example:")
print("```python")
print("from peft import PeftModel")
print("from transformers import AutoModelForCausalLM")
print("")
print("base_model = AutoModelForCausalLM.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2')")
print("model = PeftModel.from_pretrained(base_model, 'models/mgl_history_grpo_adapter')")
print("```")

print("\n🌟 Congratulations on successfully training your Mongolian AI model!")