# 🚀 Mongolian GRPO Model Training on Google Colab

This notebook runs the complete GRPO training pipeline on Google Colab with GPU acceleration.

**Requirements:**
- Google Colab Pro (recommended for T4/V100 GPU)
- OpenAI API key for dataset generation
- Runtime: GPU (T4, V100, or A100)

**Expected Training Time:** 30-60 minutes

## 🔧 Setup Environment

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, its name and total memory
import torch

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if not cuda_ready:
    # Without a GPU the later training cells are not expected to be usable; tell the user how to enable one
    print("‚ö†Ô∏è No GPU detected. Please enable GPU runtime:")
    print("Runtime ‚Üí Change runtime type ‚Üí Hardware accelerator ‚Üí GPU")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    total_vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"VRAM: {total_vram_gb:.1f} GB")

In [None]:
# Install training dependencies
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers>=4.35.0
!pip install -q accelerate>=0.24.0
!pip install -q peft>=0.6.0
!pip install -q trl>=0.7.0
!pip install -q datasets>=2.14.0
!pip install -q bitsandbytes>=0.41.0
!pip install -q wandb>=0.15.0
!pip install -q openai>=1.0.0
!pip install -q tqdm numpy scipy

print("‚úÖ Dependencies installed successfully!")

## 🔧 Setup Environment and Clone Repository

In [None]:
#@title üîß Setup Environment and Clone from GitHub
#@markdown This cell will:
#@markdown - Check GPU availability
#@markdown - Fix dependency conflicts (pyarrow)
#@markdown - Install all required dependencies
#@markdown - **Clone repository from GitHub** (not upload)
#@markdown - Verify the stable GRPO dataset exists

import os
import sys
import torch

# Check GPU
print("üîç Checking GPU availability...")
!nvidia-smi
print(f"\nCUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("‚ö†Ô∏è No GPU detected. Please enable GPU runtime:")
    print("Runtime ‚Üí Change runtime type ‚Üí Hardware accelerator ‚Üí GPU")

# Fix pyarrow dependency conflict
# Note: cudf requires pyarrow<20.0, but we don't need cudf for GRPO training
# This fix prevents warnings and ensures compatibility
print("\nüîß Fixing dependency conflicts...")
print("   (cudf requires pyarrow<20.0, but this won't affect GRPO training)")
!pip install "pyarrow>=14.0.0,<20.0.0" --force-reinstall --no-deps 2>/dev/null || true
!pip install pyarrow --no-deps 2>/dev/null || true
print("‚úÖ Dependency conflicts resolved (safe to ignore any remaining warnings)")

# Install dependencies optimized for GRPO training
print("\nüì¶ Installing training dependencies...")
!pip install -q transformers>=4.35.0 trl>=0.7.0 accelerate>=0.24.0 peft>=0.6.0 datasets>=2.14.0 bitsandbytes>=0.41.0 --upgrade
!pip install -q wandb openai tqdm numpy scipy matplotlib
print("‚úÖ Dependencies installed")

# Clone repository from GitHub
print("\nüì• Cloning repository from GitHub...")
print("   Repository: https://github.com/Orshikhbayar/mongolian_history_etl.git")

repo_path = "/content/mongolian_history_etl"
repo_url = "https://github.com/Orshikhbayar/mongolian_history_etl.git"

if os.path.exists(repo_path):
    print("   ‚úÖ Repository already exists, pulling latest changes...")
    os.chdir(repo_path)
    !git pull
else:
    print("   üì• Cloning from GitHub...")
    !git clone https://github.com/Orshikhbayar/mongolian_history_etl.git
    os.chdir(repo_path)

# Add to Python path
if repo_path not in sys.path:
    sys.path.insert(0, repo_path)
    print(f"   ‚úÖ Added {repo_path} to Python path")

print(f"\n‚úÖ Working directory: {os.getcwd()}")

# Verify stable dataset exists
print("\nüìä Checking dataset...")
if os.path.exists('data/mgl_history_grpo_stable.jsonl'):
    !ls -lh data/mgl_history_grpo_stable.jsonl
    print("‚úÖ Stable GRPO dataset found!")
    
    # Show dataset stats
    with open('data/mgl_history_grpo_stable.jsonl', 'r', encoding='utf-8') as f:
        lines = f.readlines()
    print(f"üìà Dataset contains {len(lines)} training pairs")
elif os.path.exists('data/mgl_history_grpo.jsonl'):
    !ls -lh data/mgl_history_grpo.jsonl
    print("‚úÖ GRPO dataset found (using mgl_history_grpo.jsonl)")
    with open('data/mgl_history_grpo.jsonl', 'r', encoding='utf-8') as f:
        lines = f.readlines()
    print(f"üìà Dataset contains {len(lines)} training pairs")
else:
    print("‚ùå GRPO dataset not found. Available datasets:")
    !ls -la data/*.jsonl 2>/dev/null || echo "No .jsonl files found in data/"

print("\nüéØ Ready for GRPO training!")

## 📊 Generate GRPO Dataset (if needed)

In [None]:
#@title üìä Generate Main GRPO Dataset
#@markdown Run this cell if the main GRPO dataset doesn't exist
openai_api_key_gen = "" #@param {type:"string"}
pairs_target = 100 #@param {type:"integer"}
generate_dataset = True #@param {type:"boolean"}

import os
from getpass import getpass

# Check if main dataset exists
main_dataset = 'data/mgl_history_grpo_stable.jsonl'
if os.path.exists(main_dataset):
    with open(main_dataset, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    print(f"‚úÖ Main dataset already exists: {len(lines)} pairs")
    print("No need to generate - ready for training!")
else:
    print("‚ùå Main GRPO dataset not found")
    print("Available datasets:")
    !ls -la data/*grpo*.jsonl
    
    if generate_dataset:
        print("\nüîß Generating main GRPO dataset...")
        
        # Set API key
        if not openai_api_key_gen:
            print("üîë Enter your OpenAI API key:")
            openai_api_key_gen = getpass("OpenAI API Key: ")
        
        os.environ['OPENAI_API_KEY'] = openai_api_key_gen
        
        # Generate dataset
        print(f"üöÄ Generating {pairs_target} GRPO pairs...")
        !python scripts/build_grpo_dataset_stable.py --pairs-target {pairs_target} --output {main_dataset}
        
        # Verify generation
        if os.path.exists(main_dataset):
            with open(main_dataset, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            print(f"‚úÖ Dataset generated successfully: {len(lines)} pairs")
        else:
            print("‚ùå Dataset generation failed")
    else:
        print("\n‚ö†Ô∏è Dataset generation skipped")
        print("You can use existing test datasets or enable generation above")

## 🚀 Direct GRPO Training from Repository

In [None]:
#@title üöÄ Run GRPO Training on Stable Dataset
#@markdown Configure training parameters:
openai_api_key = "" #@param {type:"string"}
use_wandb = False #@param {type:"boolean"}
wandb_api_key = "" #@param {type:"string"}
batch_size = 2 #@param {type:"integer"}
learning_rate = 5e-6 #@param {type:"number"}
epochs = 2 #@param {type:"integer"}
max_length = 512 #@param {type:"integer"}
lora_r = 16 #@param {type:"integer"}
lora_alpha = 32 #@param {type:"integer"}

import os
from getpass import getpass

# Set API keys
if not openai_api_key:
    print("üîë Enter your OpenAI API key:")
    openai_api_key = getpass("OpenAI API Key: ")

os.environ['OPENAI_API_KEY'] = openai_api_key

if use_wandb and wandb_api_key:
    os.environ['WANDB_API_KEY'] = wandb_api_key
    !wandb login

# Verify we're in the right directory and have the dataset
if not os.path.exists('data/mgl_history_grpo_stable.jsonl'):
    print("‚ùå Stable dataset not found! Please run the setup cell first.")
    exit()

# Build training command
training_cmd = f"""
python scripts/train_grpo_model.py \
  --base mistralai/Mistral-7B-Instruct-v0.2 \
  --dataset data/mgl_history_grpo_stable.jsonl \
  --output models/mgl_history_grpo_adapter \
  --batch-size {batch_size} \
  --learning-rate {learning_rate} \
  --epochs {epochs} \
  --max-length {max_length} \
  --lora-r {lora_r} \
  --lora-alpha {lora_alpha}
"""

if use_wandb and wandb_api_key:
    training_cmd += " --use-wandb"

print("üî• Starting GRPO training with stable dataset...")
print(f"üìä Dataset: data/mgl_history_grpo_stable.jsonl")
print(f"ü§ñ Model: mistralai/Mistral-7B-Instruct-v0.2")
print(f"üíæ Output: models/mgl_history_grpo_adapter")
print(f"‚öôÔ∏è Batch size: {batch_size}, LR: {learning_rate}, Epochs: {epochs}")
print("\n" + "="*60)

# Execute training
!{training_cmd}

print("\nüéâ Training completed!")

## 📈 Training Results and Model Testing

In [None]:
#@title üìà View Results and Test Model
#@markdown This cell will:
#@markdown - Display training statistics
#@markdown - Load the trained model
#@markdown - Test with Mongolian historical questions
#@markdown - Create downloadable model archive

import json
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import zipfile
from datetime import datetime
from google.colab import files

# Display training statistics
print("üìä TRAINING RESULTS")
print("=" * 50)

stats_file = 'models/mgl_history_grpo_adapter/training_logs/training_stats.json'
if os.path.exists(stats_file):
    with open(stats_file, 'r') as f:
        stats = json.load(f)
    
    print(f"‚úÖ Base model: {stats.get('base_model', 'mistralai/Mistral-7B-Instruct-v0.2')}")
    print(f"üìä Dataset: data/mgl_history_grpo_stable.jsonl")
    print(f"üî¢ Training samples: {stats.get('train_samples', 'N/A')}")
    print(f"üî¢ Test samples: {stats.get('test_samples', 'N/A')}")
    print(f"üîÑ Epochs completed: {stats.get('epochs', 'N/A')}")
    print(f"üìâ Final loss: {stats.get('final_loss', 'N/A'):.4f}")
    print(f"üéØ Validation accuracy: {stats.get('validation_accuracy', 'N/A'):.1%}")
    print(f"üèÜ Mean reward: {stats.get('mean_reward', 'N/A'):.3f}")
    print(f"‚è±Ô∏è Training time: {stats.get('training_time', 'N/A'):.1f}s")
    print(f"üíæ Model size: {stats.get('model_size_mb', 'N/A'):.1f}MB")
else:
    print("‚ö†Ô∏è Training statistics not found.")
    print("Available files:")
    !find models/ -name "*.json" -type f

# Test the trained model: load base model + LoRA adapter and sample answers to a few prompts
print("\nüß™ TESTING TRAINED MODEL")
print("=" * 50)

if os.path.exists('models/mgl_history_grpo_adapter'):
    try:
        # Imported here so the cell does not depend on an import added elsewhere
        from transformers import BitsAndBytesConfig

        print("üì• Loading trained model...")

        # Load base model and tokenizer
        base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        # 8-bit loading to save memory. The quantization settings go through
        # `quantization_config`; passing `load_in_8bit=True` straight to
        # from_pretrained is deprecated in transformers.
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            quantization_config=BitsAndBytesConfig(load_in_8bit=True)
        )

        # Load LoRA adapter on top of the quantized base model
        model = PeftModel.from_pretrained(base_model, 'models/mgl_history_grpo_adapter')

        # Fall back to EOS as the padding token when the tokenizer defines none
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        print("‚úÖ Model loaded successfully!")

        # Test with Mongolian historical questions
        test_questions = [
            "–ß–∏–Ω–≥–∏—Å —Ö–∞–∞–Ω—ã —Ç—É—Ö–∞–π —Ç–æ–≤—á —è—Ä–∏–Ω–∞ —É—É?",
            "1921 –æ–Ω—ã —Ö—É–≤—å—Å–≥–∞–ª—ã–Ω “Ø—Ä –¥“Ø–Ω —é—É –±–∞–π—Å–∞–Ω –±—ç?",
            "–ú–æ–Ω–≥–æ–ª—ã–Ω –∞—Ä–¥—á–∏–ª—Å–∞–Ω —Ö—É–≤—å—Å–≥–∞–ª —Ö—ç–∑—ç—ç –±–æ–ª—Å–æ–Ω –±—ç?",
            "–ë–æ–≥–¥ —Ö–∞–∞–Ω—ã “Ø–µ–∏–π–Ω –æ–Ω—Ü–ª–æ–≥ —Ç—É—Ö–∞–π —Ö—ç–ª–Ω—ç “Ø“Ø?"
        ]

        for i, question in enumerate(test_questions, 1):
            print(f"\n{i}. ‚ùì {question}")

            # Format prompt
            # NOTE(review): presumably this matches the template used by the training script — confirm
            formatted_prompt = f"<|user|>\n{question}\n<|assistant|>\n"

            # Tokenize
            inputs = tokenizer(
                formatted_prompt,
                return_tensors="pt",
                truncation=True,
                max_length=512
            )

            if torch.cuda.is_available():
                inputs = {k: v.cuda() for k, v in inputs.items()}

            # Generate response (sampling, so output varies between runs)
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=200,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    pad_token_id=tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens (everything after the prompt)
            response = tokenizer.decode(
                outputs[0][inputs['input_ids'].shape[1]:],
                skip_special_tokens=True
            ).strip()

            print(f"   ü§ñ {response}")

        # Clean up memory
        del model, base_model, tokenizer
        torch.cuda.empty_cache()

    except Exception as e:
        print(f"‚ùå Error testing model: {e}")
        print("This might be due to memory constraints.")

# Create downloadable archive of the adapter, dataset and dataset stats, then download it
print("\nüì¶ CREATING DOWNLOAD ARCHIVE")
print("=" * 50)

if os.path.exists('models/mgl_history_grpo_adapter'):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_name = f"mongolian_grpo_model_{timestamp}.zip"

    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Add model files. The loop variables must not be called `files`: that name
        # is the google.colab module used for the download below, and rebinding it
        # to a list makes `files.download(...)` fail with AttributeError.
        for root, _dirnames, filenames in os.walk('models/mgl_history_grpo_adapter'):
            for filename in filenames:
                file_path = os.path.join(root, filename)
                arcname = os.path.relpath(file_path, '.')
                zipf.write(file_path, arcname)

        # Add stable dataset
        if os.path.exists('data/mgl_history_grpo_stable.jsonl'):
            zipf.write('data/mgl_history_grpo_stable.jsonl', 'data/mgl_history_grpo_stable.jsonl')

        # Add stats if available
        if os.path.exists('data/mgl_history_grpo_stats_stable.json'):
            zipf.write('data/mgl_history_grpo_stats_stable.json', 'data/mgl_history_grpo_stats_stable.json')

    print(f"‚úÖ Archive created: {archive_name}")
    print(f"üìÅ Size: {os.path.getsize(archive_name) / 1024 / 1024:.1f} MB")

    # Download
    print("‚¨áÔ∏è Starting download...")
    files.download(archive_name)

    print("\nüéâ SUCCESS! Your trained Mongolian GRPO model is ready!")
    print("\nüìã Archive contains:")
    print("   ‚úÖ Trained LoRA adapter")
    print("   ‚úÖ Training logs and statistics")
    print("   ‚úÖ Stable GRPO dataset")
    print("   ‚úÖ Dataset statistics")

    # The printed snippet closes the first from_pretrained( call so it is valid Python when copied
    print("\nüöÄ Integration example:")
    print("```python")
    print("from peft import PeftModel")
    print("from transformers import AutoModelForCausalLM")
    print("")
    print("base_model = AutoModelForCausalLM.from_pretrained(")
    print("    'mistralai/Mistral-7B-Instruct-v0.2'")
    print(")")
    print("model = PeftModel.from_pretrained(")
    print("    base_model, 'models/mgl_history_grpo_adapter'")
    print(")")
    print("```")

else:
    print("‚ùå No trained model found to archive.")

print("\nüåü Training complete! Your Mongolian historical AI is ready to use.")

## 🔄 Push Results Back to GitHub (Optional)

In [None]:
#@title üîÑ Push Trained Model to GitHub
#@markdown This will commit and push your trained model back to your repository
push_to_github = False #@param {type:"boolean"}
github_token = "" #@param {type:"string"}
commit_message = "Add trained GRPO model from Colab" #@param {type:"string"}

import os
from getpass import getpass

if push_to_github:
    print("üîß Setting up Git credentials...")
    
    # Get GitHub token if not provided
    if not github_token:
        print("üîë You need a GitHub Personal Access Token")
        print("Create one at: https://github.com/settings/tokens")
        print("Required permissions: repo (full control)")
        github_token = getpass("GitHub Token: ")
    
    # Configure Git
    !git config --global user.name "Colab Training Bot"
    !git config --global user.email "colab@training.bot"
    
    # Set remote URL with token
    !git remote set-url origin https://{github_token}@github.com/Orshikhbayar/mongolian_history_etl.git
    
    # Check what files to add
    print("\nüìÅ Files to commit:")
    !git status --porcelain
    
    # Add trained model files
    if os.path.exists('models/mgl_history_grpo_adapter'):
        !git add models/mgl_history_grpo_adapter/
        print("‚úÖ Added trained model")
    
    # Add any generated datasets
    if os.path.exists('data/mgl_history_grpo_stable.jsonl'):
        !git add data/mgl_history_grpo_stable.jsonl
        print("‚úÖ Added generated dataset")
    
    # Add training logs
    !git add -A training_logs/ 2>/dev/null || echo "No training logs to add"
    
    # Commit changes
    try:
        !git commit -m "{commit_message}"
        print("‚úÖ Changes committed")
        
        # Push to GitHub
        !git push origin main
        print("üöÄ Successfully pushed to GitHub!")
        print("\nüéØ To sync in Kiro, run: git pull origin main")
        
    except Exception as e:
        print(f"‚ùå Error during commit/push: {e}")
        print("You can still download the model archive manually")
        
else:
    print("‚è≠Ô∏è Skipping GitHub push")
    print("\nüí° To push later:")
    print("1. Enable 'push_to_github' above")
    print("2. Get GitHub token from: https://github.com/settings/tokens")
    print("3. Re-run this cell")
    print("\nüì• Or download the model archive and sync manually")

## 📁 Upload Project Files

In [None]:
# Option 1: Upload from local files
from google.colab import files
import zipfile
import os

print("üì§ Upload your project as a ZIP file:")
print("1. Zip your entire project folder")
print("2. Upload it using the file browser below")
print("3. The notebook will extract it automatically")

# Upload ZIP file
uploaded = files.upload()

# Extract the first ZIP file found
for filename in uploaded.keys():
    if filename.endswith('.zip'):
        print(f"üì¶ Extracting {filename}...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall('.')
        print("‚úÖ Project extracted successfully!")
        break

# List project structure
!ls -la

In [None]:
# Option 2: Clone from GitHub (if you've pushed your project)
# Uncomment and modify the following lines:

# !git clone https://github.com/yourusername/mongolian-history-project.git
# %cd mongolian-history-project
# !ls -la

## 🔑 Set API Keys

In [None]:
import os
from getpass import getpass

# Set OpenAI API key
# getpass reads the key without echoing it; it is then stored in the process
# environment under OPENAI_API_KEY. An empty response is stored as-is.
print("üîë Enter your OpenAI API key:")
openai_key = getpass("OpenAI API Key: ")
os.environ['OPENAI_API_KEY'] = openai_key

# Optional: Set Weights & Biases key for experiment tracking
# Pressing Enter leaves WANDB_API_KEY unset; the training cell further down checks
# for that variable to decide whether to pass --use-wandb.
print("\nüîë Enter your W&B API key (optional, press Enter to skip):")
wandb_key = getpass("W&B API Key (optional): ")
if wandb_key:
    os.environ['WANDB_API_KEY'] = wandb_key
    !wandb login

# Printed unconditionally; the keys themselves are not validated here
print("‚úÖ API keys configured!")

## 📊 Generate GRPO Dataset

In [None]:
# Check if we have existing datasets
!ls -la data/

# Generate GRPO dataset if not exists
import os
if not os.path.exists('data/mgl_history_grpo.jsonl'):
    print("üìä Generating GRPO dataset...")
    !python scripts/build_grpo_dataset.py --pairs-per-topic 20 --output data/mgl_history_grpo.jsonl
else:
    print("‚úÖ GRPO dataset already exists")

# Validate the dataset
!python scripts/validate_mgl_dataset.py --files data/mgl_history_grpo.jsonl

## 🚀 Run GRPO Training

In [None]:
# Start GRPO training with optimized settings for Colab
training_command = """
python scripts/train_grpo_model.py \
  --base mistralai/Mistral-7B-Instruct-v0.2 \
  --dataset data/mgl_history_grpo.jsonl \
  --output models/mgl_history_grpo_adapter \
  --batch-size 2 \
  --learning-rate 5e-6 \
  --epochs 2 \
  --max-length 512 \
  --lora-r 16 \
  --lora-alpha 32
"""

# Add W&B logging if key is set
if 'WANDB_API_KEY' in os.environ:
    training_command += " --use-wandb"

print("üî• Starting GRPO training...")
print(f"Command: {training_command.strip()}")
print("\n" + "="*50)

# Execute training
!{training_command}

## 📈 Monitor Training Progress

In [None]:
# Check training logs
import json
import matplotlib.pyplot as plt

# Display training statistics
if os.path.exists('training_logs/training_stats.json'):
    with open('training_logs/training_stats.json', 'r') as f:
        stats = json.load(f)
    
    print("üìä TRAINING RESULTS")
    print("=" * 30)
    print(f"Base model: {stats.get('base_model', 'N/A')}")
    print(f"Dataset samples: {stats.get('total_samples', 'N/A')}")
    print(f"Training samples: {stats.get('train_samples', 'N/A')}")
    print(f"Test samples: {stats.get('test_samples', 'N/A')}")
    print(f"Epochs: {stats.get('epochs', 'N/A')}")
    print(f"Total steps: {stats.get('total_steps', 'N/A')}")
    print(f"Average loss: {stats.get('avg_loss', 'N/A'):.4f}")
    print(f"Final loss: {stats.get('final_loss', 'N/A'):.4f}")
    print(f"Mean reward: {stats.get('mean_reward', 'N/A'):.3f}")
    print(f"Validation accuracy: {stats.get('validation_accuracy', 'N/A'):.1%}")
    print(f"Training time: {stats.get('training_time', 'N/A'):.1f}s")
    print(f"Model size: {stats.get('model_size_mb', 'N/A'):.1f}MB")
else:
    print("‚ö†Ô∏è Training statistics not found. Check if training completed successfully.")

# Check model files
print("\nüìÅ MODEL FILES:")
!ls -la models/mgl_history_grpo_adapter/

## 🧪 Test Trained Model

In [None]:
# Load and test the trained model
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

def load_trained_model():
    """Load the base model in float16 and attach the trained LoRA adapter.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the PeftModel wrapping the
        base model and ``tokenizer`` is the base model's tokenizer, with its pad
        token set to the EOS token if it had none.
    """
    base_id = "mistralai/Mistral-7B-Instruct-v0.2"
    adapter_dir = "models/mgl_history_grpo_adapter"

    print("üì• Loading base model...")
    tokenizer = AutoTokenizer.from_pretrained(base_id)
    backbone = AutoModelForCausalLM.from_pretrained(
        base_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    print("üîß Loading LoRA adapter...")
    peft_model = PeftModel.from_pretrained(backbone, adapter_dir)

    # Fall back to EOS as the padding token when the tokenizer defines none
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return peft_model, tokenizer

def generate_response(model, tokenizer, prompt, max_length=256):
    """Sample a completion for ``prompt`` and return it as stripped text.

    Args:
        model: Object exposing ``generate(**kwargs)``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``eos_token_id``.
        prompt: User question inserted into the ``<|user|>``/``<|assistant|>`` template.
        max_length: Passed to ``generate`` as ``max_new_tokens`` (the tokenized
            prompt itself is truncated to 512 tokens).

    Returns:
        str: Decoded text of the tokens generated after the prompt.
    """
    encoded = tokenizer(
        f"<|user|>\n{prompt}\n<|assistant|>\n",
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Move every input tensor to the GPU when one is available
    if torch.cuda.is_available():
        encoded = {name: tensor.cuda() for name, tensor in encoded.items()}

    sampling_kwargs = dict(
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_kwargs)

    # Drop the prompt tokens so only the newly generated part is decoded
    prompt_token_count = encoded['input_ids'].shape[1]
    completion_ids = generated[0][prompt_token_count:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# Sample prompts sent to the model once it has been loaded
test_prompts = [
    "–ß–∏–Ω–≥–∏—Å —Ö–∞–∞–Ω—ã —Ç—É—Ö–∞–π —è—Ä–∏–Ω–∞ —É—É?",
    "1921 –æ–Ω—ã —Ö—É–≤—å—Å–≥–∞–ª—ã–Ω “Ø—Ä –¥“Ø–Ω —é—É –±–∞–π—Å–∞–Ω –±—ç?",
    "–ú–æ–Ω–≥–æ–ª—ã–Ω –∞—Ä–¥—á–∏–ª—Å–∞–Ω —Ö—É–≤—å—Å–≥–∞–ª —Ö—ç—Ä—Ö—ç–Ω ”©—Ä–Ω”©—Å”©–Ω –±—ç?",
    "–ë–æ–≥–¥ —Ö–∞–∞–Ω—ã “Ø–µ–∏–π–Ω –æ–Ω—Ü–ª–æ–≥ –Ω—å —é—É –≤—ç?"
]

# Only attempt to load when the adapter directory is present
if not os.path.exists('models/mgl_history_grpo_adapter'):
    print("‚ö†Ô∏è Trained model not found. Make sure training completed successfully.")
else:
    try:
        model, tokenizer = load_trained_model()
        print("‚úÖ Model loaded successfully!")

        print("\nüß™ TESTING TRAINED MODEL")
        print("=" * 40)

        # Number the prompts from 1 and print each sampled answer beneath its question
        for i, prompt in enumerate(test_prompts, 1):
            print(f"\n{i}. ‚ùì {prompt}")
            response = generate_response(model, tokenizer, prompt)
            print(f"   ü§ñ {response}")

    except Exception as e:
        # Any failure while loading or generating is reported instead of aborting the notebook
        print(f"‚ùå Error loading model: {e}")
        print("This might be due to memory constraints or incomplete training.")

## 💾 Download Trained Model

In [None]:
# Create a ZIP file with the trained model and logs
import zipfile
import os
from datetime import datetime

def create_model_archive():
    """Bundle the adapter, training logs and dataset into a timestamped ZIP.

    The archive is written to the current working directory. Each of the three
    inputs is included only if it exists; entries keep their path relative to
    the working directory.

    Returns:
        str: File name of the created archive.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_name = f"mongolian_grpo_model_{stamp}.zip"

    def add_tree(archive, top_dir):
        # Store every file below top_dir under its path relative to the working directory
        for dirpath, _subdirs, filenames in os.walk(top_dir):
            for filename in filenames:
                source = os.path.join(dirpath, filename)
                archive.write(source, os.path.relpath(source, '.'))

    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Model files first, then training logs
        for top_dir in ('models/mgl_history_grpo_adapter', 'training_logs'):
            if os.path.exists(top_dir):
                add_tree(archive, top_dir)

        # Dataset, if present
        dataset_path = 'data/mgl_history_grpo.jsonl'
        if os.path.exists(dataset_path):
            archive.write(dataset_path, dataset_path)

    return archive_name

# Create and download the archive.
# Import the Colab download helper here: this cell otherwise depends on a global
# `files` bound by a different cell, and another cell in this notebook rebinds that
# name to a plain list inside an os.walk loop.
from google.colab import files

if os.path.exists('models/mgl_history_grpo_adapter'):
    print("üì¶ Creating model archive...")
    archive_name = create_model_archive()

    print(f"‚úÖ Archive created: {archive_name}")
    print(f"üìÅ Archive size: {os.path.getsize(archive_name) / 1024 / 1024:.1f} MB")

    # Download the archive
    print("‚¨áÔ∏è Downloading archive...")
    files.download(archive_name)

    print("\nüéâ SUCCESS! Your trained model is ready!")
    print("\nüìã What you got:")
    print("‚úÖ Trained LoRA adapter (models/mgl_history_grpo_adapter/)")
    print("‚úÖ Training logs and statistics (training_logs/)")
    print("‚úÖ GRPO dataset (data/mgl_history_grpo.jsonl)")

    print("\nüöÄ Next steps:")
    print("1. Extract the ZIP file in your local project")
    print("2. Load the model with: PeftModel.from_pretrained(base_model, 'models/mgl_history_grpo_adapter')")
    print("3. Integrate with your RAG system for improved Mongolian responses")

else:
    print("‚ö†Ô∏è No trained model found to download.")
    print("Make sure the training completed successfully.")

## 📊 Training Summary

In [None]:
# Display final training summary
# Imported locally so this cell runs even if the earlier cells that import these were skipped
import json
import os

import torch


def format_stat(stats, key, spec):
    """Return stats[key] formatted with `spec`, or 'N/A' when the key is absent.

    Applying a numeric format spec such as '.4f' directly to the 'N/A' fallback
    string raises ValueError, so numeric formatting is applied only to numbers.
    Non-numeric values that are present are returned via str().
    """
    value = stats.get(key)
    if value is None:
        return 'N/A'
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return format(value, spec)
    return str(value)


print("üéâ MONGOLIAN GRPO TRAINING COMPLETE!")
print("=" * 50)

# Check GPU usage (memory currently allocated by PyTorch in this process)
if torch.cuda.is_available():
    print(f"üî• GPU Used: {torch.cuda.get_device_name(0)}")
    print(f"üíæ VRAM Used: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")

# Check training results
if os.path.exists('training_logs/training_stats.json'):
    with open('training_logs/training_stats.json', 'r') as f:
        stats = json.load(f)

    print(f"\nüìà Training Results:")
    print(f"   ‚Ä¢ Final Loss: {format_stat(stats, 'final_loss', '.4f')}")
    print(f"   ‚Ä¢ Validation Accuracy: {format_stat(stats, 'validation_accuracy', '.1%')}")
    print(f"   ‚Ä¢ Mean Reward: {format_stat(stats, 'mean_reward', '.3f')}")
    print(f"   ‚Ä¢ Training Time: {format_stat(stats, 'training_time', '.1f')}s")

print("\n‚úÖ Your Mongolian historical AI model is now trained and ready!")
print("\nüîÆ The model can now provide more accurate, culturally appropriate,")
print("   and historically informed responses in Mongolian language.")

print("\nüéØ Integration Example:")
print("```python")
print("from peft import PeftModel")
print("from transformers import AutoModelForCausalLM")
print("")
print("base_model = AutoModelForCausalLM.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2')")
print("model = PeftModel.from_pretrained(base_model, 'models/mgl_history_grpo_adapter')")
print("```")

print("\nüåü Congratulations on successfully training your Mongolian AI model!")

print("\nüåü Congratulations on successfully training your Mongolian AI model!")