# 🚀 QWEN3-8B Turkish Training - Google Colab Optimized

## Production Ready v4.0 ULTIMATE - Colab Edition

Bu notebook, QWEN3 modelini Türkçe veri setiyle eğitmek için özel olarak Google Colab'a optimize edilmiştir.

### 🎯 Özellikler:
- ✅ Deterministic tokenization
- ✅ Memory efficient EMA and teacher caching
- ✅ Updated dependencies
- ✅ Comprehensive error recovery
- ✅ Config validation layer
- ✅ Health monitoring dashboard
- ✅ Mixed precision auto-detection
- ✅ Dataset streaming
- ✅ Advanced auto-tuning
- ✅ Google Colab optimized
- ✅ Interactive widgets

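### 📋 Gereksinimler:
- Google Colab Pro (önerilen) veya ücretsiz GPU
- Google Drive (model ve checkpoint'leri kaydetmek için)
- Eğitim betiği `qwen3_training_production_v4_fixed.py`, Google Drive'da `MyDrive/qwen_training/` klasörüne yüklenmiş olmalıdır (4. bölümdeki hücre bu dosyayı çalıştırır)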

### 🚀 Kullanım:
1. Runtime > Change runtime type > GPU seçin
2. Tüm hücreleri sırayla çalıştırın
3. Eğitim ilerlemesini izleyin

## 🔧 1. Kurulum ve Hazırlık

In [None]:
#@title 📦 Kütüphaneler ve Başlangıç Ayarları { display-mode: "form" }

import os
import sys
import json
import gc
import time
import hashlib
import traceback
import warnings
from pathlib import Path
from typing import Dict, Any, Optional, List, Union, Tuple, Callable
from dataclasses import dataclass, field, asdict
from abc import ABC, abstractmethod
from functools import lru_cache, wraps
from collections import deque
from datetime import datetime
import logging
import psutil
import platform
import subprocess
from contextlib import contextmanager
import threading
from queue import Queue
warnings.filterwarnings('ignore')

# Environment probe: the runtime counts as Google Colab when the
# `google.colab` package is already loaded in this interpreter.
IS_COLAB = 'google.colab' in sys.modules
print(f"🔍 Google Colab ortamı: {'✅ Tespit edildi' if IS_COLAB else '❌ Bulunamadı'}")

if not IS_COLAB:
    # Outside Colab nothing is imported; the user is only warned.
    print("⚠️ Bu notebook Google Colab için optimize edilmiştir")
else:
    # Colab-only helpers; the imported names stay available to later cells.
    from google.colab import drive, output, files
    from IPython.display import display, HTML, clear_output
    import ipywidgets as widgets

    # Switch on Colab's custom widget manager.
    output.enable_custom_widget_manager()

    print("📱 Google Colab widget'ları etkinleştirildi")

In [None]:
#@title 💾 Google Drive Bağlama { display-mode: "form" }

# Sub-folders created under whichever working directory ends up being used.
WORKSPACE_SUBDIRS = ('checkpoints', 'logs', 'models')


def _prepare_workspace(root: Path) -> Path:
    """Create ``root`` and the standard sub-folders, then return ``root``.

    Used for the Drive, Colab-local and non-Colab locations alike so that the
    folder layout is the same no matter which one is selected.

    Args:
        root: Directory that becomes the working directory.

    Returns:
        The same ``root`` path, now guaranteed to exist.

    Raises:
        OSError: If a directory cannot be created.
    """
    root.mkdir(parents=True, exist_ok=True)
    for subdir in WORKSPACE_SUBDIRS:
        (root / subdir).mkdir(exist_ok=True)
    return root


if IS_COLAB:
    try:
        print("🔗 Google Drive bağlanıyor...")
        drive.mount('/content/drive', force_remount=True)
        DRIVE_PATH = _prepare_workspace(Path('/content/drive/MyDrive/qwen_training'))
        print(f"✅ Google Drive başarıyla bağlandı: {DRIVE_PATH}")
        print("📁 Klasör yapısı oluşturuldu")

    except Exception as e:
        # Best-effort boundary: any mount or Drive-side failure falls back to
        # a directory on the Colab VM instead of aborting the notebook.
        print(f"⚠️ Google Drive bağlantısı başarısız: {e}")
        print("📂 Yerel dizin kullanılacak")
        DRIVE_PATH = _prepare_workspace(Path('/content/qwen_training'))
else:
    # Not running in Colab: work relative to the current directory.
    DRIVE_PATH = _prepare_workspace(Path('./qwen_training'))

print(f"📍 Çalışma dizini: {DRIVE_PATH}")

In [None]:
#@title 🖥️ GPU Durumu Kontrolü { display-mode: "form" }

if IS_COLAB:
    # Step 1: ask the NVIDIA driver tool for a device report.
    import subprocess

    try:
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    except OSError:
        # Launching fails with OSError (e.g. FileNotFoundError) when the
        # nvidia-smi executable cannot be started; treat that the same as a
        # failing exit code instead of crashing the cell.
        result = None

    if result is not None and result.returncode == 0:
        print("🎮 GPU Bilgileri:")
        print(result.stdout)
    else:
        print("❌ GPU bulunamadı. Runtime > Change runtime type > GPU seçin")

    # Step 2: if PyTorch is importable, report the device name and memory.
    try:
        import torch
    except ImportError:
        print("⚠️ PyTorch henüz yüklenmedi")
    else:
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            # total_memory is in bytes; dividing by 1e9 gives decimal GB.
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
            print(f"\n🚀 GPU: {gpu_name}")
            print(f"💾 VRAM: {gpu_memory:.1f} GB")

            # Colab-specific hints keyed on a substring of the device name;
            # devices matching none of these get no hint.
            if 'T4' in gpu_name:
                print("📝 T4 GPU tespit edildi - Ücretsiz Colab tier")
                print("💡 Küçük batch size ve hafif model önerilir")
            elif 'P100' in gpu_name:
                print("📝 P100 GPU tespit edildi - Colab Pro")
                print("💡 Orta seviye training mümkün")
            elif 'V100' in gpu_name:
                print("📝 V100 GPU tespit edildi - Colab Pro+")
                print("💡 Yüksek performanslı training mümkün")
            elif 'A100' in gpu_name:
                print("📝 A100 GPU tespit edildi - Premium")
                print("💡 Maximum performans training mümkün")
        else:
            print("❌ CUDA GPU bulunamadı")
else:
    print("ℹ️ Lokal ortam - GPU durumunu manuel kontrol edin")

## 📦 2. Paket Kurulumu

In [None]:
#@title 🔧 Gerekli Paketlerin Kurulumu { display-mode: "form" }

def install_packages(
    required: Optional[List[str]] = None,
    optional: Optional[List[str]] = None,
) -> None:
    """Install the notebook's Python dependencies with pip.

    Each package is installed in its own pip invocation, launched through the
    interpreter that runs this notebook (``sys.executable -m pip``) so the
    packages land in the environment the kernel actually imports from.
    Progress is printed per package. Failures never raise; required packages
    that could not be installed are listed at the end instead of the success
    message.

    Args:
        required: Package specifiers that the notebook needs. ``None`` (the
            default) selects the built-in core list.
        optional: Package specifiers that are nice to have. ``None`` (the
            default) selects the built-in optional list. A failing optional
            package is reported as skipped.

    Returns:
        None. All results are reported through ``print``.
    """

    print("📦 Paket kurulumu başlıyor...")

    # Built-in defaults, used when the caller does not override the lists.
    default_required = [
        "transformers",
        "datasets",
        "accelerate",
        "peft",
        "bitsandbytes",
        "sentencepiece",
        "tiktoken",
        "trl",
        "psutil",
        "einops",
        "safetensors",
    ]
    default_optional = [
        "wandb",
    ]

    required_packages = default_required if required is None else list(required)
    optional_packages = default_optional if optional is None else list(optional)

    def install_package(package: str, upgrade: bool = False) -> bool:
        """Install one package quietly; return True on success, False otherwise."""
        # Argument list instead of a shell string: no shell parsing of the
        # package specifier, and pip is tied to the running interpreter.
        cmd = [sys.executable, "-m", "pip", "install", "-q"]
        if upgrade:
            cmd.append("--upgrade")
        cmd.append(package)

        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # The pip process itself could not be started or run.
            print(f"❌ {package}: {str(e)[:100]}...")
            return False

        if result.returncode == 0:
            print(f"✅ {package}")
            return True

        # Only the first 100 characters of pip's stderr are shown.
        print(f"❌ {package}: {result.stderr[:100]}...")
        return False

    # Install core packages, remembering the ones that failed.
    print("\n🔧 Temel paketler kuruluyor...")
    failed_required = []
    for package in required_packages:
        if not install_package(package):
            failed_required.append(package)

    # Install optional packages; a failure here is only reported as skipped.
    print("\n🔧 Opsiyonel paketler kuruluyor...")
    for package in optional_packages:
        if not install_package(package):
            print(f"⚠️ {package} atlandı")

    if failed_required:
        print(f"\n⚠️ Kurulamayan temel paketler: {', '.join(failed_required)}")
    else:
        print("\n✅ Paket kurulumu tamamlandı!")

# Kick off the dependency installation defined above.
install_packages()

# Sanity check: confirm the two central libraries can be imported and show
# their versions together with CUDA availability.
try:
    import torch
    import transformers
except ImportError as import_error:
    print(f"⚠️ Import hatası: {import_error}")
    print("🔄 Lütfen runtime'ı restart edin ve tekrar deneyin")
else:
    print(f"\n📊 PyTorch: {torch.__version__}")
    print(f"📊 Transformers: {transformers.__version__}")
    print(f"📊 CUDA Available: {torch.cuda.is_available()}")

## ⚙️ 3. Eğitim Yapılandırması

In [None]:
#@title 🎯 Eğitim Parametreleri { display-mode: "form" }

# Training hyper-parameters, exposed as Colab form fields through the
# trailing `#@param` annotations (those annotations must stay on the
# assignment lines).
model_name = "microsoft/phi-2" #@param ["microsoft/phi-2", "Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2-7B"] {type:"string"}
num_epochs = 3 #@param {type:"slider", min:1, max:10, step:1}
learning_rate = 0.00002 #@param {type:"number"}
batch_size = 1 #@param {type:"slider", min:1, max:8, step:1}
gradient_accumulation_steps = 8 #@param {type:"slider", min:1, max:32, step:1}
max_length = 256 #@param {type:"slider", min:128, max:1024, step:64}
use_lora = True #@param {type:"boolean"}
lora_rank = 8 #@param {type:"slider", min:4, max:64, step:4}
use_4bit = True #@param {type:"boolean"}
max_train_samples = 1000 #@param {type:"slider", min:100, max:10000, step:100}

# Echo the chosen values, one per line, so the selection is visible in the
# cell output.
print(
    f"📊 Seçilen Model: {model_name}",
    f"📊 Epoch Sayısı: {num_epochs}",
    f"📊 Learning Rate: {learning_rate}",
    f"📊 Batch Size: {batch_size}",
    f"📊 Gradient Accumulation: {gradient_accumulation_steps}",
    f"📊 Max Length: {max_length}",
    f"📊 LoRA: {'✅' if use_lora else '❌'}",
    f"📊 4-bit Quantization: {'✅' if use_4bit else '❌'}",
    f"📊 Max Train Samples: {max_train_samples}",
    sep="\n",
)

## 🎯 4. Ana Eğitim Kodu

In [None]:
# Execute the main training script in this notebook's global namespace.
# In Colab the script is looked up inside DRIVE_PATH (the working directory
# chosen by the Drive cell), so the Drive-mount fallback location is honoured.
if IS_COLAB:
    TRAINING_SCRIPT = DRIVE_PATH / 'qwen3_training_production_v4_fixed.py'

    if not TRAINING_SCRIPT.is_file():
        # Fail with guidance instead of a bare open() traceback.
        raise FileNotFoundError(
            f"❌ Eğitim betiği bulunamadı: {TRAINING_SCRIPT}. "
            "Dosyayı bu konuma yükleyip hücreyi yeniden çalıştırın."
        )

    # NOTE(security): exec runs whatever code the file contains with full
    # access to this session; only run a script you trust.
    # read_text closes the file and decodes it as UTF-8 explicitly; compile()
    # attaches the real filename so tracebacks point at the script.
    exec(compile(TRAINING_SCRIPT.read_text(encoding='utf-8'), str(TRAINING_SCRIPT), 'exec'))
else:
    # Outside Colab nothing is executed; the user is pointed at the script.
    print("⚠️ Bu notebook Google Colab için tasarlanmıştır.")
    print("Lokal çalıştırma için qwen3_training_production_v4_fixed.py dosyasını kullanın.")