# Kumru 2B - Sagopa Kajmer LoRA Fine-Tuning

Bu notebook Kumru 2B modelini Sagopa Kajmer tarzında konuşması için LoRA ile fine-tune eder.

**Özellikler:**
- 10 Epoch eğitim
- Early Stopping (validation loss iyileşmezse durur)
- En iyi checkpoint otomatik kaydedilir
- GGUF dönüşümü

## 1. GPU Kontrolü

In [None]:
# Show the GPU attached to this runtime (the leading `!` runs the line as a shell command).
!nvidia-smi

## 2. Kütüphane Kurulumu

In [None]:
# Install the required libraries (each `!pip` line runs as a shell command; -q keeps pip quiet)
!pip install -q torch
!pip install -q transformers
!pip install -q datasets
!pip install -q peft
!pip install -q accelerate
!pip install -q bitsandbytes
!pip install -q trl
!pip install -q sentencepiece

print("Kurulum tamamlandi!")

In [None]:
# Versiyon kontrolu
import torch
import transformers
import peft
import trl

# Report the installed version of each library this notebook relies on.
for label, module in (
    ("PyTorch", torch),
    ("Transformers", transformers),
    ("PEFT", peft),
    ("TRL", trl),
):
    print(f"{label}: {module.__version__}")

# Report whether CUDA is usable and, if so, which device is attached and how much memory it has.
cuda_ok = torch.cuda.is_available()
print(f"CUDA: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu_name}")
    print(f"VRAM: {vram_gb:.1f} GB")

## 3. Dataset Yükleme

In [None]:
# Upload the dataset file into the Colab runtime
from google.colab import files
uploaded = files.upload()  # select the LoRAReadyToUseDataSet_FIXED.jsonl file

In [None]:
import json
from datasets import Dataset

# JSONL dosyasini oku
def load_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path of a UTF-8 encoded file with one JSON document per line.

    Returns:
        A list holding one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank or whitespace-only lines (typically a trailing empty line at the
        # end of the file) carry no record, and json.loads would raise on them.
        return [json.loads(line) for line in f if line.strip()]

# Parse the uploaded dataset file
raw_data = load_jsonl('LoRAReadyToUseDataSet_FIXED.jsonl')

# Sanity check: report the record count, then pretty-print the first record
print(f"Toplam ornek: {len(raw_data)}")
print(f"\nIlk ornek:")
first_record_json = json.dumps(raw_data[0], ensure_ascii=False, indent=2)
print(first_record_json)

## 4. ChatML Formatına Çevirme

In [None]:
# System prompt placed in the system turn of every conversation
SYSTEM_PROMPT = """Sen Sagopa Kajmer'sin. Derin dusunen, melankolik ama samimi bir rap sanatcisisin.
Hayat, zaman, yalnizlik gibi temalardan bahsedersin. Kendi kelime dagarcaginla dogal ve icten konusursun."""

def format_to_chatml(example):
    """Render one record as a single ChatML-formatted training string.

    Args:
        example: Mapping with an ``'input'`` entry (user message) and an
            ``'output'`` entry (assistant reply).

    Returns:
        A dict with one key, ``"text"``, holding the system, user and
        assistant turns joined by newlines.
    """
    turns = (
        f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>",
        f"<|im_start|>user\n{example['input']}\n<|im_end|>",
        # The assistant turn closes directly after the reply, with no newline before the end marker.
        f"<|im_start|>assistant\n{example['output']}<|im_end|>",
    )
    return {"text": "\n".join(turns)}

# Build a Dataset from the parsed records and run the ChatML formatter over every record
dataset = Dataset.from_list(raw_data).map(format_to_chatml)

print("Format uygulandi!")
# Preview the first 400 characters of the first formatted record
preview = dataset[0]['text'][:400]
print(f"\nOrnek:\n{preview}...")

In [None]:
# Train/validation split (90% / 10%), seeded so the split is reproducible
dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = dataset['train'], dataset['test']

for label, split in (("Train", train_dataset), ("Validation", eval_dataset)):
    print(f"{label}: {len(split)} ornek")

## 5. Model Yükleme (4-bit Quantization)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

MODEL_NAME = "vngrs-ai/Kumru-2B"

# Compute dtype for the 4-bit layers: use bfloat16 when the GPU reports support
# for it, so it matches bf16 mixed-precision training; otherwise use float16.
compute_dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    else torch.float16
)

# 4-bit quantization config (saves VRAM)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

print("Kumru 2B yukleniyor...")

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Only fall back to EOS as the padding token when the tokenizer ships without
# one; overwriting an existing pad token would make padding indistinguishable
# from the end-of-sequence token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.model_max_length = 512

# Model
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

print(f"Model yuklendi!")
print(f"Parametreler: {model.num_parameters() / 1e9:.2f}B")

## 6. LoRA Konfigürasyonu

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the quantized model for LoRA training
model = prepare_model_for_kbit_training(model)

# Names of the modules that receive LoRA adapters
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# LoRA config
lora_config = LoraConfig(
    r=16,            # LoRA rank
    lora_alpha=32,   # scaling factor
    target_modules=lora_target_modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the model with the LoRA adapters
model = get_peft_model(model, lora_config)

# Training statistics: count trainable and total parameter elements in one pass
trainable = 0
total = 0
for param in model.parameters():
    count = param.numel()
    total += count
    if param.requires_grad:
        trainable += count

print(f"LoRA eklendi!")
print(f"Egitilecek parametreler: {trainable:,} ({trainable/total*100:.2f}%)")
print(f"Toplam parametreler: {total:,}")

## 7. Eğitim (Early Stopping ile)

- **10 Epoch** maksimum
- **Early Stopping**: Validation loss art arda 3 değerlendirme boyunca en az 0.01 iyileşmezse durur
- **Best Model**: En düşük validation loss'a sahip checkpoint kaydedilir

In [None]:
from transformers import TrainingArguments, EarlyStoppingCallback
from trl import SFTTrainer

# Training arguments - up to 10 epochs + early stopping (tuned for an A100)

# Choose the mixed-precision mode from what the GPU reports instead of
# hard-coding bf16: use bf16 where supported, fp16 on other CUDA devices,
# and full precision when no GPU is available.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
use_fp16 = torch.cuda.is_available() and not use_bf16

training_args = TrainingArguments(
    output_dir="./kumru-sagopa-lora",

    # Epochs and batch sizes (tuned for an A100)
    num_train_epochs=10,                    # at most 10 epochs
    per_device_train_batch_size=8,          # A100: 8 (T4: 4)
    per_device_eval_batch_size=8,           # A100: 8
    gradient_accumulation_steps=2,          # 8*2=16 effective batch

    # Optimizer
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_steps=50,                        # shorter warmup (large batch)

    # Logging and evaluation
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=50,

    # Checkpoint saving (same interval as evaluation)
    save_strategy="steps",
    save_steps=50,
    save_total_limit=3,

    # Keep the best model: reload the checkpoint with the lowest eval_loss at the end
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    # Precision (selected above from GPU capabilities)
    fp16=use_fp16,
    bf16=use_bf16,

    report_to="none",
)

# Gradient checkpointing
model.gradient_checkpointing_enable()

# Early stopping: stop after 3 consecutive evaluations without an eval_loss
# improvement of at least 0.01
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,
    early_stopping_threshold=0.01
)

# Trainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
    callbacks=[early_stopping],
)

print("Trainer hazir! (A100 Optimized)")
print("- Batch size: 8")
print("- Gradient accumulation: 2")
print("- Effective batch: 16")
print("- Max epoch: 10")
print("- Early stopping: 3 eval patience")

In [None]:
# Start training
print("Egitim basliyor...")
print("Early stopping aktif - validation loss iyilesmezse otomatik duracak")
print("="*60)

trainer.train()

print("="*60)
print("EGITIM TAMAMLANDI!")

# best_metric stays None when no evaluation ran during training (for example
# when the run has fewer optimizer steps than the evaluation interval), and
# formatting None with ":.4f" would raise TypeError after a finished run.
best_eval_loss = trainer.state.best_metric
if best_eval_loss is None:
    print("En iyi model belirlenemedi (egitim sirasinda degerlendirme yapilmadi)")
else:
    print(f"En iyi model yuklendi (eval_loss: {best_eval_loss:.4f})")

## 8. Model Test

In [None]:
# Switch the model to inference (evaluation) mode
model.eval()
# Enable the key/value cache for generation
model.config.use_cache = True

def chat_with_sagopa(question, max_new_tokens=200):
    """Generate one Sagopa Kajmer-style reply to a single user question.

    Builds a ChatML prompt (system turn, user turn, open assistant turn) in the
    same layout as the training text, samples a continuation from the
    module-level ``model`` and returns the assistant's first turn.

    Args:
        question: User message placed in the ``user`` turn.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated text up to the first ``<|im_end|>`` marker, with
        surrounding whitespace stripped.
    """
    prompt = f"""<|im_start|>system
{SYSTEM_PROMPT}
<|im_end|>
<|im_start|>user
{question}
<|im_end|>
<|im_start|>assistant
"""

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        return_token_type_ids=False
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            top_k=50,
            do_sample=True,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new ones: searching the full decoded text for the last
    # "<|im_start|>assistant" marker picks the wrong turn when the model keeps
    # going and emits further turns, and returns the whole prompt when the
    # decoded text does not reproduce the marker exactly.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=False)

    # Keep only the first assistant turn.
    assistant_response = generated_text.split("<|im_end|>")[0].strip()

    return assistant_response

print("Chat fonksiyonu hazir!")

In [None]:
# Sample questions for a quick check of the fine-tuned model
test_questions = [
    "Bugun nasilsin?",
    "Rap hakkinda ne dusunuyorsun?",
    "Hayattan beklentin nedir?",
    "Yalnizlik hakkinda ne dusunuyorsun?",
    "Muzik sana ne ifade ediyor?",
    "Gece uyuyamiyorum, ne yapmaliyim?"
]

banner = "=" * 60
print(banner)
print("SAGOPA KAJMER CHATBOT TEST")
print(banner)

# Ask each question in turn and print the reply followed by a divider
for question in test_questions:
    print(f"\nSoru: {question}")
    response = chat_with_sagopa(question)
    print(f"Sagopa: {response}")
    print("-" * 60)

## 9. LoRA Adapter'ı Kaydet

**ÖNEMLİ:** Tokenizer sorunlarını önlemek için modeli MERGE ETMİYORUZ!
- LoRA adapter'ı base model üzerine yüklenerek kullanılacak
- Base model'in tokenizer'ı kullanılacak (ademireltaz yaklaşımı)

In [None]:
# Save the LoRA adapter and the tokenizer files into one directory
# (the best checkpoint has already been loaded into `model`)
LORA_OUTPUT_DIR = "./kumru-sagopa-lora-final"

for artifact in (model, tokenizer):
    artifact.save_pretrained(LORA_OUTPUT_DIR)

print(f"LoRA adapter kaydedildi: {LORA_OUTPUT_DIR}")
print("\nKullanimda tokenizer sorunu yasamaniz durumunda:")
print("Base model'in tokenizer'ini kullanin: vngrs-ai/Kumru-2B")

## 10. Test: Base Model + LoRA Adapter Yükleme

ademireltaz yaklaşımı - merge etmeden direkt kullan

In [None]:
# Load the base model plus the LoRA adapter (WITHOUT merging)
from peft import PeftModel
# AutoTokenizer is used below, so import it here together with the model class;
# the cell already imports everything else it needs from these libraries.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

print("Base model + LoRA adapter test ediliyor...")

# Tokenizer of the base model (used instead of the tokenizer saved with the adapter)
test_tokenizer = AutoTokenizer.from_pretrained("vngrs-ai/Kumru-2B")

# Load the base model in half precision
test_base = AutoModelForCausalLM.from_pretrained(
    "vngrs-ai/Kumru-2B",
    torch_dtype=torch.float16,
    device_map="auto"
)

# Attach the saved LoRA adapter to the base model
test_model = PeftModel.from_pretrained(test_base, LORA_OUTPUT_DIR)

print("Base model + LoRA yuklendi!")
print("Tokenizer problemi olmadan calisacak.")

In [None]:
# Test question, in the same ChatML layout as the training text
test_prompt = f"""<|im_start|>system
{SYSTEM_PROMPT}
<|im_end|>
<|im_start|>user
Nasılsın?
<|im_end|>
<|im_start|>assistant
"""

test_inputs = test_tokenizer(
    test_prompt,
    return_tensors="pt",
    return_token_type_ids=False
).to(test_model.device)

with torch.no_grad():
    test_outputs = test_model.generate(
        **test_inputs,
        max_new_tokens=100,
        temperature=0.85,
        do_sample=True,
        pad_token_id=test_tokenizer.eos_token_id
    )

# generate() returns the prompt tokens followed by the new tokens. Decode only
# the new ones: searching the full decoded text for the last
# "<|im_start|>assistant" marker picks the wrong turn when the model emits
# further turns, and returns the whole prompt when the decoded text does not
# reproduce the marker exactly.
test_prompt_length = test_inputs["input_ids"].shape[1]
test_response = test_tokenizer.decode(
    test_outputs[0][test_prompt_length:],
    skip_special_tokens=False
)
# Keep only the first assistant turn.
test_answer = test_response.split("<|im_end|>")[0].strip()

print("\n" + "="*60)
print("TEST SONUCU (Base Model + LoRA Adapter)")
print("="*60)
print(f"Sagopa: {test_answer}")
print("="*60)

# Free memory: drop the references to the test models, then release cached GPU memory
del test_model
del test_base
torch.cuda.empty_cache()