# LoRA Fine-Tuning: DIVERSE Dataset (Düzeltilmiş)

**Önemli**: Başlamadan önce *Runtime > Change runtime type* menüsünden **T4 GPU** seçin!

## 1. GPU Kontrolü

In [None]:
# Shell out to nvidia-smi to confirm that a GPU is attached to this runtime
# before anything heavy is installed or loaded.
!nvidia-smi

## 2. Paketleri Kur

In [None]:
# Install the libraries used by the rest of the notebook; -q keeps pip's
# output short.
!pip install -q torch transformers peft datasets accelerate bitsandbytes tqdm

## 3. Projeyi İndir

In [None]:
!rm -rf /content/Lora-fine-tune
!git clone https://github.com/B0DH1i/Lora-fine-tune.git
%cd /content/Lora-fine-tune
!touch config/__init__.py models/__init__.py data/__init__.py training/__init__.py evaluation/__init__.py scripts/__init__.py
print("✓ Proje hazır")

## 4. Google Drive Bağla

In [None]:
# Mount Google Drive; the checkpoint directory below lives inside it, so the
# mount has to happen before the directory is created.
from google.colab import drive
drive.mount('/content/drive')

import os
# `checkpoint_dir` is read by later cells (trainer output, final model, logs).
checkpoint_dir = '/content/drive/MyDrive/lora_checkpoints/diverse'
# exist_ok=True makes this cell safe to re-run.
os.makedirs(checkpoint_dir, exist_ok=True)
print(f"✓ Checkpoint dizini: {checkpoint_dir}")

## 5. Config Yükle

In [None]:
import sys
import os

# Work from the project checkout and put it at the front of the import path.
os.chdir('/content/Lora-fine-tune')
sys.path.insert(0, '/content/Lora-fine-tune')

# Execute the config files directly in the notebook namespace (training
# config first, then model config) so the names they define become globals.
for _config_file in ('config/training_config.py', 'config/model_config.py'):
    with open(_config_file, 'r') as _config_src:
        exec(_config_src.read(), globals())

# Notebook-specific overrides, applied to TrainingConfig in this order.
_training_overrides = {
    "use_flash_attention_2": False,
    "gradient_checkpointing": True,
    "per_device_batch_size": 1,
    "gradient_accumulation_steps": 32,
    "max_length_solution": 512,
    "use_8bit": True,
}
for _option, _value in _training_overrides.items():
    setattr(TrainingConfig, _option, _value)

print("✓ Config hazır (8-bit + düşük memory)")

## 6. Model Yükle

In [None]:
# Execute the loader module in the notebook namespace; the call below uses
# the `load_model_and_tokenizer` name this is expected to provide.
with open('models/model_loader.py', 'r') as _loader_src:
    exec(_loader_src.read(), globals())

print("Model yükleniyor (8-bit)...")
# `model` and `tokenizer` are used by the following cells.
model, tokenizer = load_model_and_tokenizer(
    use_flash_attention=False,
    load_in_8bit=True,
)
print("✓ Model yüklendi")

## 7. LoRA Setup

In [None]:
# Execute the LoRA helper module in the notebook namespace; the call below
# uses the `setup_lora` name this is expected to provide.
with open('models/lora_setup.py', 'r') as _lora_src:
    exec(_lora_src.read(), globals())

print("LoRA yapılandırılıyor...")
# Rebind `model` to whatever setup_lora returns for the 8-bit model.
model = setup_lora(
    model,
    use_8bit=True,
)
print("✓ LoRA yapılandırıldı")

## 8. Dataset Yükle

In [None]:
from datasets import load_dataset

print("DIVERSE dataset yükleniyor...")
# Load the dataset, take its "train" split and hold out 10% of it as a test
# split; the fixed seed keeps the split reproducible.
dataset = load_dataset("Naholav/CodeGen-Diverse-5K")["train"].train_test_split(
    test_size=0.1,
    seed=42,
)
print(
    f"✓ Dataset split - Train: {len(dataset['train'])}, Test: {len(dataset['test'])}"
)

def preprocess_function(examples, max_length=512, tok=None):
    """Turn one dataset record into a fixed-length causal-LM training example.

    The record's ``input`` and ``solution`` fields are placed into a single
    prompt string, tokenized, truncated/padded to ``max_length``, and given a
    ``labels`` entry for language-model training.

    Args:
        examples: Mapping with ``"input"`` and ``"solution"`` entries
            (presumably plain strings, since this is mapped per record —
            confirm against the dataset schema).
        max_length: Length every example is truncated/padded to.
        tok: Tokenizer callable to use. Defaults to the notebook-level
            ``tokenizer`` created in the model-loading cell.

    Returns:
        The tokenizer output with an added ``"labels"`` list. Labels equal
        ``input_ids`` on real tokens and ``-100`` on padding positions.
    """
    if tok is None:
        # Fall back to the tokenizer defined at notebook level.
        tok = tokenizer

    system_prompt = "You are an expert Python programmer. Please read the problem carefully before writing any Python code."
    prompt = f"{system_prompt}\n\nProblem:\n{examples['input']}\n\nSolution:\n{examples['solution']}"
    tokenized = tok(prompt, truncation=True, max_length=max_length, padding="max_length", return_tensors=None)

    input_ids = tokenized["input_ids"]
    attention_mask = tokenized.get("attention_mask")
    if attention_mask is None:
        # No mask available: keep the plain copy of the ids.
        tokenized["labels"] = list(input_ids)
    else:
        # Padding carries no training signal. -100 is the label value the
        # cross-entropy loss ignores, so padded positions no longer
        # contribute to the loss.
        tokenized["labels"] = [
            token_id if keep else -100
            for token_id, keep in zip(input_ids, attention_mask)
        ]
    return tokenized

print("Preprocessing...")
# Tokenize the train split, then the test split, dropping the original
# columns so only what preprocess_function returns is kept.
train_dataset, eval_dataset = (
    dataset[_split].map(
        preprocess_function,
        remove_columns=dataset[_split].column_names,
    )
    for _split in ("train", "test")
)
print(
    f"✓ Dataset hazır - Train: {len(train_dataset)}, Eval: {len(eval_dataset)}"
)

## 9. Trainer Setup

In [None]:
# Execute the trainer module and then the callbacks module in the notebook
# namespace; the call below uses the `setup_trainer` name they are expected
# to provide.
for _training_file in ('training/trainer.py', 'training/callbacks.py'):
    with open(_training_file, 'r') as _training_src:
        exec(_training_src.read(), globals())

# Checkpoints are written to the Drive directory chosen earlier.
trainer = setup_trainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    output_dir=checkpoint_dir,
    run_name="diverse_training_8bit",
)
print("✓ Trainer hazır")

## 10. Training

In [None]:
# Start training with the trainer built in the previous cell.
print("\nTRAINING BAŞLIYOR!\n")
trainer.train()

## 11. Model Kaydet

In [None]:
# Save the model through the trainer, then the tokenizer, into the same
# "final_model" folder inside the Drive checkpoint directory.
final_model_path = os.path.join(checkpoint_dir, "final_model")
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
print(f"✓ Model kaydedildi: {final_model_path}")

## 12. Log'ları İndir

In [None]:
# Zip the logs folder under the checkpoint directory ({checkpoint_dir} is
# filled in from the Python variable before the shell command runs), then
# hand the archive to Colab's file download helper.
# NOTE(review): assumes the trainer wrote its logs to <checkpoint_dir>/logs —
# confirm against training/trainer.py.
!zip -r diverse_training_logs.zip {checkpoint_dir}/logs
from google.colab import files
files.download('diverse_training_logs.zip')