In [None]:
# 1. Önce paketleri kur
!pip install  peft accelerate bitsandbytes transformers datasets

# 2. Sonra model fine-tuning kodunu çalıştır
from datasets import load_dataset
from transformers import AutoTokenizer

from peft import LoraConfig

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0


In [None]:
!pip uninstall -U trl
!pip install -U trl==0.9.6

import trl
print(trl.__version__)


Usage:   
  pip3 uninstall [options] <package> ...
  pip3 uninstall [options] -r <requirements file> ...

no such option: -U
Collecting trl==0.9.6
  Downloading trl-0.9.6-py3-none-any.whl.metadata (12 kB)
Collecting numpy<2.0.0,>=1.18.2 (from trl==0.9.6)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting tyro>=0.5.11 (from trl==0.9.6)
  Downloading tyro-0.9.28-py3-none-any.whl.metadata (11 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl==0.9.6)
  Downloading shtab-1.7.2-py3-none-any.whl.metadata (7.4 kB)
Downloading trl-0.9.6-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.8/245.8 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━

0.9.6


In [None]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
    AutoModelForCausalLM
)
from trl import SFTTrainer
from peft import LoraConfig
import torch

# CUDA allocator settings are read when PyTorch initialises its caching allocator,
# so they have to be in the environment before the model is placed on the GPU.
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Model and tokenizer
model_id = "TURKCELL/Turkcell-LLM-7b-v1"

# 4-bit quantization configuration (BF16 compute, intended for an A100)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,  # double quantization for additional memory savings
    bnb_4bit_quant_type="nf4"  # NormalFloat-4 quantization
)

# Load the model up front with quantization applied
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    low_cpu_mem_usage=True  # keep CPU memory usage down while loading
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token
tokenizer.padding_side = 'right'  # avoids the SFTTrainer padding-side warning

# Load the dataset
dataset = load_dataset("json", data_files="data.json", split="train")

# Formatting function
def formatting_func(example):
    """Render one dataset record as a single SFT training prompt.

    Args:
        example: Mapping with optional "instruction", "input" and "output"
            entries. A missing key and an explicit ``None`` value are both
            treated as an empty string.

    Returns:
        A "Soru / Girdi / Cevap" prompt when "input" is non-empty, otherwise a
        "Soru / Cevap" prompt; the sections are separated by newlines.
    """
    # `.get(key, "")` only covers absent keys. A field that is present but null
    # would otherwise be interpolated as the literal text "None".
    instruction, input_text, output = (
        "" if example.get(key) is None else example.get(key)
        for key in ("instruction", "input", "output")
    )

    if input_text:
        return f"Soru: {instruction}\nGirdi: {input_text}\nCevap: {output}"
    return f"Soru: {instruction}\nCevap: {output}"

# Add the rendered prompt as a "text" column; the trainer is pointed at it below.
dataset = dataset.map(lambda x: {"text": formatting_func(x)})

# LoRA config
lora_config = LoraConfig(
    r=16,  # lower rank to reduce memory usage
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],  # adapt all attention projections
    bias="none",
    task_type="CAUSAL_LM"
)

# Training arguments.
# SFTConfig extends TrainingArguments with the SFT-specific options. Passing
# `dataset_text_field` / `max_seq_length` directly to SFTTrainer is deprecated
# (this is what triggered the "please use the SFTConfig" warning).
from trl import SFTConfig

training_args = SFTConfig(
    output_dir="./outputs",
    num_train_epochs=10,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # effective batch of 8 sequences per optimizer step
    learning_rate=2e-4,
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,
    fp16=False,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",  # paged 8-bit optimizer for better memory behaviour
    report_to="none",
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    dataset_text_field="text",
    max_seq_length=512,
)

# Trainer
trainer = SFTTrainer(
    model=model,  # the quantized model loaded earlier
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=lora_config,
    args=training_args
)

# CUDA memory settings
torch.cuda.empty_cache()
import os
# NOTE(review): PyTorch reads PYTORCH_CUDA_ALLOC_CONF when its CUDA allocator is
# initialised, and the model is already on the GPU at this point, so this
# assignment most likely has no effect here. It needs to run before the model is
# loaded to apply - confirm and relocate.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Start training
trainer.train()

# Save the model (the LoRA adapter) to the output directory
trainer.save_model("./outputs")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2771 [00:00<?, ? examples/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2771 [00:00<?, ? examples/s]

  super().__init__(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,3.0289
20,2.6678
30,2.4267
40,1.9949
50,2.0053
60,1.7807
70,1.8517
80,1.744
90,1.5613
100,1.6906


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; later cells copy to and load from paths under it.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!cp -r /content/outputs /content/drive/MyDrive/outputsdposuz

In [None]:
!pip install --upgrade transformers==4.46.2
!pip install --upgrade trl==0.11.4
!pip install --upgrade peft==0.11.1
!pip install --upgrade accelerate==0.33.0

Collecting transformers==4.46.2
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.21,>=0.20 (from transformers==4.46.2)
  Downloading tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.46.2-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m138.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m110.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenize

In [None]:
!pip install -U peft trl transformers accelerate

Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting trl
  Downloading trl-0.21.0-py3-none-any.whl.metadata (11 kB)
Collecting transformers
  Downloading transformers-4.55.3-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading peft-0.17.1-py3-none-any.whl (504 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m504.9/504.9 kB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.21.0-py3-none-any.whl (511 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.9/511.9 kB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.55.3-py3-

In [None]:
# === DPO: SFT sonrası LoRA adapter'larıyla devam ===
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from trl import DPOConfig, DPOTrainer
import torch, os

# ---- CUDA allocator setting ----
# Must be in the environment before any model is placed on the GPU: PyTorch reads
# it when the CUDA caching allocator is initialised, not on later assignments.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# ---- Model paths ----
model_id = "TURKCELL/Turkcell-LLM-7b-v1"
sft_adapters_dir = "/content/drive/MyDrive/outputsdposuz"      # adapters produced by the SFT step
dpo_output_dir   = "/content/drive/MyDrive/outputsdpolu"  # DPO outputs are written here

# ---- 4-bit quantization ----
# Same NF4 + double-quantization settings as the SFT cell, stated explicitly so the
# SFT adapters are applied on top of identically quantized base weights (omitted
# options would fall back to the library defaults instead).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# ---- Tokenizer ----
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.padding_side = "right"
# Fall back to EOS for padding only when the tokenizer defines no pad token.
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token

# ---- Policy model (trainable) ----
base_policy = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto"
)
policy_model = PeftModel.from_pretrained(base_policy, sft_adapters_dir, is_trainable=True)

# ---- Reference model (same SFT adapters, frozen) ----
base_ref = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto"
)
ref_model    = PeftModel.from_pretrained(base_ref, sft_adapters_dir, is_trainable=False)
for p in ref_model.parameters():
    p.requires_grad_(False)

# ---- DPO dataset ----
# Expected columns: "prompt", "chosen", "rejected"
dpo_dataset = load_dataset("json", data_files="dpo_dataset.json", split="train")
# If the columns are named differently, use rename_columns:
# dpo_dataset = dpo_dataset.rename_columns({"soru":"prompt","kazanan":"chosen","kaybeden":"rejected"})

# ---- DPO configuration ----
dpo_args = DPOConfig(
    output_dir=dpo_output_dir,
    beta=0.1,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=1e-5,
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,
    fp16=False,
    max_length=1024,
    max_prompt_length=512,
    report_to=[]
)

torch.cuda.empty_cache()

# ---- DPO trainer (works with both old and new TRL) ----
# Newer TRL releases take the tokenizer as `processing_class`, older ones as
# `tokenizer`. The keyword is chosen from the constructor signature rather than by
# catching TypeError, so an unrelated TypeError raised while building the trainer
# is not swallowed and retried with the wrong keyword.
import inspect

_dpo_init_params = inspect.signature(DPOTrainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _dpo_init_params else "tokenizer"

dpo_trainer = DPOTrainer(
    model=policy_model,
    ref_model=ref_model,
    train_dataset=dpo_dataset,
    args=dpo_args,
    **{_tokenizer_kwarg: tokenizer},
)

# ---- Train and save ----
dpo_trainer.train()
dpo_trainer.save_model(dpo_output_dir)     # saves the LoRA adapters
tokenizer.save_pretrained(dpo_output_dir)  # tokenizer goes into the same folder
dpo_trainer.save_state()                   # (optional) trainer state

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Extracting prompt in train dataset:   0%|          | 0/1454 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1454 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1454 [00:00<?, ? examples/s]

Step,Training Loss
10,0.2677
20,0.0597
30,0.0137
40,0.0019
50,0.0038
60,0.0005
70,0.0027
80,0.0017
90,0.0004
100,0.0006


In [None]:
# Copy a local DPO output directory to Drive.
# NOTE(review): the DPO cell in this notebook writes straight to
# /content/drive/MyDrive/outputsdpolu, and no visible cell creates
# /content/outputs_dpo - confirm this copy is still needed / the source path exists.
!cp -r /content/outputs_dpo /content/drive/MyDrive/outputs_dpo

In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m82.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [None]:
# This cell prepares the data for the RAG FAISS indexes.
# Two separate indexes: factual and procedural
# Embedding model: BGE-M3 (multilingual)

from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

embed_model = SentenceTransformer("BAAI/bge-m3")

# Load the records here instead of relying on a `dataset` variable left behind by
# the SFT cell: on a fresh kernel that name does not exist and the cell fails with
# NameError. Same file and split as the SFT cell reads.
rag_dataset = load_dataset("json", data_files="data.json", split="train")


def _select_by_task_type(records, task_type):
    """Return the records whose ``metadata["task_type"]`` equals ``task_type``.

    Records that are empty, are not dicts, or do not carry a dict under
    "metadata" are skipped rather than raising.
    """
    return [
        ex for ex in records
        if ex and isinstance(ex, dict)
        and isinstance(ex.get("metadata"), dict)
        and ex["metadata"].get("task_type") == task_type
    ]


# Split the dataset safely (with the metadata checks above)
factual_data = _select_by_task_type(rag_dataset, "factual")
procedural_data = _select_by_task_type(rag_dataset, "procedural")

print(f"Factual kayıt sayısı: {len(factual_data)}")
print(f"Procedural kayıt sayısı: {len(procedural_data)}")

# Embedding and FAISS index builder
def build_index(data):
    """Embed each record's "output" text and build an inner-product FAISS index.

    Args:
        data: Iterable of dict records. Only records holding a non-empty string
            under "output" are indexed.

    Returns:
        ``(index, texts)`` where ``index`` is a ``faiss.IndexFlatIP`` containing
        one vector per entry of ``texts``, in the same order. When no record
        qualifies, the index is empty and ``texts`` is ``[]``.
    """
    texts = [
        ex["output"] for ex in data
        if isinstance(ex.get("output"), str) and ex["output"]
    ]

    if not texts:
        # Encoding an empty list gives an array without a second axis, so the
        # vector size cannot be read from it; ask the model for it instead.
        return faiss.IndexFlatIP(embed_model.get_sentence_embedding_dimension()), texts

    # Normalised embeddings + inner product == cosine similarity.
    # FAISS expects a contiguous float32 matrix.
    embeddings = np.ascontiguousarray(
        embed_model.encode(texts, normalize_embeddings=True), dtype=np.float32
    )
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    return index, texts

factual_index, factual_texts = build_index(factual_data)
procedural_index, procedural_texts = build_index(procedural_data)

print("RAG indexleri hazır! ✅")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

NameError: name 'dataset' is not defined

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from trl import DPOConfig, DPOTrainer
import torch, os

In [None]:
# Hub id of the base model; the tokenizer cell that follows loads from this id.
model_id = "TURKCELL/Turkcell-LLM-7b-v1"

In [None]:
# Tokenizer of the base model (fast implementation)
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
# Pad on the right (avoids the SFTTrainer warning) and reuse EOS as the padding token
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# This cell merges the LoRA adapter into the base model and exports the result.
# NOTE(review): the recorded "Only a single TORCH_LIBRARY can be used..." error
# typically shows up when torch is imported again after packages were upgraded in
# a live kernel - restarting the runtime before this cell is the likely remedy.
from peft import AutoPeftModelForCausalLM
import torch

adapter_dir = "/content/drive/MyDrive/outputsdpolu"
merged_dir = "/content/drive/MyDrive/mergedoutputsdpolu"

# Load base model + DPO adapter in fp16, then fold the adapter into the base weights.
ft_model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_dir,
    device_map="auto",
    torch_dtype=torch.float16,
).merge_and_unload()

# Write model and tokenizer side by side in Hugging Face format.
for artifact in (ft_model, tokenizer):
    artifact.save_pretrained(merged_dir)
print("LoRA merge edildi, HuggingFace formatında kaydedildi.")


RuntimeError: Only a single TORCH_LIBRARY can be used to register the namespace prims; please put all of your definitions in a single TORCH_LIBRARY block.  If you were trying to specify implementations, consider using TORCH_LIBRARY_IMPL (which can be duplicated).  If you really intended to define operators for a single namespace in a distributed way, you can use TORCH_LIBRARY_FRAGMENT to explicitly indicate this.  Previous registration of TORCH_LIBRARY was registered at /dev/null:488; latest registration was registered at /dev/null:488

In [None]:
# Copy a local merged-model directory to Drive.
# NOTE(review): the merge cell in this notebook saves directly to
# /content/drive/MyDrive/mergedoutputsdpolu, and no visible cell writes
# /content/merged_modeldpolu - confirm the source path before relying on this copy.
!cp -r /content/merged_modeldpolu /content/drive/MyDrive/merged_modeldpolu

In [None]:
!cp -r /content/outputs /content/drive/MyDrive/outputs