In [None]:
!pip install transformers accelerate peft bitsandbytes datasets sentencepiece

In [None]:
import os

import torch
from datasets import load_dataset
from huggingface_hub import login
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DefaultDataCollator,
)

# Log in to the Hugging Face Hub so the base model can be fetched.
# The token is taken from the HF_TOKEN environment variable rather than being
# written into the file: a token committed to source is readable by everyone
# with access to it, and any token that was ever committed must be revoked.
# When the variable is unset (or malformed) the login step is simply skipped.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
if HF_TOKEN.startswith("hf_"):
    login(token=HF_TOKEN)


# 4-bit quantization settings handed to bitsandbytes when the model is loaded.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",  # "nf4" or "fp4"
    "bnb_4bit_use_double_quant": True,  # following the QLoRA paper
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quantization_options)

# Base checkpoint: both the tokenizer and the 4-bit model are loaded from it.
model_name = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Mistral/Llama tokenizers ship without a pad token, so EOS is reused for it.
tokenizer.pad_token = tokenizer.eos_token

print("Model yükleniyor 4-bit quantization ile...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

# LoRA settings.
# QLoRA: the base model stays 4-bit quantized and only the LoRA adapter
# weights attached to the attention query/value projections are trained.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)

# A quantized model must be preprocessed before adapters are attached; the
# PEFT quantization guide prescribes this call for k-bit training (it freezes
# the base weights and readies the remaining non-quantized layers for stable
# training). It has to run before get_peft_model().
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Dataset: fikralar_cleaned.json must be uploaded to the Google Colab session
# before this cell is run.
data_file = "fikralar_cleaned.json"
raw_dataset = load_dataset("json", data_files=data_file)

# Hold out 10% of the examples for validation; the fixed seed keeps the split
# reproducible between runs.
dataset = raw_dataset["train"].train_test_split(test_size=0.1, seed=42)
train_data, val_data = dataset["train"], dataset["test"]

# tokenize function for our custom 'fıkra' data
def tokenize_function(batch, max_length=512):
    """Tokenize a batch of texts into fixed-length causal-LM training features.

    Args:
        batch: Mapping with a "content" key holding a list of strings, as
            passed by ``Dataset.map(..., batched=True)``.
        max_length: Sequence length every example is truncated/padded to.

    Returns:
        A dict with the tokenizer outputs ("input_ids", "attention_mask", ...)
        plus "labels": a copy of "input_ids" in which every padding position
        is replaced by -100 so it is ignored by the loss.
    """
    # Append EOS to each text so the model sees a real end-of-text target.
    # The pad token is the same id as EOS, so once padding is masked out below
    # this appended token is the only EOS the loss is computed on. Examples
    # that get truncated lose it, which is correct: their text did not end.
    texts = [text + tokenizer.eos_token for text in batch["content"]]

    tokens = dict(
        tokenizer(
            texts,
            max_length=max_length,
            truncation=True,
            padding="max_length",
        )
    )

    # Causal-LM labels are the inputs themselves, except on padding. The
    # attention mask (not the token id) identifies padding, because a pad id
    # is indistinguishable from the genuine EOS appended above.
    tokens["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

# Tokenize both splits the same way, dropping each split's original columns so
# that only the tokenizer features and labels remain.
train_data, val_data = [
    split.map(
        tokenize_function,
        batched=True,
        remove_columns=split.column_names,
    )
    for split in (train_data, val_data)
]

# Examples are already padded and labelled by tokenize_function, so the
# default collator is used without any extra configuration.
data_collator = DefaultDataCollator()

training_args = TrainingArguments(
    # Output and checkpointing
    output_dir="fikra-mistral-4bit",
    overwrite_output_dir=True,
    save_strategy="epoch",
    save_total_limit=1,
    # Batch sizes: 1 example x 16 accumulation steps per optimizer update
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    # Optimization schedule
    num_train_epochs=3,
    learning_rate=2e-4,
    # Memory savers: fp16 mixed precision and gradient checkpointing
    fp16=True,
    gradient_checkpointing=True,
    # Logging
    logging_steps=10,
    logging_dir="logs",
    report_to="none",
)

# Put the model in training mode and enable gradients on the embedding
# outputs (the LoRA docs call for this when only part of the parameters is
# fine-tuned).
model.train()
model.enable_input_require_grads()

trainer_options = {
    "model": model,
    "args": training_args,
    "train_dataset": train_data,
    "eval_dataset": val_data,
    "tokenizer": tokenizer,
    "data_collator": data_collator,
}
trainer = Trainer(**trainer_options)

# Run the fine-tuning.
print("🤖 Fine-tune (4-bit) başlıyor...")
trainer.train()

# Persist the trained model.
trainer.save_model("fikra-mistral-4bit")
print("✅ Eğitim tamamlandı ve model kaydedildi.")

# Run a sample generation with the fine-tuned model.
prompt = "Bir gün Temel, Dursun ve Nasreddin Hoca bir araya gelmiş..."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Training leaves the model in train mode; switch to eval mode so LoRA dropout
# is disabled and the training-only gradient-checkpointing path is not taken
# while generating.
model.eval()

with torch.inference_mode():
    output = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        temperature=1.0,
        top_p=0.9,
        repetition_penalty=1.1,
        # The model config defines no pad token; state the EOS fallback
        # explicitly instead of relying on generate()'s implicit default.
        pad_token_id=tokenizer.eos_token_id,
    )

print("\n🎉 Üretilen Fıkra:")
print(tokenizer.decode(output[0], skip_special_tokens=True))
