In [25]:
import locale
# Force UTF-8 as the reported preferred encoding. The replacement keeps the
# stdlib signature (`do_setlocale=True`) so callers that pass the argument,
# e.g. `locale.getpreferredencoding(False)`, do not hit a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [26]:
# Gerekli importlar
import os
from huggingface_hub import snapshot_download

# Local target directory for the model files: <home>/models/tinyllama-1.1b-chat
model_path = os.path.join(os.path.expanduser("~"), *("models", "tinyllama-1.1b-chat"))

# Announce the download, then fetch the repository snapshot into model_path.
# The call stays the last expression of the cell so its return value is displayed.
print("📥 TinyLlama-1.1B-Chat modeli indiriliyor...")
snapshot_download(local_dir=model_path, repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

📥 TinyLlama-1.1B-Chat modeli indiriliyor...


Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

'C:\\Users\\HP\\models\\tinyllama-1.1b-chat'

In [None]:
# Step 2: Check GPU and setup
import torch
import psutil

# Report the GPU that will be used. Guarded so the cell prints a clear message
# instead of raising when no CUDA device is visible.
if torch.cuda.is_available():
    print(f"🖥️  GPU: {torch.cuda.get_device_name()}")
    print(f"💾 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f}GB")
else:
    print("⚠️  CUDA destekli GPU bulunamadı")

# Recommended settings for TinyLlama on this GPU
gpu_settings = {
    "max_seq_length": 384,
    "lora_r": 16,                   # Reduced from 32 to balance memory/performance
    "per_device_train_batch_size": 4,  # Small batch size for 6GB VRAM
    "per_device_eval_batch_size": 4,
    "gradient_accumulation_steps": 4,   # Increased to compensate smaller batch size
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    "learning_rate": 5e-4,          # Slightly reduced for stability
    "warmup_steps": 100,
    "max_grad_norm": 0.3,

    # Memory optimizations
    "mixed_precision": "fp16",       # Use mixed precision training
    "load_in_4bit": True,           # Enable 4-bit quantization
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",   # 4-bit NormalFloat (NF4) quantization
    # Was `Fçalse`, which raised NameError when the cell ran.
    # NOTE(review): the TrainingArguments cell hard-codes
    # gradient_checkpointing=True and does not read this key — confirm which
    # value is actually intended.
    "use_gradient_checkpointing": False,
}

🖥️  GPU: NVIDIA GeForce RTX 4050 Laptop GPU
💾 VRAM: 6.0GB


In [28]:
import os
# Show the configured I/O encoding; .get() avoids a KeyError when the variable is unset.
print(os.environ.get('PYTHONIOENCODING', '<not set>'))

utf-8


In [29]:
import torch
import gc
import os

def clean_gpu_memory(memory_fraction=0.7):
    """Release cached GPU memory and apply the notebook's allocator settings.

    Args:
        memory_fraction: Fraction of the device memory this process may use,
            forwarded to ``set_per_process_memory_fraction``. Defaults to 0.7,
            the value that was previously hard-coded.

    Returns:
        None. When CUDA is unavailable only the garbage collection and the
        environment variable assignment are performed.
    """
    # Collect unreachable Python objects first, so tensors they still hold are
    # handed back to the caching allocator before the cache itself is released.
    gc.collect()

    # Allocator configuration.
    # NOTE(review): PyTorch presumably reads this variable only when the CUDA
    # allocator is first initialised, so it may have no effect once an earlier
    # cell has touched the GPU — confirm, or set it before importing torch.
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:False,max_split_size_mb:128'

    # Nothing further to do on a machine without a usable CUDA device.
    if not torch.cuda.is_available():
        return

    # Drop the CUDA cache and restart peak-memory bookkeeping.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    # Cap this process's share of the GPU memory.
    torch.cuda.memory.set_per_process_memory_fraction(memory_fraction)

# Clean up GPU memory
clean_gpu_memory()

In [30]:
# =============================================================================
# IMPORTS AND SETUP
# =============================================================================

# Import the core packages (the banner below is printed before the imports run)
print("\n📦 Paketler yükleniyor...")
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer,TrainerCallback
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import bitsandbytes as bnb
import numpy as np
from tqdm.auto import tqdm
from torch.utils.tensorboard import SummaryWriter
import time
import psutil
import GPUtil
from datetime import datetime
import os




📦 Paketler yükleniyor...


In [31]:
# Custom callback for TensorBoard
class TensorBoardCallback(TrainerCallback):
    """Trainer callback that writes speed, hardware and model metrics to TensorBoard.

    Args:
        writer: Object exposing ``add_scalar`` and ``close`` (and optionally
            ``flush``); the notebook passes a ``SummaryWriter``.
    """

    # GPU memory utilisation (0-1) above which a cleanup is triggered.
    GPU_MEMORY_CLEANUP_THRESHOLD = 0.9

    def __init__(self, writer):
        self.writer = writer
        self.training_start_time = None
        self.last_log_time = None
        # Step at which on_step_end last wrote a gradient norm; lets on_log
        # avoid writing a second value for the same step.
        self._grad_norm_step = None

    @staticmethod
    def _first_gpu():
        """Return the first GPU reported by GPUtil, or None when none is listed."""
        gpus = GPUtil.getGPUs()
        return gpus[0] if gpus else None

    def on_train_begin(self, args, state, control, **kwargs):
        """Start the timers used for the speed and elapsed-time metrics."""
        self.training_start_time = time.time()
        self.last_log_time = self.training_start_time

    def on_step_end(self, args, state, control, **kwargs):
        """Log timing, hardware and model metrics every ``args.logging_steps`` steps."""
        if state.global_step % args.logging_steps != 0:
            return

        step = state.global_step
        current_time = time.time()

        # Timing metrics
        if self.last_log_time is not None:
            interval = current_time - self.last_log_time
            # Skip the rate when the clock did not advance, to avoid dividing by zero.
            if interval > 0:
                self.writer.add_scalar('Speed/steps_per_second', args.logging_steps / interval, step)
        if self.training_start_time is not None:
            elapsed_time = current_time - self.training_start_time
            self.writer.add_scalar('Time/elapsed_hours', elapsed_time / 3600, step)

        # GPU metrics: queried once per logged step and reused for the cleanup
        # check below; skipped entirely when GPUtil lists no GPU.
        gpu = self._first_gpu()
        if gpu is not None:
            self.writer.add_scalar('GPU/temperature', gpu.temperature, step)
            self.writer.add_scalar('GPU/utilization', gpu.load * 100, step)
            self.writer.add_scalar('GPU/memory_percent', gpu.memoryUtil * 100, step)

        # CPU and RAM metrics
        self.writer.add_scalar('System/cpu_percent', psutil.cpu_percent(), step)
        self.writer.add_scalar('System/ram_percent', psutil.virtual_memory().percent, step)

        # Norms over trainable, non-quantized (fp16/fp32) parameters only.
        model = kwargs.get('model')
        if model is not None:
            grad_sq = 0.0
            param_sq = 0.0
            saw_grad = False
            for _, param in model.named_parameters():
                if not param.requires_grad or param.dtype not in (torch.float16, torch.float32):
                    continue
                if param.grad is not None:
                    saw_grad = True
                    grad_sq += param.grad.data.float().norm(2).item() ** 2
                param_sq += param.data.float().norm(2).item() ** 2

            # NOTE(review): the Trainer presumably clears gradients before this
            # event fires, so usually no gradient is visible here. In that case
            # nothing is written (instead of a misleading 0.0) and on_log
            # records the Trainer-reported value.
            if saw_grad:
                self.writer.add_scalar('Model/gradient_norm', grad_sq ** 0.5, step)
                self._grad_norm_step = step
            self.writer.add_scalar('Model/parameter_norm', param_sq ** 0.5, step)

        self.last_log_time = current_time

        # Free memory whenever the GPU reading taken above exceeds the threshold.
        if gpu is not None and gpu.memoryUtil > self.GPU_MEMORY_CLEANUP_THRESHOLD:
            gc.collect()
            torch.cuda.empty_cache()

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Log every ``eval_*`` metric and the perplexity derived from ``eval_loss``."""
        if not metrics:
            return

        # Log the validation metrics, stripping only the leading 'eval_' prefix.
        prefix = 'eval_'
        for key, value in metrics.items():
            if key.startswith(prefix):
                metric_name = key[len(prefix):]
                self.writer.add_scalar(f'Evaluation/{metric_name}', value, state.global_step)

        # Compute and log perplexity
        if 'eval_loss' in metrics:
            perplexity = torch.exp(torch.tensor(metrics['eval_loss'])).item()
            self.writer.add_scalar('Evaluation/perplexity', perplexity, state.global_step)

    def on_train_end(self, args, state, control, **kwargs):
        """Close the writer when training finishes."""
        self.writer.close()

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Record the logged gradient norm, if any, and flush the writer."""
        if logs is None:
            return

        # NOTE(review): assumes the Trainer reports the gradient norm under the
        # 'grad_norm' key of its logs — confirm for the installed version.
        grad_norm = logs.get('grad_norm')
        if grad_norm is not None and self._grad_norm_step != state.global_step:
            self.writer.add_scalar('Model/gradient_norm', grad_norm, state.global_step)

        if hasattr(self.writer, 'flush'):
            self.writer.flush()

In [32]:
import torch

# GPU check. The per-device queries raise when no CUDA device is present, so
# they only run when CUDA is available; otherwise just the device count is shown.
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device count:", torch.cuda.device_count())
    print("Device name:", torch.cuda.get_device_name(0))
    print("Device properties:", torch.cuda.get_device_properties(0))
else:
    print("Device count:", torch.cuda.device_count())

CUDA available: True
Current device: 0
Device count: 1
Device name: NVIDIA GeForce RTX 4050 Laptop GPU
Device properties: _CudaDeviceProperties(name='NVIDIA GeForce RTX 4050 Laptop GPU', major=8, minor=9, total_memory=6140MB, multi_processor_count=20, uuid=337819bf-e17c-2b36-f945-389bbb32b034, L2_cache_size=24MB)


In [None]:
from transformers import BitsAndBytesConfig
# Model path (same directory the download cell writes to)
model_path = os.path.join(os.path.expanduser("~"), "models", "tinyllama-1.1b-chat")

# Create the TensorBoard log directory; the folder name carries a timestamp with minute resolution
log_dir = os.path.join("./logs", f"tinyllama-finetuning-{datetime.now().strftime('%Y%m%d-%H%M')}")
os.makedirs(log_dir, exist_ok=True)
writer = SummaryWriter(log_dir=log_dir)

# Record the initial configuration as a text entry in TensorBoard
writer.add_text("Configuration", f"""
- Model: TinyLlama-1.1B-Chat
- LoRA rank: {gpu_settings['lora_r']}
- Batch size: {gpu_settings['per_device_train_batch_size']}
- Learning rate: {gpu_settings['learning_rate']}
- Max sequence length: {gpu_settings['max_seq_length']}
""")

# Model loading settings.
# NOTE(review): these values are hard-coded here; the corresponding
# gpu_settings entries (load_in_4bit, bnb_4bit_*) are not read.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load the model and tokenizer
print("🤖 Model yükleniyor...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=quantization_config,
)

tokenizer = AutoTokenizer.from_pretrained(model_path)
# Reuse the EOS token as the padding token and pad on the right
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Prepare the model for 4-bit training
model = prepare_model_for_kbit_training(model)

# LoRA configuration (rank and target modules come from gpu_settings)
lora_config = LoraConfig(
    r=gpu_settings["lora_r"],
    lora_alpha=64,  # Higher alpha to go with the higher rank
    target_modules=gpu_settings["target_modules"],
    lora_dropout=0.1,  # Higher dropout (to prevent overfitting)
    bias="none",
    task_type="CAUSAL_LM"
)

# Build the LoRA model and print how many parameters are trainable
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Training settings
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,  # More epochs because the model is small
    per_device_train_batch_size=gpu_settings["per_device_train_batch_size"],
    per_device_eval_batch_size=gpu_settings["per_device_eval_batch_size"],
    gradient_accumulation_steps=gpu_settings["gradient_accumulation_steps"],
    learning_rate=gpu_settings["learning_rate"],
    fp16=True,

    optim="adamw_torch_fused",
    # NOTE(review): hard-coded; gpu_settings["use_gradient_checkpointing"] is not consulted here
    gradient_checkpointing=True,
    # Best checkpoint is chosen by the lowest eval_loss
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    
    warmup_steps=gpu_settings["warmup_steps"],
    max_grad_norm=gpu_settings["max_grad_norm"],
    
    weight_decay=0.01,
    
    report_to=["tensorboard"],
    # Model specific settings
    group_by_length=True,  # Batch samples of similar length together
    remove_unused_columns=False,  # For compatibility with PEFT
    # NOTE(review): presumably evaluation then returns only the loss, so a
    # compute_metrics function passed to the Trainer would not run — confirm
    prediction_loss_only=True,
    # Logging and evaluation settings
    logging_dir=log_dir,  # TensorBoard log directory
    logging_strategy="steps",    # Log by steps instead of epochs
    logging_steps=1,            # Log every step
    logging_first_step=True,    # Log the first step
    # Save and evaluate on the same 100-step schedule; keep at most 3 checkpoints
    save_strategy="steps",
    save_steps=100,
    eval_strategy="steps",
    eval_steps=100,
    save_total_limit=3,
)

🤖 Model yükleniyor...
trainable params: 12,615,680 || all params: 1,112,664,064 || trainable%: 1.1338
trainable params: 12,615,680 || all params: 1,112,664,064 || trainable%: 1.1338


In [34]:
import torch

In [35]:
import torch
import gc
from datasets import load_dataset
import evaluate
from transformers import DataCollatorForLanguageModeling
# Memory cleanup: run Python garbage collection before the data preparation below
gc.collect()

print("📦 Tokenizer ve model hazırlandı.")

# Build tokenized features (input_ids, attention_mask, labels) from raw records
def prepare_data(examples, tokenizer=tokenizer, gpu_settings=gpu_settings):
    """Tokenize a batch of instruction records for causal-LM fine-tuning.

    Args:
        examples: Mapping of column name to a list of values. 'instruction'
            and 'output' are required; 'input' is optional.
        tokenizer: Callable tokenizer; defaults to the notebook-level tokenizer.
        gpu_settings: Settings mapping; only "max_seq_length" is read.

    Returns:
        The tokenizer output with "input_ids" converted to a ``torch.long``
        tensor and an added "labels" tensor: a copy of "input_ids" whose
        padding positions are set to -100.
    """
    import torch

    instructions = examples['instruction']
    outputs = examples['output']
    # One empty string per record when the optional column is missing. The
    # former default `['']` raised IndexError for any batch longer than one.
    inputs = examples.get('input') or [''] * len(instructions)

    # Same prompt template as before; a null 'input' is rendered as an empty
    # string instead of the literal text "None".
    prompts = [
        f"### Instruction: {instruction}\n\n### Input: {'' if extra is None else extra}\n\n### Response: {output}"
        for instruction, extra, output in zip(instructions, inputs, outputs)
    ]

    model_inputs = tokenizer(
        prompts,
        truncation=True,
        max_length=gpu_settings["max_seq_length"],
        padding="max_length",
        return_tensors=None,
    )

    # Labels must line up token-for-token with input_ids. Previously they came
    # from a separate tokenization of the outputs alone, which puts the answer
    # tokens at different positions than in the full prompt.
    # Assumes the tokenizer returns an "attention_mask" — TODO confirm.
    input_ids = torch.tensor(model_inputs["input_ids"], dtype=torch.long)
    attention_mask = torch.tensor(model_inputs["attention_mask"], dtype=torch.long)
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100  # exclude padding positions from the labels

    model_inputs["input_ids"] = input_ids
    model_inputs["labels"] = labels

    return model_inputs

# Read the train/validation JSONL files into a dataset dictionary
print("📚 Veri setleri hazırlanıyor...")
datasets = load_dataset(
    "json",
    data_files={"train": "train.jsonl", "validation": "val.jsonl"},
)

# Tokenize the training split; the raw columns are dropped so only the
# output of prepare_data is kept.
train_dataset = datasets["train"].map(
    prepare_data,
    remove_columns=datasets["train"].column_names,
    batched=True,
    batch_size=100,
    num_proc=6,
)

# Same processing for the validation split.
val_dataset = datasets["validation"].map(
    prepare_data,
    remove_columns=datasets["validation"].column_names,
    batched=True,
    batch_size=100,
    num_proc=6,
)

# Metric objects used by the evaluation code
rouge_score = evaluate.load('rouge')
bertscore = evaluate.load('bertscore')

def compute_metrics(eval_pred):
    """Compute ROUGE and BERTScore for one evaluation pass.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` may be token
            ids, raw logits with one extra trailing (vocabulary) axis, or a
            tuple whose first element is one of those. ``labels`` may contain
            -100 at ignored positions.

    Returns:
        dict with "rouge1", "rouge2", "rougeL" and "bertscore_f1".
    """
    predictions, labels = eval_pred

    # Some models return a tuple of outputs; the first element is used.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    if predictions.ndim == labels.ndim + 1:
        # Raw logits: take the most likely token per position.
        # Assumes causal-LM logits, where position t scores token t+1, so
        # predictions and labels are shifted against each other — TODO confirm.
        predictions = predictions.argmax(axis=-1)[..., :-1]
        labels = labels[..., 1:]

    # -100 marks ignored positions and is not a valid token id; replace it
    # with the pad id so decoding works and skip_special_tokens can drop it.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = 0
    ignored = labels == -100
    labels = np.where(ignored, pad_id, labels)
    if predictions.shape == ignored.shape:
        # Do not score whatever was predicted at ignored positions.
        predictions = np.where(ignored, pad_id, predictions)
    predictions = np.where(predictions < 0, pad_id, predictions)

    # Decode predictions and references
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # ROUGE scores
    rouge_output = rouge_score.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_aggregator=True
    )

    # BERTScore
    bert_output = bertscore.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        lang="tr"
    )

    # Combine the metrics
    metrics = {
        "rouge1": rouge_output["rouge1"],
        "rouge2": rouge_output["rouge2"],
        "rougeL": rouge_output["rougeL"],
        "bertscore_f1": np.mean(bert_output["f1"])
    }

    return metrics

# Callback for memory management
class MemoryCallback(TrainerCallback):
    """Run garbage collection and empty the CUDA cache on every 100th global step."""

    def on_step_end(self, args, state, control, **kwargs):
        # Any step that is not a multiple of 100 is left alone.
        if state.global_step % 100:
            return
        gc.collect()
        torch.cuda.empty_cache()

# Create the TensorBoard callback
tensorboard_callback = TensorBoardCallback(writer)

# Announce the run and show how to monitor it
print("🚀 Eğitim başlıyor...")
print(f"📊 TensorBoard izleme: tensorboard --logdir {log_dir}")

# Causal-LM collator (mlm=False) producing PyTorch tensors
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    return_tensors="pt"
)

# NOTE(review): training_args sets prediction_loss_only=True, so
# compute_metrics is presumably never invoked during evaluation — confirm.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    callbacks=[MemoryCallback(), tensorboard_callback]
)

# Run training. The writer is closed in `finally` so the TensorBoard logs are
# finalized even when the run is interrupted (e.g. KeyboardInterrupt) or fails.
try:
    train_results = trainer.train()
finally:
    writer.close()

# Print the results
print("\n📊 Eğitim sonuçları:")
print(f"Training Loss: {train_results.training_loss:.4f}")
for metric_name, metric_value in train_results.metrics.items():
    if isinstance(metric_value, (int, float)):
        print(f"{metric_name}: {metric_value:.4f}")
    else:
        # Non-numeric entries cannot take the float format; print them as-is.
        print(f"{metric_name}: {metric_value}")

📦 Tokenizer ve model hazırlandı.
📚 Veri setleri hazırlanıyor...


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🚀 Eğitim başlıyor...
📊 TensorBoard izleme: tensorboard --logdir ./logs\tinyllama-finetuning-20250618-1718


Step,Training Loss,Validation Loss
100,1.3511,1.37389
200,1.3663,1.352985
300,1.3817,1.345238
400,1.3129,1.342475
500,1.3837,1.335803


KeyboardInterrupt: 

In [None]:
tensorboard --logdir=runs

In [None]:
tensorboard --logdir=runs

In [None]:
# Post-training model check
def test_model(prompt):
    """Generate a sampled answer to ``prompt`` with the notebook-level model.

    The prompt is wrapped in the same "### Instruction / ### Input /
    ### Response" template that prepare_data uses for the training data, with
    an empty input section.

    Args:
        prompt: The instruction or question text.

    Returns:
        The generated continuation as a stripped string, without the prompt.
    """
    text = f"### Instruction: {prompt}\n\n### Input: \n\n### Response:"

    # Place the inputs on whatever device the model's parameters live on
    # instead of assuming "cuda".
    device = next(model.parameters()).device
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # Generate in eval mode so dropout is off; restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                pad_token_id=tokenizer.pad_token_id,
            )
    finally:
        if was_training:
            model.train()

    # Decode only the newly generated tokens; slicing off the prompt is more
    # reliable than splitting the decoded text on a marker string.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

# Smoke-test the model with a handful of prompts
test_prompts = ["Merhaba, nasılsın?", "Python nedir?", "En sevdiğin renk ne?"]

print("🧪 Model testi başlıyor...")
for prompt in test_prompts:
    # Echo the question first, then generate and show the answer.
    print(f"\nSoru: {prompt}")
    answer = test_model(prompt)
    print(f"Yanıt: {answer}")

# Detailed model evaluation
def evaluate_model(model, test_cases):
    """Generate an answer for every test case and score it against the reference.

    Prompts use the same "### Instruction / ### Input / ### Response" template
    that prepare_data uses for the training data, with an empty input section.

    Args:
        model: Model exposing ``generate``, ``parameters``, ``eval`` and ``train``.
        test_cases: Iterable of dicts with the keys 'question' and 'answer'.

    Returns:
        One dict per test case with the keys 'question', 'expected',
        'generated', 'rouge1', 'rouge2', 'rougeL' and 'bertscore'.
    """
    results = []

    print("🧪 Model değerlendirmesi başlıyor...")

    # Use the device of the model's parameters instead of assuming "cuda".
    device = next(model.parameters()).device

    # Generate in eval mode so dropout is off; restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        for test_case in tqdm(test_cases):
            inputs = tokenizer(
                f"### Instruction: {test_case['question']}\n\n### Input: \n\n### Response:",
                return_tensors="pt",
                max_length=512,
                truncation=True
            ).to(device)
            prompt_length = inputs["input_ids"].shape[1]

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=256,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.95,
                    pad_token_id=tokenizer.pad_token_id,
                )

            # Keep only the newly generated tokens (everything after the prompt).
            response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

            # Compute the metrics for this single prediction/reference pair
            rouge_output = rouge_score.compute(
                predictions=[response],
                references=[test_case['answer']],
                use_aggregator=True
            )

            bert_output = bertscore.compute(
                predictions=[response],
                references=[test_case['answer']],
                lang="tr"
            )

            results.append({
                'question': test_case['question'],
                'expected': test_case['answer'],
                'generated': response,
                'rouge1': rouge_output['rouge1'],
                'rouge2': rouge_output['rouge2'],
                'rougeL': rouge_output['rougeL'],
                'bertscore': bert_output['f1'][0]
            })
    finally:
        if was_training:
            model.train()

    return results

# Reference question/answer pairs used for scoring
test_cases = [
    {
        'question': 'Yapay zeka nedir?',
        'answer': 'Yapay zeka, insan zekasını taklit eden ve öğrenebilen bilgisayar sistemleridir.'
    },
    {
        'question': 'Python programlama dilinin avantajları nelerdir?',
        'answer': 'Python, okunabilir syntax, zengin kütüphane ekosistemi ve geniş topluluk desteği sunar.'
    },
    # Add your own test cases here
]

# Score the model on every test case
results = evaluate_model(model, test_cases)

# Per-case report: the texts first, then the four scores with four decimals
print("\n📊 Değerlendirme Sonuçları:")
for result in results:
    print(f"\nSoru: {result['question']}")
    print(f"Beklenen: {result['expected']}")
    print(f"Üretilen: {result['generated']}")
    for label, key in (
        ("ROUGE-1", 'rouge1'),
        ("ROUGE-2", 'rouge2'),
        ("ROUGE-L", 'rougeL'),
        ("BERTScore", 'bertscore'),
    ):
        print(f"{label}: {result[key]:.4f}")

# Mean of each score across all test cases
avg_metrics = {
    metric: np.mean([r[metric] for r in results])
    for metric in ('rouge1', 'rouge2', 'rougeL', 'bertscore')
}

print("\n📈 Ortalama Metrikler:")
for metric_name, metric_value in avg_metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")

# Save the model and the tokenizer side by side
print("\n💾 Model kaydediliyor...")
model.save_pretrained("./results/final_model")
tokenizer.save_pretrained("./results/final_model")

In [None]:
# Tell the user how to start TensorBoard for this run's log directory
print(
    "TensorBoard'u başlatmak için terminal/komut isteminde şu komutu çalıştırın:",
    f"tensorboard --logdir {log_dir}",
    "\nTarayıcınızda http://localhost:6006 adresini açın",
    sep="\n",
)