## 1. Setup - Colab Kontrolü ve Proje Kurulumu

In [None]:
import sys
import os

# Colab kontrol√º
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("‚úÖ Google Colab ortamƒ± tespit edildi")
    
    # GPU kontrol√º
    import torch
    if torch.cuda.is_available():
        print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")
        print(f"   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        print("‚ö†Ô∏è  GPU bulunamadƒ±! Runtime > Change runtime type > GPU se√ßin")
    
    # GitHub'dan projeyi klonla
    print("\nüì• Proje indiriliyor...")
    !git clone https://github.com/Aliekinozcetin/Mitre_Attack_TTP_Mapping.git
    
    # Proje dizinine ge√ß
    os.chdir('Mitre_Attack_TTP_Mapping')
    print(f"‚úÖ √áalƒ±≈üma dizini: {os.getcwd()}")
    
    # Gerekli paketleri y√ºkle
    print("\nüì¶ Paketler y√ºkleniyor...")
    !pip install -q -r requirements.txt
    print("‚úÖ T√ºm paketler y√ºklendi")
else:
    print("‚ÑπÔ∏è  Yerel ortamda √ßalƒ±≈üƒ±yorsunuz")

## 2. Import Modüller

In [None]:
import torch
import json
from datetime import datetime

from src.data_loader import prepare_data
from src.model import load_model
from src.train import train_model
from src.evaluate import evaluate_model

# Confirm that the imports above succeeded and show the PyTorch build in use,
# including whether CUDA is usable from this runtime.
print("✅ Modüller yüklendi")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

## 3. Konfigürasyon

In [None]:
# Training parameters: one dictionary read by the data, model, training and
# evaluation cells below.
CONFIG = dict(
    model_name='bert-base-uncased',  # or 'jackaduma/SecBERT', 'distilbert-base-uncased'
    max_length=512,
    batch_size=16,  # can be raised to 32 when a GPU is available
    learning_rate=2e-5,
    num_epochs=3,
    warmup_steps=500,
    threshold=0.5,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    output_dir='./outputs',
)

# Print the configuration as an aligned "name: value" table between rules.
_rule = "=" * 70
print("\n" + _rule)
print("TRAINING CONFIGURATION")
print(_rule)
for name, setting in CONFIG.items():
    print(f"  {name:20s}: {setting}")
print(_rule)

## 4. Veri Yükleme ve Hazırlama

In [None]:
# Step 1: build the datasets for the configured model.
print("\n" + "="*70)
print("STEP 1: DATA PREPARATION")
print("="*70 + "\n")

# prepare_data comes from src.data_loader; it is called with the model name
# and the maximum sequence length from CONFIG.
data = prepare_data(
    model_name=CONFIG['model_name'],
    max_length=CONFIG['max_length']
)

# Pull the pieces used by the later cells out of the returned mapping.
train_dataset = data['train_dataset']
test_dataset = data['test_dataset']
label_list = data['label_list']
num_labels = data['num_labels']

print("\n✅ Veri hazırlama tamamlandı!")
print(f"   Train samples: {len(train_dataset)}")
print(f"   Test samples: {len(test_dataset)}")
print(f"   Number of labels: {num_labels}")

## 5. Model Yükleme

In [None]:
# Step 2: create the model for the configured checkpoint and label count.
print("\n" + "="*70)
print("STEP 2: MODEL INITIALIZATION")
print("="*70 + "\n")

# load_model comes from src.model; num_labels was produced by the data
# preparation cell, so that cell must have been run first.
model = load_model(
    model_name=CONFIG['model_name'],
    num_labels=num_labels,
    device=CONFIG['device']
)

print(f"\n✅ Model yüklendi ve {CONFIG['device']} cihazına taşındı!")

## 6. Model Eğitimi

In [None]:
# Step 3: train the model and persist the label list and training history.
print("\n" + "="*70)
print("STEP 3: MODEL TRAINING")
print("="*70 + "\n")

# Create a per-run output directory named after the model and a timestamp.
# '/' in hub-style model names (e.g. 'jackaduma/SecBERT') is replaced so the
# run name stays a single path component.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
run_name = f"{CONFIG['model_name'].replace('/', '_')}_{timestamp}"
output_dir = os.path.join(CONFIG['output_dir'], run_name)
os.makedirs(output_dir, exist_ok=True)

# Save the label list next to the other run artifacts.
label_file = os.path.join(output_dir, "labels.json")
with open(label_file, 'w', encoding="utf-8") as f:
    json.dump(label_list, f, indent=2)

# Start training (train_model comes from src.train).
history = train_model(
    model=model,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    output_dir=output_dir,
    batch_size=CONFIG['batch_size'],
    learning_rate=CONFIG['learning_rate'],
    num_epochs=CONFIG['num_epochs'],
    warmup_steps=CONFIG['warmup_steps'],
    device=CONFIG['device']
)

# Save the training history returned by train_model.
history_file = os.path.join(output_dir, "training_history.json")
with open(history_file, 'w', encoding="utf-8") as f:
    json.dump(history, f, indent=2)

print("\n✅ Eğitim tamamlandı!")
# The last entry of history['train_loss'] is reported as the final loss.
print(f"   Final loss: {history['train_loss'][-1]:.4f}")

## 7. Model Değerlendirme

In [None]:
# Step 4: evaluate the trained model and persist the metrics as JSON.
print("\n" + "="*70)
print("STEP 4: MODEL EVALUATION")
print("="*70 + "\n")


def _to_jsonable(value):
    """Recursively convert *value* into types ``json.dump`` can serialise.

    Objects exposing ``tolist()`` are converted through it; dicts, lists and
    tuples are walked so nested values are converted too. Anything else is
    returned unchanged.
    """
    # NOTE(review): assumes array-like metric values are NumPy scalars/arrays
    # or tensors, which all provide tolist() -- confirm against evaluate_model.
    if hasattr(value, 'tolist'):
        return value.tolist()
    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    return value


# evaluate_model comes from src.evaluate.
metrics = evaluate_model(
    model=model,
    test_dataset=test_dataset,
    batch_size=CONFIG['batch_size'],
    device=CONFIG['device'],
    threshold=CONFIG['threshold'],
    label_list=label_list
)

# Save the metrics, leaving out the 'predictions' and 'labels' entries.
# Python floats/ints are coerced to float as before; every other value goes
# through _to_jsonable so values the isinstance check does not match
# (e.g. np.float32, np.int64, arrays) no longer make json.dump raise TypeError.
metrics_to_save = {
    name: float(value) if isinstance(value, (float, int)) else _to_jsonable(value)
    for name, value in metrics.items()
    if name not in ('predictions', 'labels')
}
metrics_file = os.path.join(output_dir, "evaluation_metrics.json")
with open(metrics_file, 'w', encoding="utf-8") as f:
    json.dump(metrics_to_save, f, indent=2)

print("\n✅ Değerlendirme tamamlandı!")
print("\nKey Metrics:")
print(f"  Micro F1:    {metrics['micro_f1']:.4f}")
print(f"  Macro F1:    {metrics['macro_f1']:.4f}")
print(f"  Samples F1:  {metrics['samples_f1']:.4f}")

## 8. Sonuçları Kaydet ve İndir

In [None]:
# Build a summary of the run from values produced by the earlier cells:
# configuration, dataset sizes, final training loss and evaluation metrics.
summary = {
    'model': CONFIG['model_name'],
    'timestamp': timestamp,
    'configuration': CONFIG,
    'data': {
        'num_labels': num_labels,
        'train_samples': len(train_dataset),
        'test_samples': len(test_dataset)
    },
    'training': {
        'final_loss': history['train_loss'][-1]
    },
    'evaluation': metrics_to_save
}

# Write the summary into the run's output directory.
summary_file = os.path.join(output_dir, "summary.json")
with open(summary_file, 'w', encoding="utf-8") as f:
    json.dump(summary, f, indent=2)

# Closing banner listing the artifacts expected in the output directory.
# NOTE(review): the *.pt entries are not written by this notebook; they are
# presumably produced by train_model -- confirm.
print("\n" + "="*70)
print("PIPELINE COMPLETE!")
print("="*70)
print(f"\nSonuçlar kaydedildi: {output_dir}")
print("\nDosyalar:")
print("  - labels.json")
print("  - training_history.json")
print("  - evaluation_metrics.json")
print("  - summary.json")
print("  - final_model.pt")
print("  - checkpoint_epoch_*.pt")
print("="*70)

## 9. Colab'da Sonuçları İndir

In [None]:
# On Colab, bundle the run's output directory into a ZIP and hand it to the
# browser for download; locally the files are already on disk.
if IN_COLAB:
    import shutil
    from google.colab import files

    # Zip the results. make_archive appends ".zip" to the base name, so the
    # archive ends up at f"{run_name}.zip" in the current working directory.
    zip_name = f"{run_name}.zip"
    shutil.make_archive(run_name, 'zip', output_dir)

    print(f"\n📦 Sonuçlar sıkıştırılıyor: {zip_name}")
    print(f"   Boyut: {os.path.getsize(zip_name) / (1024*1024):.2f} MB")

    # Download
    print("\n⬇️  İndirme başlatılıyor...")
    files.download(zip_name)
    print("✅ İndirme tamamlandı!")
else:
    print("ℹ️  Yerel ortamdasınız, sonuçlar zaten bilgisayarınızda.")

## 10. (Opsiyonel) Farklı Modelleri Dene

In [None]:
# Run this cell to try the SecBERT model
# CONFIG['model_name'] = 'jackaduma/SecBERT'

# Run this cell to try the DistilBERT model
# CONFIG['model_name'] = 'distilbert-base-uncased'
# CONFIG['batch_size'] = 32  # DistilBERT is smaller, so the batch size can be increased

# Then re-run the cells above