In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from benchmark import PerformanceMonitor  # Ihr Benchmark-Skript
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# --- 1. CONFIGURATION & DATA ---
MAX_LEN = 200      # every URL is padded/truncated to this many characters
BATCH_SIZE = 64
EPOCHS = 3
SEED = 42

# Seed the global RNGs so that weight initialisation and DataLoader shuffling
# repeat across "Restart & Run All" (some GPU kernels may still be
# non-deterministic).
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Laufe auf: {device}")

  import pynvml  # type: ignore[import]


Laufe auf: cuda


In [2]:
def load_raw_data(
    legitimate_path="../data/raw/url_legitimate_safebrowsing.txt",
    phishing_path="../data/raw/url_raw_phishing.txt",
):
    """
    Load URLs and their labels from two text files.

    Each non-empty line contributes one sample. Only the text before the
    first comma is used as the URL; anything after it is ignored.

    Args:
        legitimate_path: File with legitimate URLs (label 0). The default uses
            forward slashes so it resolves on Windows and POSIX alike.
        phishing_path: File with phishing URLs (label 1).

    Returns:
        (urls, labels): a list of URL strings and an integer numpy array of
        the same length holding 0 (legitimate) or 1 (phishing).

    A missing file is not an error: a warning is printed and that class
    contributes no samples.
    """
    urls = []
    labels = []

    # (progress name, warning name, path, label) -- one entry per class so
    # both files go through exactly the same parsing code.
    sources = (
        ("legitimate", "Legitimate", legitimate_path, 0),
        ("phishing", "Phishing", phishing_path, 1),
    )

    for progress_name, warning_name, path, label in sources:
        print(f"Loading {progress_name} URLs...")
        try:
            with open(path, "rt", encoding="utf-8") as f:
                for line in f:
                    # Keep only the first comma-separated field.
                    url = line.strip().split(",", 1)[0].strip()
                    if url:
                        urls.append(url)
                        labels.append(label)
        except FileNotFoundError:
            print(f"Warning: {warning_name} file not found.")

    # Explicit integer dtype so an empty result is not silently float64.
    return urls, np.array(labels, dtype=int)

# --- 1. PREPARE DATA ---
raw_urls, labels = load_raw_data()

# load_raw_data() only prints a warning when an input file is missing. Fail
# fast here instead of training on an empty or single-class dataset, which
# would otherwise only blow up much later during metric calculation.
missing_classes = {0, 1} - set(np.unique(labels).tolist())
if missing_classes:
    raise RuntimeError(
        f"No URLs loaded for label(s) {sorted(missing_classes)} "
        "(0 = legitimate, 1 = phishing); check the input files."
    )

# Split into train and test sets (fixed random_state keeps the split stable)
X_train, X_test, y_train, y_test = train_test_split(
    raw_urls, labels, test_size=0.2, random_state=42
)

print(f"Training on {len(X_train)} URLs, Testing on {len(X_test)} URLs.")

Loading legitimate URLs...
Loading phishing URLs...
Training on 225093 URLs, Testing on 56274 URLs.


In [3]:
# --- 2. VECTORIZATION (manual, for PyTorch) ---
# Build a simple character-level vocabulary, similar in spirit to Keras'
# TextVectorization. Only the first 1000 training URLs are scanned to keep
# this step fast; characters not seen there map to UNK later on.
sample_text = "".join(X_train[:1000])
chars = sorted(set(sample_text))
# Indices 0 and 1 are reserved (0 = padding, 1 = unknown), so real characters
# start at 2.
char_to_int = dict(zip(chars, range(2, len(chars) + 2)))
vocab_size = 2 + len(char_to_int)

def encode_urls(urls, max_len=MAX_LEN):
    """
    Turn URL strings into fixed-length sequences of character indices.

    Characters are looked up in the module-level ``char_to_int`` table;
    anything not in the table becomes 1 (unknown). Sequences shorter than
    ``max_len`` are right-padded with 0, longer ones are cut off.

    Args:
        urls: Iterable of URL strings.
        max_len: Length of every encoded row.

    Returns:
        numpy array built from the encoded rows (one row per URL).
    """
    def _encode_one(url):
        # Truncating the string first is equivalent to truncating the ids,
        # because each character maps to exactly one index.
        ids = [char_to_int.get(ch, 1) for ch in url[:max_len]]
        ids.extend([0] * (max_len - len(ids)))
        return ids

    return np.array([_encode_one(url) for url in urls])

print("Vektorisiere Daten (das dauert kurz)...")

# Encoded inputs: integer index tensors, one row per URL.
X_train_enc = torch.tensor(encode_urls(X_train), dtype=torch.long)
X_test_enc = torch.tensor(encode_urls(X_test), dtype=torch.long)

# Labels as float column vectors so they line up with the model's
# single-probability output.
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)[:, None]
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)[:, None]

# Wrap everything in DataLoaders for mini-batching; only the training data
# is shuffled.
train_dataset = TensorDataset(X_train_enc, y_train_tensor)
test_dataset = TensorDataset(X_test_enc, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

Vektorisiere Daten (das dauert kurz)...


In [4]:
# --- 3. THE CNN MODEL (PyTorch version) ---
class CNNModel(nn.Module):
    """
    Character-level 1D CNN that outputs one probability per input sequence.

    Pipeline: embedding -> Conv1d(128, k=5) -> ReLU -> MaxPool(2)
    -> Conv1d(64, k=3) -> ReLU -> global max pool -> Linear(64) -> ReLU
    -> Dropout(0.5) -> Linear(1) -> Sigmoid.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each character embedding.
    """

    def __init__(self, vocab_size, embed_dim=32):
        super().__init__()

        # Index 0 is treated as padding: its embedding stays zero.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)

        # First convolution stage
        self.conv1 = nn.Conv1d(in_channels=embed_dim, out_channels=128, kernel_size=5)
        self.relu = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2)

        # Second convolution stage, collapsed to one value per channel
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3)
        self.global_pool = nn.AdaptiveMaxPool1d(1)

        # Classifier head
        self.fc1 = nn.Linear(64, 64)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
        Map index sequences ``[batch, seq_len]`` to probabilities ``[batch, 1]``.

        ``seq_len`` must be at least 10 so that both convolutions have enough
        input positions.
        """
        embedded = self.embedding(x)              # [batch, seq_len, embed]

        # Conv1d wants channels before the sequence axis.
        features = embedded.transpose(1, 2)       # [batch, embed, seq_len]

        features = self.conv1(features)
        features = self.relu(features)
        features = self.pool1(features)

        features = self.relu(self.conv2(features))
        pooled = self.global_pool(features)       # [batch, 64, 1]
        pooled = pooled.squeeze(-1)               # [batch, 64]

        hidden = self.relu(self.fc1(pooled))
        hidden = self.dropout(hidden)
        return self.sigmoid(self.fc2(hidden))

# Instantiate the network and move it to the selected device
model = CNNModel(vocab_size=vocab_size)
model = model.to(device)

# BCELoss works on probabilities, which matches the model's sigmoid output
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

monitor = PerformanceMonitor("CNN")

In [5]:
# --- 4. TRAINING ---
print("Starte Training...")
monitor.start_measurement()

model.train()  # training mode: dropout is active
for epoch_idx in range(EPOCHS):
    for inputs, targets in train_loader:
        # Batches come from CPU tensors; move them to the model's device.
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()
    print(f"Epoch {epoch_idx + 1} fertig.")

monitor.end_measurement(task_name="Training")

Starte Training...
Epoch 1 fertig.
Epoch 2 fertig.
Epoch 3 fertig.
--- Ergebnisse CNN (Training) ---
Zeit: 32.8799s | GPU-Last: 50.1%
VRAM (System): 1450.92 MB | VRAM (Torch): 44.56 MB


{'model': 'CNN',
 'task': 'Training',
 'time_sec': 32.8799,
 'ram_mb': 1666.48,
 'vram_mb': 1450.92,
 'torch_vram_mb': 44.56,
 'cpu_percent': 113.3,
 'gpu_util_percent': 50.1}

In [6]:
# --- 5. INFERENCE & EVALUATION ---
print("Starting inference (entire test set)...")
monitor.start_measurement()

model.eval()  # evaluation mode: dropout is disabled
all_preds = []
all_labels = []

# No gradients are needed for inference
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)

        # Model prediction (sigmoid probabilities, shape [batch, 1])
        outputs = model(X_batch)

        # Move results to CPU for metric calculation with sklearn
        all_preds.extend(outputs.cpu().numpy())
        all_labels.extend(y_batch.numpy())

# NOTE: the measurement started above is only ended by end_measurement() at
# the bottom of this cell, so the metric computation below falls inside the
# measured window. Timing pure inference would require stopping the monitor
# here. NOTE(review): whether it can be stopped separately from logging
# depends on PerformanceMonitor's API, which is not visible in this notebook.

# --- CALCULATE METRICS ---
# Flatten the per-sample [1]-shaped entries into plain 1-D arrays
y_true = np.array(all_labels).ravel().astype(int)
y_scores = np.array(all_preds).ravel()          # probabilities (sigmoid output)
y_pred_binary = (y_scores > 0.5).astype(int)    # hard predictions (0 or 1)

# 1. Accuracy: overall correctness
acc = accuracy_score(y_true, y_pred_binary)
# 2. Precision: ability not to label a negative sample as positive
prec = precision_score(y_true, y_pred_binary, zero_division=0)
# 3. Recall: ability to find all positive samples
rec = recall_score(y_true, y_pred_binary, zero_division=0)
# 4. F1 score: harmonic mean of precision and recall
f1 = f1_score(y_true, y_pred_binary, zero_division=0)
# 5. AUC: area under the ROC curve (performance across thresholds)
auc = roc_auc_score(y_true, y_scores)

# 6. False positive rate (FPR)
# labels=[0, 1] forces a 2x2 matrix even if one class never occurs, so the
# four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred_binary, labels=[0, 1]).ravel()
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

# Aggregate results as plain Python floats so the logged dict does not carry
# numpy scalar types (e.g. np.float64).
metrics = {
    "accuracy": round(float(acc), 4),
    "precision": round(float(prec), 4),
    "recall": round(float(rec), 4),
    "f1_score": round(float(f1), 4),
    "auc": round(float(auc), 4),
    "fpr": round(float(fpr), 4)
}

# Pass metrics to the monitor for final logging
monitor.end_measurement(task_name="Inferenz", extra_metrics=metrics)

Starting inference (entire test set)...
--- Ergebnisse CNN (Inferenz) ---
Zeit: 1.6696s | GPU-Last: 49.8%
VRAM (System): 1341.04 MB | VRAM (Torch): 34.02 MB


{'model': 'CNN',
 'task': 'Inferenz',
 'time_sec': 1.6696,
 'ram_mb': 1687.63,
 'vram_mb': 1341.04,
 'torch_vram_mb': 34.02,
 'cpu_percent': 85.0,
 'gpu_util_percent': 49.8,
 'accuracy': 0.9926,
 'precision': 0.9965,
 'recall': 0.9899,
 'f1_score': 0.9932,
 'auc': 0.9996,
 'fpr': np.float64(0.0042)}