In [1]:
import tensorflow as tf
print("TensorFlow Version:", tf.__version__)
print(tf.config.list_physical_devices('GPU'))


TensorFlow Version: 2.10.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [44]:

from scipy.io import arff
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


filepath = r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-15s-VPN.arff'

# Daten laden
raw_data, meta = arff.loadarff(filepath)
df = pd.DataFrame(raw_data)

# 2. Labels umwandeln (ARFF speichert sie als Byte-Strings)
df['class1'] = df['class1'].apply(lambda x: x.decode('utf-8'))

# 3. Feature-Auswahl (relevante Spalten für das Modell)
selected_features = [
    'duration', 'total_fiat', 'total_biat', 'min_fiat', 'max_fiat',
    'min_flowiat', 'max_flowiat', 'mean_flowiat', 'std_flowiat',
    'mean_active', 'mean_idle', 'std_active', 'std_idle',
    'flowBytesPerSecond', 'flowPktsPerSecond'
]
df_selected = df[selected_features]
labels = df['class1']

# 4. Behandlung fehlender Werte (z. B. mit Median füllen)
df_selected = df_selected.fillna(df_selected.median())

# 5. Feature-Skalierung (Standardisierung)
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_selected)

# Umwandlung zurück in DataFrame
df_scaled = pd.DataFrame(df_scaled, columns=selected_features)

# 6. Aufteilung in Trainings- und Testdaten
X_train, X_test, y_train, y_test = train_test_split(
    df_scaled, labels, test_size=0.2, random_state=42, stratify=labels
)

# 7. Überprüfung der fertigen Datensätze
print("Trainingsdaten Shape:", X_train.shape)
print("Testdaten Shape:", X_test.shape)


Trainingsdaten Shape: (15006, 15)
Testdaten Shape: (3752, 15)


In [46]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

# Skalierung der Features mit StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Falls X_* noch DataFrames sind, in NumPy-Arrays umwandeln
if isinstance(X_train_scaled, pd.DataFrame):
    X_train_np = X_train_scaled.values
    X_test_np = X_test_scaled.values
else:
    X_train_np = X_train_scaled
    X_test_np = X_test_scaled

# Labels als NumPy-Array speichern
y_train_np = np.array(y_train)
y_test_np = np.array(y_test)

# Nur Non-VPN-Daten für das Training verwenden (unüberwachtes Lernen)
mask_normal_train = (y_train_np == "Non-VPN").astype(bool)
X_train_norm = X_train_np[mask_normal_train]

# Definition der Autoencoder-Architektur
input_dim = X_train_norm.shape[1]
input_layer = keras.Input(shape=(input_dim,))
x = layers.Dense(128, activation='relu')(input_layer)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(96, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(48, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(16, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(48, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(96, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(128, activation='relu')(x)
output_layer = layers.Dense(input_dim, activation='linear')(x)

autoencoder = keras.Model(inputs=input_layer, outputs=output_layer)

# Kompilieren des Autoencoders mit Adamax-Optimizer und Huber-Loss
autoencoder.compile(optimizer=keras.optimizers.Adamax(learning_rate=0.0007), loss=tf.keras.losses.Huber(delta=1.0))

# Callbacks für Early Stopping und Learning Rate Anpassung
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
clr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=5, min_lr=1e-6)

# Training des Autoencoders
history = autoencoder.fit(
    X_train_norm, X_train_norm,
    epochs=150,
    batch_size=64,
    shuffle=True,
    validation_split=0.1,
    verbose=1,
    callbacks=[clr, early_stopping]
)

# Vorhersage des Autoencoders auf Testdaten
X_test_pred = autoencoder.predict(X_test_np, verbose=0)
mse = np.mean(np.power(X_test_np - X_test_pred, 2), axis=1)

# Berechnung des Rekonstruktionsfehlers auf Trainingsdaten
X_train_norm_pred = autoencoder.predict(X_train_norm, verbose=0)
mse_train_norm = np.mean(np.power(X_train_norm - X_train_norm_pred, 2), axis=1)

# Festlegen des Schwellenwerts für Anomalie-Erkennung
threshold = np.percentile(mse_train_norm, 65)

y_pred = np.where(mse > threshold, 1, 0)  # 1 = Anomalie (VPN), 0 = Normal (Non-VPN)
y_true = np.where(y_test_np == "VPN", 1, 0)

# Berechnung der Confusion Matrix und der Klassifikationsmetriken
cm = confusion_matrix(y_true, y_pred)
print("\nCONFUSION MATRIX:\n", cm)

report = classification_report(y_true, y_pred, target_names=["Non-VPN","VPN"])
print("\nCLASSIFICATION REPORT:\n", report)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150

CONFUSION MATRIX:
 [[1192  601]
 [1108  851]]

CLASSIFICATION REPORT:
               precision    recall  f1-score   support

     Non-VPN       0.52      0.66      0.58      1793
         VPN       0.59      0.43      0.50      1959

    accuracy                           0.54      3752
   macro avg       0.55      0.55      0.54      3752
weighted avg       0.55      0.54      0.54      3752



In [3]:
# TEST ALL MODELLS (81 VARIANTS)

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from scipy.io import arff
from sklearn.model_selection import train_test_split

filepaths = [
    r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-15s-VPN.arff',
    r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-30s-VPN.arff',
    r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-60s-VPN.arff'
]

bottleneck_sizes = [16, 32, 64]
learning_rates = [0.0005, 0.0007, 0.001]
dropout_rates = [0.1, 0.2, 0.3]
thresholds = [50, 65, 75]

best_results = {}

for filepath in filepaths:
    dataset_name = filepath.split('-')[-1].replace('.arff', '')
    print(f"🚀 Starte Training für Datensatz: {dataset_name}")

    raw_data, meta = arff.loadarff(filepath)
    df = pd.DataFrame(raw_data)

    df.replace([b'', ''], np.nan, inplace=True)

    for col in df.select_dtypes([object]):
        df[col] = df[col].apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)

    df['class1'] = df['class1'].astype(str)

    selected_features = [
        'duration', 'total_fiat', 'total_biat', 'min_fiat', 'max_fiat',
        'min_flowiat', 'max_flowiat', 'mean_flowiat', 'std_flowiat',
        'mean_active', 'mean_idle', 'std_active', 'std_idle',
        'flowBytesPerSecond', 'flowPktsPerSecond'
    ]

    df_selected = df[selected_features]
    labels = df['class1']
    df_selected = df_selected.fillna(df_selected.median())

    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df_selected)
    df_scaled = pd.DataFrame(df_scaled, columns=selected_features)

    X_train, X_test, y_train, y_test = train_test_split(
        df_scaled, labels, test_size=0.2, random_state=42, stratify=labels
    )

    X_train_np = X_train.values
    X_test_np = X_test.values
    y_train_np = np.array(y_train)
    y_test_np = np.array(y_test)

    mask_normal_train = (y_train_np == "Non-VPN").astype(bool)
    X_train_norm = X_train_np[mask_normal_train]

    best_f1 = 0
    best_model = None
    best_params = None

    for bottleneck in bottleneck_sizes:
        for lr in learning_rates:
            for dropout_rate in dropout_rates:
                # Architektur definieren
                input_dim = X_train_norm.shape[1]
                input_layer = keras.Input(shape=(input_dim,))
                x = layers.Dense(128, activation='elu')(input_layer)
                x = layers.BatchNormalization()(x)
                x = layers.Dropout(dropout_rate)(x)
                x = layers.Dense(96, activation='elu')(x)
                x = layers.BatchNormalization()(x)
                x = layers.Dropout(dropout_rate)(x)
                x = layers.Dense(48, activation='elu')(x)
                x = layers.BatchNormalization()(x)
                x = layers.Dense(bottleneck, activation='elu')(x)
                x = layers.BatchNormalization()(x)
                x = layers.Dense(48, activation='elu')(x)
                x = layers.BatchNormalization()(x)
                x = layers.Dropout(dropout_rate)(x)
                x = layers.Dense(96, activation='elu')(x)
                x = layers.BatchNormalization()(x)
                x = layers.Dropout(dropout_rate)(x)
                x = layers.Dense(128, activation='elu')(x)
                output_layer = layers.Dense(input_dim, activation='linear')(x)

                autoencoder = keras.Model(inputs=input_layer, outputs=output_layer)
                autoencoder.compile(optimizer=keras.optimizers.Adamax(learning_rate=lr),
                                   loss=tf.keras.losses.Huber(delta=1.0))

                autoencoder.fit(
                    X_train_norm, X_train_norm,
                    epochs=100,
                    batch_size=64,
                    shuffle=True,
                    validation_split=0.1,
                    verbose=0
                )

                X_test_pred = autoencoder.predict(X_test_np, verbose=0)
                mse = np.mean(np.power(X_test_np - X_test_pred, 2), axis=1)

                X_train_norm_pred = autoencoder.predict(X_train_norm, verbose=0)
                mse_train_norm = np.mean(np.power(X_train_norm - X_train_norm_pred, 2), axis=1)

                for threshold_p in thresholds:
                    threshold = np.percentile(mse_train_norm, threshold_p)
                    print(f"🔄 Testing: {dataset_name} | Bottleneck={bottleneck}, LR={lr}, Dropout={dropout_rate}, Threshold={threshold_p}%")

                    y_pred = np.where(mse > threshold, 1, 0)
                    y_true = np.where(y_test_np == "VPN", 1, 0)

                    cm = confusion_matrix(y_true, y_pred)
                    report = classification_report(y_true, y_pred, target_names=["Non-VPN", "VPN"], output_dict=True)
                    f1_score = report["VPN"]["f1-score"]

                    if f1_score > best_f1:
                        best_f1 = f1_score
                        best_model = autoencoder
                        best_params = (bottleneck, lr, dropout_rate, threshold_p)

    best_results[dataset_name] = {
        "best_f1": best_f1,
        "best_params": best_params
    }

    print(f"\n Best Model for {dataset_name}:")
    print(f"  - Bottleneck: {best_params[0]}")
    print(f"  - Learning Rate: {best_params[1]}")
    print(f"  - Dropout Rate: {best_params[2]}")
    print(f"  - Threshold: {best_params[3]}%")
    print(f"  - Best F1-Score: {best_f1:.4f}")
#

🚀 Starte Training für Datensatz: VPN
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.1, Threshold=50%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.1, Threshold=65%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.1, Threshold=75%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.2, Threshold=50%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.2, Threshold=65%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.2, Threshold=75%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.3, Threshold=50%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.3, Threshold=65%
🔄 Testing: VPN | Bottleneck=16, LR=0.0005, Dropout=0.3, Threshold=75%
🔄 Testing: VPN | Bottleneck=16, LR=0.0007, Dropout=0.1, Threshold=50%
🔄 Testing: VPN | Bottleneck=16, LR=0.0007, Dropout=0.1, Threshold=65%
🔄 Testing: VPN | Bottleneck=16, LR=0.0007, Dropout=0.1, Threshold=75%
🔄 Testing: VPN | Bottleneck=16, LR=0.0007, Dropout=0.2, Threshold=50%
🔄 Testing: VPN | Bottleneck=16, LR=0.0007, Dropout=0.

In [11]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from scipy.io import arff
from sklearn.model_selection import train_test_split
import os

# **Beste Hyperparameter pro Datensatz**
best_params = {
    "15s": {"bottleneck": 16, "lr": 0.001, "dropout": 0.1, "threshold_p": 50},
    "30s": {"bottleneck": 32, "lr": 0.0007, "dropout": 0.2, "threshold_p": 50},
    "60s": {"bottleneck": 16, "lr": 0.0005, "dropout": 0.1, "threshold_p": 50}
}

# **Datensätze**
filepaths = {
    "15s": r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-15s-VPN.arff',
    "30s": r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-30s-VPN.arff',
    "60s": r'C:\Users\berkb\Desktop\Uni\Bachelor\Projekt\Scenario A1-ARFF\Scenario A1-ARFF\TimeBasedFeatures-Dataset-60s-VPN.arff'
}

# **Speicherort für Modelle**
model_dir = "saved_models"
os.makedirs(model_dir, exist_ok=True)

# **Trainiere & teste je Datensatz mit besten Hyperparametern**
for dataset_name, filepath in filepaths.items():
    print(f"\n🚀 Starte Training & Test für Datensatz: {dataset_name}")

    # **Beste Parameter für diesen Datensatz**
    params = best_params[dataset_name]
    bottleneck = params["bottleneck"]
    learning_rate = params["lr"]
    dropout_rate = params["dropout"]
    threshold_p = params["threshold_p"]

    # **Daten laden**
    raw_data, meta = arff.loadarff(filepath)
    df = pd.DataFrame(raw_data)

    df.replace([b'', ''], np.nan, inplace=True)
    for col in df.select_dtypes([object]):
        df[col] = df[col].apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)

    df['class1'] = df['class1'].astype(str)

    # **Feature-Engineering**
    selected_features = [
        'duration', 'total_fiat', 'total_biat', 'min_fiat', 'max_fiat',
        'min_flowiat', 'max_flowiat', 'mean_flowiat', 'std_flowiat',
        'mean_active', 'mean_idle', 'std_active', 'std_idle',
        'flowBytesPerSecond', 'flowPktsPerSecond'
    ]

    df_selected = df[selected_features]
    labels = df['class1']
    df_selected = df_selected.fillna(df_selected.median())

    # **Skalierung**
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df_selected)
    df_scaled = pd.DataFrame(df_scaled, columns=selected_features)

    # **Train-Test-Split**
    X_train, X_test, y_train, y_test = train_test_split(
        df_scaled, labels, test_size=0.2, random_state=42, stratify=labels
    )

    X_train_np = X_train.values
    X_test_np = X_test.values
    y_train_np = np.array(y_train)
    y_test_np = np.array(y_test)

    mask_normal_train = (y_train_np == "Non-VPN").astype(bool)
    X_train_norm = X_train_np[mask_normal_train]

    # **Autoencoder Architektur**
    input_dim = X_train_norm.shape[1]
    input_layer = keras.Input(shape=(input_dim,))
    x = layers.Dense(128, activation='elu')(input_layer)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(96, activation='elu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(48, activation='elu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(bottleneck, activation='elu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(48, activation='elu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(96, activation='elu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(128, activation='elu')(x)
    output_layer = layers.Dense(input_dim, activation='linear')(x)

    autoencoder = keras.Model(inputs=input_layer, outputs=output_layer)
    autoencoder.compile(optimizer=keras.optimizers.Adamax(learning_rate=learning_rate),
                        loss=tf.keras.losses.Huber(delta=1.0))

    # **Training des Autoencoders**
    autoencoder.fit(
        X_train_norm, X_train_norm,
        epochs=100,
        batch_size=64,
        shuffle=True,
        validation_split=0.1,
        verbose=1
    )

    # **Threshold berechnen**
    X_train_norm_pred = autoencoder.predict(X_train_norm, verbose=0)
    mse_train_norm = np.mean(np.power(X_train_norm - X_train_norm_pred, 2), axis=1)
    threshold = np.percentile(mse_train_norm, threshold_p)

    print(f"\n✅ Training für {dataset_name} abgeschlossen! Schwellenwert = {threshold:.6f}\n")
    
    # **Modell testen**
    X_test_pred = autoencoder.predict(X_test_np, verbose=0)
    mse = np.mean(np.power(X_test_np - X_test_pred, 2), axis=1)
    
    y_pred = np.where(mse > threshold, 1, 0)
    y_true = np.where(y_test_np == "VPN", 1, 0)

    # **Ergebnisse ausgeben**
    cm = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred, target_names=["Non-VPN", "VPN"])

    print(f"\n🏆 Ergebnisse für {dataset_name}:")
    print("CONFUSION MATRIX:\n", cm)
    print("CLASSIFICATION REPORT:\n", report)

    # **Modell speichern**
    model_path = os.path.join(model_dir, f"autoencoder_{dataset_name}.h5")
    autoencoder.save(model_path)
    print(f"✅ Modell gespeichert: {model_path}")



🚀 Starte Training & Test für Datensatz: 15s
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

In [3]:
import tensorflow as tf
import os

# Pfad zu den gespeicherten Modellen (ggf. anpassen)
model_dir = "C:/Users/berkb/Desktop/Uni/Bachelor/Projekt/Scenario A1-ARFF/Scenario A1-ARFF/saved_models"

# Modelle laden
models = {
    "15s": tf.keras.models.load_model(os.path.join(model_dir, "autoencoder_15s.h5")),
    "30s": tf.keras.models.load_model(os.path.join(model_dir, "autoencoder_30s.h5")),
    "60s": tf.keras.models.load_model(os.path.join(model_dir, "autoencoder_60s.h5")),
}

print(" Alle Modelle erfolgreich geladen!")


for dataset_name, model in models.items():
    print(f"\n🚀 Teste Modell {dataset_name} auf den passenden Datensatz...")
    
    # **Den passenden Datensatz laden**
    test_filepath = filepaths[dataset_name]
    raw_data, meta = arff.loadarff(test_filepath)
    df = pd.DataFrame(raw_data)

    df.replace([b'', ''], np.nan, inplace=True)
    for col in df.select_dtypes([object]):
        df[col] = df[col].apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)

    df['class1'] = df['class1'].astype(str)

    df_selected = df[selected_features]
    labels = df['class1']
    df_selected = df_selected.fillna(df_selected.median())

    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df_selected)
    df_scaled = pd.DataFrame(df_scaled, columns=selected_features)

    X_test = df_scaled.values
    y_test = np.array(labels)

    # **Vorhersage mit dem Modell**
    X_test_pred = model.predict(X_test, verbose=0)
    mse = np.mean(np.power(X_test - X_test_pred, 2), axis=1)

    # **Threshold aus dem Training nutzen (manuell anpassen oder berechnen)**
    threshold = np.percentile(mse, 50)  

    # **Anomalie-Erkennung**
    y_pred = np.where(mse > threshold, 1, 0)  
    y_true = np.where(y_test == "VPN", 1, 0)

    # **Ergebnisse ausgeben**
    cm = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred, target_names=["Non-VPN", "VPN"])

    print(f"\n Testergebnisse für {dataset_name}:")
    print("CONFUSION MATRIX:\n", cm)
    print("CLASSIFICATION REPORT:\n", report)


 Alle Modelle erfolgreich geladen!

🚀 Teste Modell 15s auf den passenden Datensatz...


NameError: name 'filepaths' is not defined