In [3]:
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import numpy as np
from collections import Counter

def evaluation_para(y_true, y_pred):
    """
    Compute binary-classification metrics with label 1 as the positive class.

    Args:
        y_true (array-like): Ground-truth labels.
        y_pred (array-like): Predicted labels from the model.

    Returns:
        list: The metrics in the following order:
              [accuracy, precision, recall, fpr, f1].
              Precision, recall, F1 and FPR are reported as 0 when they are
              undefined (zero denominator).
    """
    accuracy = accuracy_score(y_true, y_pred)

    # Use zero_division=0 on all three scores so that a degenerate prediction
    # (e.g. no positive predicted, or no positive in y_true) is handled the
    # same way everywhere instead of only for precision.
    precision = precision_score(y_true, y_pred, average='binary', zero_division=0)
    recall = recall_score(y_true, y_pred, average='binary', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='binary', zero_division=0)

    # Count FP / TN directly rather than unpacking confusion_matrix(...).ravel():
    # without an explicit `labels=` the matrix shrinks to 1x1 when only one
    # label value occurs, and the four-way unpack raises ValueError.
    # "Positive" is label 1, matching the default pos_label of the scores above.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    actual_negative = true_arr != 1
    fp = np.sum(actual_negative & (pred_arr == 1))
    tn = np.sum(actual_negative & (pred_arr != 1))
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

    return [accuracy, precision, recall, fpr, f1]


def getdata(train_path='dataset/train.npy', test_path='dataset/test.npy'):
    """
    Load the train/test arrays and split them into features and -1/+1 labels.

    Each file is loaded with np.load and indexed as a 2-D array: the last
    column is taken as the label, all preceding columns as features.

    Args:
        train_path: Location of the training array. Defaults to the path the
            notebook has always used, so `getdata()` behaves as before.
        test_path: Location of the test array (same default convention).

    Returns:
        tuple: (X_train, y_train, X_test, y_test), where the label vectors are
               integer arrays containing -1 and 1.
    """
    train = np.load(train_path)
    test = np.load(test_path)

    # Use the full data set: last column is the label, the rest are features.
    X_train, y_train = train[:, :-1], train[:, -1]
    X_test, y_test = test[:, :-1], test[:, -1]

    # Label conversion (same encoding as used for FGSVM): 0 -> -1, and every
    # other value -> 1.
    y_train = np.where(y_train == 0, -1, 1).astype(int)
    y_test = np.where(y_test == 0, -1, 1).astype(int)

    print(f"[DEBUG] 训练集形状: X={X_train.shape}, y={y_train.shape}")
    print(f"[DEBUG] 训练集 y 分布: {Counter(y_train)}")
    print(f"[DEBUG] 测试集 y 分布: {Counter(y_test)}")
    print("测试集标签集合:", set(y_test))

    return X_train, y_train, X_test, y_test


if __name__ == '__main__':
    # Baseline experiment: a plain RBF-kernel SVC on the standardized features.
    X_train, y_train, X_test, y_test = getdata()

    # Feature standardization (SVMs are very sensitive to feature scale).
    # The scaler is fitted on the training split only and then applied to the
    # test split, so no test statistics leak into training.
    print("\n正在进行特征标准化 (StandardScaler)...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("开始训练普通 SVM (SVC with RBF kernel)...")
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump, which distorts short elapsed-time measurements.
    start_time = time.perf_counter()

    model = SVC(
        C=1.0,                    # start from 1.0 to limit over-fitting
        kernel='rbf',
        gamma='scale',            # gamma = 1 / (n_features * X.var())
        # gamma=0.01,             # uncomment to tune by hand, e.g. within 0.001~0.1
        class_weight='balanced',  # give the minority class a higher weight
        tol=1e-3,
        max_iter=-1,              # no iteration cap; run until convergence
        random_state=42,          # per scikit-learn docs, only used when probability=True
        verbose=True              # print the libsvm training log
    )

    # Train
    model.fit(X_train_scaled, y_train)

    train_time = time.perf_counter() - start_time
    print(f"训练完成，用时: {train_time:.2f} 秒")

    # Predict
    start_time = time.perf_counter()
    pred = model.predict(X_test_scaled)
    predict_time = time.perf_counter() - start_time
    print(f"预测完成，用时: {predict_time:.2f} 秒")

    # Debug output: distribution of the predicted labels
    print("\n[DEBUG] 测试集预测值分布:", Counter(pred))
    print("[DEBUG] 正类预测数量 (1):", np.sum(pred == 1))
    print("[DEBUG] 负类预测数量 (-1):", np.sum(pred == -1))

    # getdata() encodes the labels as -1 / 1; passing them explicitly keeps the
    # matrix 2x2 (rows/cols ordered -1, 1) so the four-way unpack below cannot
    # fail when a label value happens to be absent.
    cm = confusion_matrix(y_test, pred, labels=[-1, 1])
    print("\n混淆矩阵:\n", cm)

    tn, fp, fn, tp = cm.ravel()
    print(f"TP={tp}, FP={fp}, TN={tn}, FN={fn}")

    result = evaluation_para(y_test, pred)
    print("\n最终评估结果:", result)

[DEBUG] 训练集形状: X=(9527, 16), y=(9527,)
[DEBUG] 训练集 y 分布: Counter({np.int64(-1): 6817, np.int64(1): 2710})
[DEBUG] 测试集 y 分布: Counter({np.int64(-1): 2923, np.int64(1): 1161})
测试集标签集合: {np.int64(1), np.int64(-1)}

正在进行特征标准化 (StandardScaler)...
开始训练普通 SVM (SVC with RBF kernel)...
[LibSVM].
*
optimization finished, #iter = 1456
obj = -785.017927, rho = -0.086582
nSV = 925, nBSV = 860
Total nSV = 925
训练完成，用时: 0.33 秒
预测完成，用时: 0.21 秒

[DEBUG] 测试集预测值分布: Counter({np.int64(-1): 2894, np.int64(1): 1190})
[DEBUG] 正类预测数量 (1): 1190
[DEBUG] 负类预测数量 (-1): 2894

混淆矩阵:
 [[2854   69]
 [  40 1121]]
TP=1121, FP=69, TN=2854, FN=40

最终评估结果: [0.9733104799216454, 0.9420168067226891, 0.9655469422911284, np.float64(0.023605884365378037), 0.9536367503190132]


In [22]:
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import numpy as np
from collections import Counter
from granular import FGSVM  # 假设你的 FGSVM 类在 granular.py 中


def evaluation_para(y_true, y_pred):
    """
    Compute binary-classification metrics with label 1 as the positive class.

    Args:
        y_true (array-like): Ground-truth labels.
        y_pred (array-like): Predicted labels from the model.

    Returns:
        list: The metrics in the following order:
              [accuracy, precision, recall, fpr, f1].
              Precision, recall, F1 and FPR are reported as 0 when they are
              undefined (zero denominator).
    """
    accuracy = accuracy_score(y_true, y_pred)

    # Use zero_division=0 on all three scores so that a degenerate prediction
    # (e.g. no positive predicted, or no positive in y_true) is handled the
    # same way everywhere instead of only for precision.
    precision = precision_score(y_true, y_pred, average='binary', zero_division=0)
    recall = recall_score(y_true, y_pred, average='binary', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='binary', zero_division=0)

    # Count FP / TN directly rather than unpacking confusion_matrix(...).ravel():
    # without an explicit `labels=` the matrix shrinks to 1x1 when only one
    # label value occurs, and the four-way unpack raises ValueError.
    # "Positive" is label 1, matching the default pos_label of the scores above.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    actual_negative = true_arr != 1
    fp = np.sum(actual_negative & (pred_arr == 1))
    tn = np.sum(actual_negative & (pred_arr != 1))
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

    return [accuracy, precision, recall, fpr, f1]


def getdata(train_path='dataset/train.npy', test_path='dataset/test.npy'):
    """
    Load the train/test arrays and split them into features and -1/+1 labels.

    Each file is loaded with np.load and indexed as a 2-D array: the last
    column is taken as the label, all preceding columns as features.

    Args:
        train_path: Location of the training array. Defaults to the path the
            notebook has always used, so `getdata()` behaves as before.
        test_path: Location of the test array (same default convention).

    Returns:
        tuple: (X_train, y_train, X_test, y_test), where the label vectors are
               integer arrays containing -1 and 1.
    """
    train = np.load(train_path)
    test = np.load(test_path)

    # Use the full data set: last column is the label, the rest are features.
    X_train, y_train = train[:, :-1], train[:, -1]
    X_test, y_test = test[:, :-1], test[:, -1]

    # Label conversion: 0 -> -1, and every other value -> 1.
    y_train = np.where(y_train == 0, -1, 1).astype(int)
    y_test = np.where(y_test == 0, -1, 1).astype(int)

    print(f"[DEBUG] 训练集形状: X={X_train.shape}, y={y_train.shape}")
    print(f"[DEBUG] 训练集 y 分布: {Counter(y_train)}")
    print(f"[DEBUG] 测试集 y 分布: {Counter(y_test)}")
    print("测试集标签集合:", set(y_test))

    return X_train, y_train, X_test, y_test


if __name__ == '__main__':
    # FGSVM experiment on StandardScaler-normalized features.
    X_train, y_train, X_test, y_test = getdata()

    # Feature standardization: fitted on the training split only and then
    # applied to the test split.
    print("\n正在进行特征标准化 (StandardScaler)...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("开始训练 FGSVM...")
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump, which distorts short elapsed-time measurements.
    start_time = time.perf_counter()

    model = FGSVM(
        C=10.0,          # penalty strength
        kernel='rbf',
        degree=3,
        gamma=4.9,       # NOTE(review): still 4.9; an earlier note considered this
                         # too large and suggested trying 0.01~0.1 (or a
                         # 'scale'-style value, 1 / (n_features * X.var()))
        beta=0.929,
        tol=1e-3,
        max_iter=100     # iteration cap handed to FGSVM
    )

    # Train on the standardized data. The run log reports 15 base SVMs, so
    # reference_n presumably sets how many are trained -- confirm in granular.py.
    model.fit(X_train_scaled, y_train, reference_n=15)

    train_time = time.perf_counter() - start_time
    print(f"训练完成，用时: {train_time:.2f} 秒")

    # Predict on the standardized test split
    start_time = time.perf_counter()
    pred = model.predict(X_test_scaled)
    predict_time = time.perf_counter() - start_time
    print(f"预测完成，用时: {predict_time:.2f} 秒")

    # Debug output: distribution of the predicted labels
    print("\n[DEBUG] 测试集预测值分布:", Counter(pred))
    print("[DEBUG] 正类预测数量 (1):", np.sum(pred == 1))
    print("[DEBUG] 负类预测数量 (-1):", np.sum(pred == -1))

    # getdata() encodes the labels as -1 / 1; passing them explicitly keeps the
    # matrix 2x2 (rows/cols ordered -1, 1) so the four-way unpack below cannot
    # fail when a label value happens to be absent.
    cm = confusion_matrix(y_test, pred, labels=[-1, 1])
    print("\n混淆矩阵:\n", cm)
    tn, fp, fn, tp = cm.ravel()
    print(f"TP={tp}, FP={fp}, TN={tn}, FN={fn}")

    result = evaluation_para(y_test, pred)
    print("\n最终评估结果:", result)

[DEBUG] 训练集形状: X=(9527, 16), y=(9527,)
[DEBUG] 训练集 y 分布: Counter({np.int64(-1): 6817, np.int64(1): 2710})
[DEBUG] 测试集 y 分布: Counter({np.int64(-1): 2923, np.int64(1): 1161})
测试集标签集合: {np.int64(1), np.int64(-1)}

正在进行特征标准化 (StandardScaler)...
开始训练 FGSVM...
Granulating the training data...
Granulation finished, starting training.
开始训练 15 个基 SVM 模型（每个模型独立训练）...


Training base SVMs: 100%|████████████████████████████████████████| 15/15 [00:01<00:00, 11.33model/s]

所有基 SVM 训练完成！
训练完成，用时: 1.38 秒
预测完成，用时: 0.06 秒

[DEBUG] 测试集预测值分布: Counter({np.int64(-1): 2956, np.int64(1): 1128})
[DEBUG] 正类预测数量 (1): 1128
[DEBUG] 负类预测数量 (-1): 2956

混淆矩阵:
 [[2881   42]
 [  75 1086]]
TP=1086, FP=42, TN=2881, FN=75

最终评估结果: [0.9713516160626836, 0.9627659574468085, 0.9354005167958657, np.float64(0.014368799178925761), 0.9488859764089121]





In [28]:
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import numpy as np
from collections import Counter
from granular import FGSVM  # 假设你的 FGSVM 类在 granular.py 中


def evaluation_para(y_true, y_pred):
    """
    Compute binary-classification metrics with label 1 as the positive class.

    Args:
        y_true (array-like): Ground-truth labels.
        y_pred (array-like): Predicted labels from the model.

    Returns:
        list: The metrics in the following order:
              [accuracy, precision, recall, fpr, f1].
              Precision, recall, F1 and FPR are reported as 0 when they are
              undefined (zero denominator).
    """
    accuracy = accuracy_score(y_true, y_pred)

    # Use zero_division=0 on all three scores so that a degenerate prediction
    # (e.g. no positive predicted, or no positive in y_true) is handled the
    # same way everywhere instead of only for precision.
    precision = precision_score(y_true, y_pred, average='binary', zero_division=0)
    recall = recall_score(y_true, y_pred, average='binary', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='binary', zero_division=0)

    # Count FP / TN directly rather than unpacking confusion_matrix(...).ravel():
    # without an explicit `labels=` the matrix shrinks to 1x1 when only one
    # label value occurs, and the four-way unpack raises ValueError.
    # "Positive" is label 1, matching the default pos_label of the scores above.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    actual_negative = true_arr != 1
    fp = np.sum(actual_negative & (pred_arr == 1))
    tn = np.sum(actual_negative & (pred_arr != 1))
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0

    return [accuracy, precision, recall, fpr, f1]


def getdata(train_path='dataset/train.npy', test_path='dataset/test.npy'):
    """
    Load the train/test arrays and split them into features and -1/+1 labels.

    Each file is loaded with np.load and indexed as a 2-D array: the last
    column is taken as the label, all preceding columns as features.

    Args:
        train_path: Location of the training array. Defaults to the path the
            notebook has always used, so `getdata()` behaves as before.
        test_path: Location of the test array (same default convention).

    Returns:
        tuple: (X_train, y_train, X_test, y_test), where the label vectors are
               integer arrays containing -1 and 1.
    """
    train = np.load(train_path)
    test = np.load(test_path)

    # Use the full data set: last column is the label, the rest are features.
    X_train, y_train = train[:, :-1], train[:, -1]
    X_test, y_test = test[:, :-1], test[:, -1]

    # Label conversion: 0 -> -1, and every other value -> 1.
    y_train = np.where(y_train == 0, -1, 1).astype(int)
    y_test = np.where(y_test == 0, -1, 1).astype(int)

    print(f"[DEBUG] 训练集形状: X={X_train.shape}, y={y_train.shape}")
    print(f"[DEBUG] 训练集 y 分布: {Counter(y_train)}")
    print(f"[DEBUG] 测试集 y 分布: {Counter(y_test)}")
    print("测试集标签集合:", set(y_test))

    return X_train, y_train, X_test, y_test


if __name__ == '__main__':
    # FGSVM experiment with the same model parameters, but RobustScaler input.
    X_train, y_train, X_test, y_test = getdata()

    # Feature scaling: RobustScaler is the active choice here (intended to be
    # more robust to outliers).
    print("\n正在进行特征归一化/标准化...")
    print("当前使用：RobustScaler（推荐用于网络流量数据中的极端值）")

    # Option 1: RobustScaler (currently active)
    scaler = RobustScaler()

    # Option 2: StandardScaler (used in the previous run; uncomment to switch)
    # scaler = StandardScaler()

    # Option 3: MinMaxScaler (scales to [0, 1]; sometimes suits the RBF kernel)
    # scaler = MinMaxScaler()

    # Fit on the training split only, then apply the same transform to the test split.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("开始训练 FGSVM...")
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump, which distorts short elapsed-time measurements.
    start_time = time.perf_counter()

    model = FGSVM(
        C=10.0,          # unchanged from the previous run
        kernel='rbf',
        degree=3,
        gamma=4.9,       # unchanged (noted as possibly too large; consider lowering later)
        beta=0.929,      # unchanged
        tol=1e-3,
        max_iter=100     # unchanged
    )

    # Train on the scaled data
    model.fit(X_train_scaled, y_train, reference_n=15)

    train_time = time.perf_counter() - start_time
    print(f"训练完成，用时: {train_time:.2f} 秒")

    # Predict on the scaled test split
    start_time = time.perf_counter()
    pred = model.predict(X_test_scaled)
    predict_time = time.perf_counter() - start_time
    print(f"预测完成，用时: {predict_time:.2f} 秒")

    # Debug output: distribution of the predicted labels
    print("\n[DEBUG] 测试集预测值分布:", Counter(pred))
    print("[DEBUG] 正类预测数量 (1):", np.sum(pred == 1))
    print("[DEBUG] 负类预测数量 (-1):", np.sum(pred == -1))

    # getdata() encodes the labels as -1 / 1; passing them explicitly keeps the
    # matrix 2x2 (rows/cols ordered -1, 1) so the four-way unpack below cannot
    # fail when a label value happens to be absent.
    cm = confusion_matrix(y_test, pred, labels=[-1, 1])
    print("\n混淆矩阵:\n", cm)
    tn, fp, fn, tp = cm.ravel()
    print(f"TP={tp}, FP={fp}, TN={tn}, FN={fn}")

    result = evaluation_para(y_test, pred)
    print("\n最终评估结果:", result)

[DEBUG] 训练集形状: X=(9527, 16), y=(9527,)
[DEBUG] 训练集 y 分布: Counter({np.int64(-1): 6817, np.int64(1): 2710})
[DEBUG] 测试集 y 分布: Counter({np.int64(-1): 2923, np.int64(1): 1161})
测试集标签集合: {np.int64(1), np.int64(-1)}

正在进行特征归一化/标准化...
当前使用：RobustScaler（推荐用于网络流量数据中的极端值）
开始训练 FGSVM...
Granulating the training data...
Granulation finished, starting training.
开始训练 15 个基 SVM 模型（每个模型独立训练）...


Training base SVMs: 100%|████████████████████████████████████████| 15/15 [00:01<00:00, 13.99model/s]

所有基 SVM 训练完成！
训练完成，用时: 1.13 秒
预测完成，用时: 0.05 秒

[DEBUG] 测试集预测值分布: Counter({np.int64(-1): 2962, np.int64(1): 1122})
[DEBUG] 正类预测数量 (1): 1122
[DEBUG] 负类预测数量 (-1): 2962

混淆矩阵:
 [[2889   34]
 [  73 1088]]
TP=1088, FP=34, TN=2889, FN=73

最终评估结果: [0.9738001958863859, 0.9696969696969697, 0.9371231696813093, np.float64(0.011631885049606569), 0.953131844064827]



