In [7]:
import pandas as pd
from scipy.spatial.distance import cdist
import numpy as np
import torch
import matplotlib.pyplot as plt

In [None]:
# 自体半径
self_radius = 0.01
# self_count = 800

detectors = pd.read_csv(f'detectors.csv')
print(f"detectors:{len(detectors)}")

# 划分自体训练检测半径
train_self = pd.read_csv("check/self/train_self.csv").sample(frac=1, random_state=42)
# train_Self = train_self.sample(n = self_count, random_state=42)
print(f"train_self:{len(train_self)}")

# 测试集自体数量
test_Self = pd.read_csv("check/self/test_self.csv")
test_Self = test_Self.sample(n = 5000, random_state=42)
print(f"test_self:{len(test_Self)}")

# 测试集非自体数量
test_Nonself = pd.read_csv("check/nonself/test_nonself.csv")
test_Nonself = test_Nonself.sample(n = 5000, random_state=42)
print(f"test_nonself:{len(test_Nonself)}")

unknown = pd.read_csv(f'check/unknown.csv')
print(f"unknown:{len(unknown)}")



detectors:18738
train_self:67343
test_self:5000
test_nonself:5000
unknown:3750


In [9]:
# 读取数据
def get_data(data_df):
    data = data_df.map(str).apply(lambda row: f"[{' '.join(row)}]", axis=1).tolist()
    return data

# 添加检测半径
def add_detection_radius(detectors_df, self_radius,train_Self):
    detector_coords = detectors_df.values
    self_coords = train_Self.values
    distances = cdist(detector_coords, self_coords)
    detectors_df['radius'] = distances.min(axis=1) - self_radius
    detectors_df['radius'] = detectors_df['radius'].clip(lower=0)
    return detectors_df

# 评估非自体覆盖率
def evaluate_nonselfcoverage(detectors_df, nonself_df):
    detector_coords = detectors_df.values[:,:-1]
    nonself_coords = nonself_df.values
    distances = cdist(nonself_coords, detector_coords)
    radii = detectors_df['radius'].values.reshape(1, -1)
    covered = (distances <= radii).any(axis=1)
    covered_count = covered.sum()
    
    # nonself_df['covered'] = covered
    # return nonself_df
    return covered_count
    
# 评估非自体覆盖情况
def evaluate_nonself_coverage(detectors_df):
    detector_coords = detectors_df.iloc[:, :-1].values
    nonself_coords = test_Nonself.values
    distances = cdist(nonself_coords, detector_coords)
    radii = detectors_df['radius'].values.reshape(1, -1)
    covered = (distances <= radii).any(axis=1)
    return covered  # 返回每个非自体样本是否被覆盖的布尔数组

# 评估自体覆盖情况
def evaluate_self_coverage(detectors_df):
    detector_coords = detectors_df.iloc[:, :-1].values
    self_coords = test_Self.values
    distances = cdist(self_coords, detector_coords)
    radii = detectors_df['radius'].values.reshape(1, -1)
    covered = (distances <= radii).any(axis=1)
    return covered  # 返回每个自体样本是否被覆盖的布尔数组

# 计算指标
def calculate_metrics(self_covered, nonself_covered, total_self, total_nonself):

    TP = nonself_covered.sum()  # 非自体 被正确检测为 异常
    FP = self_covered.sum()  # 自体 被错误检测为 异常
    FN = total_nonself - TP  # 非自体 被错误检测为 正常 （非自体 没有 被正确检测为 异常 ）
    TN = total_self - FP  # 自体 被正确检测为 正常（自体 没有被 错误检测为 异常）

    # 计算指标
    accuracy = (TP + TN) / (total_self + total_nonself)
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    fpr = FP / (FP + TN) if (FP + TN) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    confusion_matrix = np.array([[TP, FP], [FN, TN]])
    
    return {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score,
        "False Positive Rate (FPR)": fpr,
        "Confusion Matrix": confusion_matrix
    }

# 绘指标柱状图    
def plot_metrics_bar_chart():
    metrics_data = {}
    with open(f"results.txt", "r") as f:
        lines = f.readlines()
        for line in lines:
            if any(metric in line for metric in ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'False Positive Rate','Unknown covery']):
                metric, value = line.split(':')
                value = float(value.strip().strip('%')) / 100  # 将百分比转换为小数
                metrics_data[metric.strip()] = value

    plt.figure(figsize=(12, 6))
    bars = plt.bar(metrics_data.keys(), metrics_data.values())
    plt.title(f'Performance')
    plt.ylabel('Metric Value')
    plt.xticks(rotation=45, ha='right')
    
    # 在柱子上方显示具体数值
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.2%}',
                ha='center', va='bottom')
    
    # 调整布局
    plt.tight_layout()
    
    # 保存图表
    plt.savefig(f'metrics_bar_chart.png', dpi=300, bbox_inches='tight')
    plt.close()


# 性能评估
def evaluate_detector_performance(detectors_df, self_radius,train_Self):
    detectors_df_copy = detectors_df.copy()
    detectors_radius = add_detection_radius(detectors_df_copy,self_radius,train_Self)
    
    with open(f"results.txt", "a") as f:
        f.write(f"Number of detectors: {len(detectors_df_copy)}\n")
        f.write(f"Self radius: {self_radius}\n")
        # f.write(f"Number of unknown samples in train_set covered by detectors: {evaluate_nonselfcoverage(detectors_radius,train_set_unknown)}\n")
        # f.write(f"Train set unknown covery:{evaluate_nonselfcoverage(detectors_radius,train_set_unknown)/len(test_set_unknown):.4f}\n")
        # f.write(f"Number of unknown samples in test_set covered by detectors: {evaluate_nonselfcoverage(detectors_radius,test_set_unknown)}\n")
        f.write(f"Unknown covery:{evaluate_nonselfcoverage(detectors_radius,unknown)/len(unknown):.2%}\n")
        f.write(f"Number of train_Self: {len(train_Self)}\n")
    # 评估测试集中自体和非自体的覆盖情况
    self_covered = evaluate_self_coverage(detectors_radius)
    nonself_covered = evaluate_nonself_coverage(detectors_radius)

    # 计算各项指标
    metrics = calculate_metrics(
        self_covered=self_covered,
        nonself_covered=nonself_covered,
        total_self=len(test_Self),
        total_nonself=len(test_Nonself)
    )
    with open(f"results.txt", "a") as f:
        # f.write('Metrics:\n')
        for metric, value in metrics.items(): 
                if metric == "Confusion Matrix":
                    f.write(f"{metric}:\n{value}\n")
                else:
                    f.write(f"{metric}: {value:.2%}\n")
        f.write('\n')
    plot_metrics_bar_chart()

In [10]:
# 熔断个体数量
def plot_metrics_vs_self_count(self_count_range):
    metrics_results = {
        'Accuracy': [],
        'Precision': [],
        'Recall': [],
        'F1 Score': [],
        'False Positive Rate (FPR)': [],
        'Unknown Coverage': []
    }
    
    for self_count in self_count_range:
        
        train_Self = train_self.sample(n=self_count, random_state=42)
        print(f"train_Self:{len(train_Self)}")
        detectors_df_copy = detectors.copy()
        detectors_radius = add_detection_radius(detectors_df_copy, self_radius,train_Self)
        
        unknown_coverage = evaluate_nonselfcoverage(detectors_radius, unknown) / len(unknown)
        metrics_results['Unknown Coverage'].append(unknown_coverage)
        
        self_covered = evaluate_self_coverage(detectors_radius)
        nonself_covered = evaluate_nonself_coverage(detectors_radius)
        metrics = calculate_metrics(
            self_covered=self_covered,
            nonself_covered=nonself_covered,
            total_self=len(test_Self),
            total_nonself=len(test_Nonself)
        )
        
        # 存储结果
        for metric in metrics_results.keys():
            if metric != 'Unknown Coverage':
                metrics_results[metric].append(metrics[metric])
        
    
    # 创建图表
    plt.figure(figsize=(15, 10))
    
    # 绘制所有指标
    for metric, values in metrics_results.items():
        plt.plot(self_count_range, values, marker='o', label=metric)
    
    plt.xlabel('自体训练集数量')
    plt.ylabel('指标值')
    plt.title('不同自体训练集数量下的性能指标变化')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    
    # 保存图表
    plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # 找出最佳F1分数对应的自体数量
    best_f1_idx = np.argmax(metrics_results['F1 Score'])
    best_self_count = self_count_range[best_f1_idx]
    
    # 打印最佳结果
    with open(f"results2.txt", "a") as f:
        f.write("\n=== 不同自体训练集数量的性能评估 ===\n")
        f.write(f"最佳自体训练集数量: {best_self_count}\n")
        for metric, values in metrics_results.items():
            f.write(f"最佳{metric}: {values[best_f1_idx]:.2%}\n")

# 在最后的执行代码块中添加
self_count_range = np.arange(100, 67000, 1000)
plot_metrics_vs_self_count(self_count_range)

train_Self:700
train_Self:710
train_Self:720
train_Self:730
train_Self:740
train_Self:750
train_Self:760
train_Self:770
train_Self:780
train_Self:790


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_self_count.png', dpi=300, bbox_inches='tight')
  plt.savefig(f'metrics_vs_sel

In [11]:
# # 熔断半径

# import matplotlib.pyplot as plt
# # plt.style.use('seaborn')

# # ... existing code ...

# # 添加新的函数来收集和绘制结果
# def plot_metrics_vs_radius(radius_range):
#     # 存储不同指标的结果
#     metrics_results = {
#         'Accuracy': [],
#         'Precision': [],
#         'Recall': [],
#         'F1 Score': [],
#         'False Positive Rate (FPR)': [],
#         'Unknown Coverage': []
#     }
    
#     # 收集每个半径下的结果
#     for self_radius in radius_range:
#         detectors_df_copy = detectors.copy()
#         detectors_radius = add_detection_radius(detectors_df_copy,self_radius,train_Self)
        
#         # 计算未知样本覆盖率
#         unknown_coverage = evaluate_nonselfcoverage(detectors_radius, test_set_unknown) / len(test_set_unknown)
#         metrics_results['Unknown Coverage'].append(unknown_coverage)
        
#         # 评估性能指标
#         self_covered = evaluate_self_coverage(detectors_radius)
#         nonself_covered = evaluate_nonself_coverage(detectors_radius)
#         metrics = calculate_metrics(
#             self_covered=self_covered,
#             nonself_covered=nonself_covered,
#             total_self=len(test_Self),
#             total_nonself=len(test_Nonself)
#         )
        
#         # 存储结果
#         for metric in metrics_results.keys():
#             if metric != 'Unknown Coverage':
#                 metrics_results[metric].append(metrics[metric])
    
#     # 创建图表
#     plt.figure(figsize=(15, 10))
    
#     # 绘制所有指标
#     for metric, values in metrics_results.items():
#         plt.plot(radius_range, values, marker='o', label=metric)
    
#     plt.xlabel('Self Radius')
#     plt.ylabel('Metric Value')
#     plt.title('Metrics vs radius')
#     plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
#     plt.grid(True)
#     plt.tight_layout()
    
#     # 保存图表
#     plt.savefig(f'{unknown_type}/metrics_vs_radius.png', dpi=300, bbox_inches='tight')
#     plt.close()
    
#     # 找出最佳F1分数对应的半径
#     best_f1_idx = np.argmax(metrics_results['F1 Score'])
#     best_radius = radius_range[best_f1_idx]
    
#     # 打印最佳结果
#     with open(f"{unknown_type}/results.txt", "a") as f:
#         f.write("\n=== 最佳性能指标 ===\n")
#         f.write(f"最佳自体半径: {best_radius}\n")
#         for metric, values in metrics_results.items():
#             f.write(f"最佳{metric}: {values[best_f1_idx]:.2%}\n")

# # 在最后的执行代码块中替换为
# radius_range = np.arange(0.064, 0.068, 0.0005)
# plot_metrics_vs_radius(radius_range)

In [12]:
# evaluate_detector_performance(detectors,self_radius,train_Self)
