In [3]:
import numpy as np
import pandas as pd
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from pyod.models.abod import ABOD
from pyod.models.ecod import ECOD
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'pyod'

In [43]:
# Load the saved embedding array from disk. The trailing expression displays
# its shape (the recorded output for this cell is (200, 1, 16)).
file_path = 'embeddings.npy'
embeddings = np.load(file_path)
embeddings.shape

(200, 1, 16)

In [44]:
# Flatten every sample to a single feature vector: (n_samples, 1, dim) -> (n_samples, dim).
# The shape is derived from the array itself instead of being hard-coded to (200, 16),
# so the cell keeps working when the number of samples or the embedding width changes,
# and re-running it on an already flattened array is a no-op.
embeddings = embeddings.reshape(embeddings.shape[0], -1)

In [45]:
# Read the 'label' column of Dynamicgraph.csv as a pandas Series and display it.
# Presumably one row per embedded sample, in the same order — TODO confirm alignment.
# NOTE(review): a later cell rebinds `label` to a list derived from the detector
# predictions; once that cell has run, this Series is no longer what `label` refers to.
label = pd.read_csv('Dynamicgraph.csv')['label']
label

0      0
1      0
2      0
3      0
4      0
      ..
195    0
196    0
197    0
198    0
199    0
Name: label, Length: 200, dtype: int64

In [51]:
# Display the LOF predictions.
# NOTE(review): in this view y_pred_lof is only assigned in the detector cell further
# down, so this cell fails on a fresh kernel unless that cell has been run first.
y_pred_lof

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0])

In [54]:
# Positions (row indices) of the samples that LOF flagged as outliers (prediction == 1).
# np.flatnonzero replaces the manual append loop and its misspelled scratch list, and it
# always returns an integer index array — including when nothing is flagged, where
# np.array([]) would have produced an empty float64 array instead.
# Despite its name, `labels` holds sample indices, not class labels.
labels = np.flatnonzero(np.asarray(y_pred_lof) == 1)
labels

array([  5,  10,  14,  16,  18,  24,  26,  31,  37,  38,  40,  43,  52,
        56,  59,  60,  65,  68,  72,  75,  76,  81,  82,  84,  86,  89,
        92,  93, 100, 102, 103, 109, 110, 111, 115, 119, 121, 123, 124,
       129, 132, 138, 139, 141, 149, 150, 154, 157, 166, 168, 169, 172,
       175, 180, 181, 182, 186, 188, 193, 197])

In [71]:
# Consensus pseudo-label per sample: 1 if at least one of the five detectors flags the
# sample, 0 only when all five agree that it is normal.
#
# NOTE(review): this rebinds `label`, which an earlier cell loads from Dynamicgraph.csv.
# Any metric later computed against `label` is therefore measured against the union of
# detector outputs, not against the CSV ground truth — confirm that this is intended.
# The consensus is also exposed as `ensemble_label` so it can be referenced without
# relying on the overloaded name.
#
# Stacking raises ValueError if the prediction vectors differ in length, and the explicit
# check below rejects values other than 0/1; previously such a value matched neither
# branch, silently dropping the row and leaving `label` misaligned with the samples.
_detector_preds = np.stack([
    np.asarray(pred)
    for pred in (y_pred_lof, y_pred_abod, y_pred_ecod, y_pred_box_plot, y_pred_if)
])
if not np.isin(_detector_preds, (0, 1)).all():
    raise ValueError("detector predictions must contain only 0 (normal) or 1 (outlier)")

ensemble_label = _detector_preds.any(axis=0).astype(int)
# Kept as a plain list of ints under the original name for downstream cells.
label = ensemble_label.tolist()

In [74]:
# Method 1: Local Outlier Factor (LOF)
lof = LocalOutlierFactor(n_neighbors=100, contamination=0.5)
# fit_predict reports 1 for normal points and -1 for anomalies;
# remap so that 0 = normal and 1 = anomaly.
y_pred_lof = (lof.fit_predict(embeddings) != 1).astype(int)

# Method 2: Isolation Forest (IF)
iforest = IsolationForest(contamination=0.5, random_state=42)
# Same 1 / -1 output convention as LOF, remapped the same way.
y_pred_if = (iforest.fit_predict(embeddings) != 1).astype(int)

# Method 3: Box-Plot (interquartile-range rule)
def box_plot_outliers(data, whisker=1.5):
    """Flag values that fall outside the box-plot fences of their column.

    The quartiles are computed along axis 0, so for a 2-D array every column
    gets its own pair of fences:
    ``[Q1 - whisker * IQR, Q3 + whisker * IQR]``.

    Args:
        data: Array of observations; axis 0 is the axis the quartiles are
            taken over.
        whisker: Multiplier applied to the IQR to place the fences. Defaults
            to 1.5, the value that was previously hard-coded.

    Returns:
        Boolean mask with the same shape as ``data``; True where a value lies
        strictly below the lower fence or strictly above the upper fence.
    """
    # One percentile call returns both quartiles, stacked along the first axis.
    q1, q3 = np.percentile(data, [25, 75], axis=0)
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr
    # Strict comparisons: a column with zero spread (IQR == 0) flags nothing
    # as long as every value equals the quartiles.
    return (data < lower_bound) | (data > upper_bound)

# A sample counts as an outlier when any of its embedding dimensions
# falls outside the box-plot fences.
outliers = box_plot_outliers(embeddings)
y_pred_box_plot = outliers.any(axis=1).astype(int)

# Method 4: ABOD
# labels_ is used as-is, with no remapping (presumably already 0 = normal, 1 = outlier — confirm).
abod = ABOD(contamination=0.5).fit(embeddings)
y_pred_abod = abod.labels_

# Method 5: ECOD
ecod = ECOD(contamination=0.5).fit(embeddings)
y_pred_ecod = ecod.labels_

# Compute the evaluation metrics for every detector against `label`.
methods = ['LOF', 'IF', 'Box-Plot', 'ABOD', 'ECOD']
predictions = [y_pred_lof, y_pred_if, y_pred_box_plot, y_pred_abod, y_pred_ecod]

for method, y_pred in zip(methods, predictions):
    # Scorers are evaluated in the order in which the report prints them.
    accuracy, f1, precision, recall = (
        scorer(label, y_pred)
        for scorer in (accuracy_score, f1_score, precision_score, recall_score)
    )
    print(f'{method}:\n\tAccuracy: {accuracy:.4f}\n\tF1 Score: {f1:.4f}\n\tPrecision: {precision:.4f}\n\tRecall: {recall:.4f}\n')


LOF:
	Accuracy: 0.7950
	F1 Score: 0.8038
	Precision: 0.8400
	Recall: 0.7706

IF:
	Accuracy: 0.8550
	F1 Score: 0.8612
	Precision: 0.9000
	Recall: 0.8257

Box-Plot:
	Accuracy: 0.5950
	F1 Score: 0.4088
	Precision: 1.0000
	Recall: 0.2569

ABOD:
	Accuracy: 0.7750
	F1 Score: 0.7847
	Precision: 0.8200
	Recall: 0.7523

ECOD:
	Accuracy: 0.7550
	F1 Score: 0.7656
	Precision: 0.8000
	Recall: 0.7339

