In [None]:
import pandas as pd

# Read the log file and split every record into whitespace-separated fields.
# Field 1 is stored as the timestamp, field 3 as the ID and field 6 as the DLC.
timestamp_list = []
id_list = []
dlc_list = []
with open("normal_run_data.txt", 'r') as file:
    # Iterate over the file lazily instead of loading every line up front.
    for line in file:
        parts = line.split()  # split on any run of whitespace
        if not parts:
            # A blank line holds no record; indexing it would raise IndexError.
            continue
        timestamp_list.append(float(parts[1]))
        id_list.append(parts[3])
        dlc_list.append(int(parts[6]))

In [None]:
# Build the rule table: one [ID, DLC] row per distinct ID, in order of first
# appearance, carrying the DLC of the first message seen with that ID.
# A dict keeps insertion order and gives constant-time "already seen" checks,
# instead of rebuilding a list of known IDs for every message.
first_dlc_by_id = {}
for msg_id, dlc in zip(id_list, dlc_list):
    first_dlc_by_id.setdefault(msg_id, dlc)
rules = [[msg_id, dlc] for msg_id, dlc in first_dlc_by_id.items()]

print("ID" + '\t' + "DLC")
for msg_id, dlc in rules:
    print(str(msg_id) + '\t' + str(dlc))

In [None]:
import numpy as np

# IDs that are always recorded as non-periodic (period 0) without being measured.
always_aperiodic_ids = ('02b0', '05f0')
# Largest coefficient of variation (in percent) still treated as periodic.
max_cv_percent = 20

# Collect the arrival times of every ID in a single pass over the log,
# rather than rescanning the whole log once per ID.
timestamps_by_id = {}
for msg_id, arrival in zip(id_list, timestamp_list):
    timestamps_by_id.setdefault(msg_id, []).append(arrival)

for rule in rules:
    period = 0  # 0 marks the ID as non-periodic
    if rule[0] not in always_aperiodic_ids:
        # Gaps between consecutive messages carrying this ID.
        time_intervals = np.diff(timestamps_by_id.get(rule[0], []))
        # An ID seen fewer than twice has no gap to measure; it stays
        # non-periodic instead of receiving a NaN "period".
        if time_intervals.size > 0:
            mean = np.mean(time_intervals)
            std = np.std(time_intervals)
            # A non-positive mean cannot be a period and would break the ratio below.
            if mean > 0:
                coefficient_of_variation = (std / mean) * 100
                if coefficient_of_variation <= max_cv_percent:
                    period = mean  # periodic message: remember its mean interval
    # Slice assignment sets the third column whether or not it already exists,
    # so re-running this cell does not keep appending extra columns.
    rule[2:] = [period]

print("ID" + '\t' + "DLC"+ '\t' + "Mean")
for msg_id, dlc, period in rules:
    print(str(msg_id) + '\t' + str(dlc) + '\t' + "{:.6f}".format(period))

df_rules = pd.DataFrame(rules, columns=["ID", "DLC", "Mean"])
df_rules.to_csv("Rules.csv", index=False)

In [None]:
from scipy.stats import entropy

# Number of consecutive messages that form one analysis block.
block_size = 2000

# Split the rule table into three parallel lists: ID, DLC and mean interval.
rules_id, rules_dlc, rules_mean = [], [], []
for rule in rules:
    rules_id.append(rule[0])
    rules_dlc.append(rule[1])
    rules_mean.append(rule[2])

In [None]:
def calculate_entropy(block):
    """Return the entropy of the ID distribution inside ``block``.

    For every ID in the global ``rules_id`` the share of ``block`` entries
    equal to that ID is computed, and the resulting list of shares is handed
    to ``scipy.stats.entropy``.
    """
    block_length = len(block)
    id_shares = [block.count(known_id) / block_length for known_id in rules_id]
    return entropy(id_shares)

# Compute the entropy of every complete block of block_size consecutive IDs.
entropies = []
for block_index in range(len(id_list) // block_size):
    start = block_index * block_size
    window = id_list[start : start + block_size]
    # '02b0' and '05f0' are left out of the distribution.
    filtered_window = [msg_id for msg_id in window if msg_id not in ('02b0', '05f0')]
    entropies.append(calculate_entropy(filtered_window))

# Print the entropy of each block, numbering the blocks from 1.
for group_number, value in enumerate(entropies, start=1):
    print(f"Group {group_number} Entropy: {value}")

In [None]:
# Smallest and largest entropy observed over the blocks.
min_entropy, max_entropy = min(entropies), max(entropies)

for entropy_bound in (min_entropy, max_entropy):
    print(entropy_bound)

In [None]:
def calculate_relative_entropy(block, reference_distribution):
    """Return the relative entropy of ``block`` against a reference distribution.

    The distribution P holds, for every ID in the global ``rules_id``, the
    share of ``block`` entries equal to that ID. P and ``reference_distribution``
    (Q) are passed to ``scipy.stats.entropy`` as its first and second argument.
    """
    block_length = len(block)
    id_shares = [block.count(known_id) / block_length for known_id in rules_id]
    return entropy(id_shares, reference_distribution)

# Fixed reference distribution Q: the share of each rule ID over the whole log,
# with '02b0' and '05f0' removed from the log first.
new_id_list = [msg_id for msg_id in id_list if msg_id not in ('02b0', '05f0')]
total_ids = len(new_id_list)
counts_q = [new_id_list.count(known_id) for known_id in rules_id]
reference_distribution = [count_q / total_ids for count_q in counts_q]

# Compute the relative entropy of every complete block against the reference.
relative_entropies = []
for block_index in range(len(id_list) // block_size):
    start = block_index * block_size
    window = id_list[start : start + block_size]
    # '02b0' and '05f0' are left out of the distribution.
    filtered_window = [msg_id for msg_id in window if msg_id not in ('02b0', '05f0')]
    relative_entropies.append(
        calculate_relative_entropy(filtered_window, reference_distribution)
    )

# Print the relative entropy of each block, numbering the blocks from 1.
for group_number, value in enumerate(relative_entropies, start=1):
    print(f"Group {group_number} Relative Entropy: {value}")

In [None]:
# Smallest and largest relative entropy observed over the blocks.
min_relative_entropy, max_relative_entropy = (
    min(relative_entropies),
    max(relative_entropies),
)

for relative_entropy_bound in (min_relative_entropy, max_relative_entropy):
    print(relative_entropy_bound)

In [None]:
def read_dataset(dataset):
    """Load a labelled dataset CSV and keep the columns used for detection.

    Parameters
    ----------
    dataset : str
        Path of the CSV file. It must contain the columns "Timestamp", "ID",
        "DLC" and "Flag".

    Returns
    -------
    pandas.DataFrame
        The four columns above, in that order.

    Raises
    ------
    KeyError
        If one of the four columns is missing from the file.
    """
    # Read the ID column as text. With dtype inference, IDs made only of digits
    # (e.g. "0350") can be parsed as integers, which drops their leading zeros
    # and stops them from matching the string IDs taken from the log file.
    df = pd.read_csv(dataset, dtype={"ID": str})

    print(dataset + " ok!")
    return df[["Timestamp", "ID", "DLC", "Flag"]]

# Load the six attack datasets, in this order, one frame per attack type.
(
    df_DoS,
    df_Fuzzy,
    df_Spoofing,
    df_Replaying,
    df_Drop,
    df_Masquerade,
) = [
    read_dataset(csv_name)
    for csv_name in (
        "new_DoS_dataset.csv",
        "new_Fuzzy_dataset.csv",
        "Spoofing_dataset.csv",
        "Replaying_dataset.csv",
        "Drop_dataset.csv",
        "Masquerade_dataset.csv",
    )
]

In [None]:
# For each dataset print: total rows, rows flagged 'R', rows flagged 'T'.
for frame_label, frame in (
    ("df_DoS", df_DoS),
    ("df_Fuzzy", df_Fuzzy),
    ("df_Spoofing", df_Spoofing),
    ("df_Replaying", df_Replaying),
    ("df_Drop", df_Drop),
    ("df_Masquerade", df_Masquerade),
):
    total_rows = frame.shape[0]
    r_rows = int((frame["Flag"] == 'R').sum())
    t_rows = int((frame["Flag"] == 'T').sum())
    print(frame_label + ": \t" + str(total_rows) + '\t' + str(r_rows) + '\t' + str(t_rows))

In [None]:
def _interval_rule_violated(block, msg_id, mean_interval):
    """Check one periodic ID against its expected interval inside ``block``.

    Returns True when a gap between consecutive messages with ``msg_id`` is
    shorter than half or longer than twice ``mean_interval``, or when the wait
    from the block start to the first such message, or from the last such
    message to the block end, is longer than twice ``mean_interval``.
    """
    block_start = block[0][0]
    block_end = block[-1][0]
    lower_bound = mean_interval * 0.5
    upper_bound = mean_interval * 2

    # None means "this ID has not been seen in the block yet". An explicit marker
    # is needed: comparing against the block's first timestamp cannot tell
    # "not seen" apart from "seen in the very first row".
    previous = None
    for row in block:
        if row[1] != msg_id:
            continue
        if previous is None:
            # The first gap is measured from the block start, so it may be
            # arbitrarily short; only the upper bound applies.
            if row[0] - block_start > upper_bound:
                return True
        else:
            gap = row[0] - previous
            if gap < lower_bound or gap > upper_bound:
                return True
        previous = row[0]

    # Trailing silence: from the last message of this ID (or from the block
    # start when the ID never appeared) up to the last row of the block.
    tail_start = block_start if previous is None else previous
    if block_end - tail_start > upper_bound:
        return True
    return False


def detect(block):
    """Return True when ``block`` breaks any rule learned from the normal log.

    Parameters
    ----------
    block : list
        Rows of the form [timestamp, ID, DLC].

    Returns
    -------
    bool
        True if a row carries an ID missing from ``rules_id``, a DLC different
        from the one recorded for its ID, or if a periodic ID breaks its
        interval rule; False otherwise. An empty block has nothing to inspect
        and yields False.

    Reads the globals ``rules_id``, ``rules_dlc`` and ``rules_mean``.
    """
    if not block:
        # Without rows there is no block start to measure intervals from.
        return False

    # Map each known ID to its expected DLC once, instead of searching the
    # list for every row. setdefault keeps the first entry for an ID.
    expected_dlc = {}
    for msg_id, dlc in zip(rules_id, rules_dlc):
        expected_dlc.setdefault(msg_id, dlc)

    # ID and DLC rules.
    for row in block:
        if row[1] not in expected_dlc:
            return True
        if row[2] != expected_dlc[row[1]]:
            return True

    # Time-interval rule; a mean of 0 marks a non-periodic ID, which is skipped.
    for msg_id, mean_interval in zip(rules_id, rules_mean):
        if mean_interval == 0:
            continue
        if _interval_rule_violated(block, msg_id, mean_interval):
            return True

    # Disabled rules, kept for reference:
    #
    # # Entropy rule
    # new_block = [row[1] for row in block if row[1] != '02b0' and row[1] != '05f0']
    # entropy_value = calculate_entropy(new_block)
    # if entropy_value < min_entropy or entropy_value > max_entropy:
    #     return True
    #
    # # Relative-entropy rule
    # new_block = [row[1] for row in block if row[1] != '02b0' and row[1] != '05f0']
    # relative_entropy = calculate_relative_entropy(new_block, reference_distribution)
    # if relative_entropy < min_relative_entropy or relative_entropy > max_relative_entropy:
    #     return True
    return False

In [None]:
attack_types = ["DoS Attack", "Fuzzy Attack", "Spoofing Attack", "Replaying Attack", "Drop Attack", "Masquerade Attack"]
df_list = [df_DoS, df_Fuzzy, df_Spoofing, df_Replaying, df_Drop, df_Masquerade]


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


# Evaluate the detector on every dataset, one complete block of block_size rows
# at a time. A block is labelled positive when any of its rows is flagged 'T'.
for attack_type, df in zip(attack_types, df_list):
    TP = 0
    FP = 0
    FN = 0
    TN = 0
    for i in range(df.shape[0] // block_size):
        block = df.iloc[i * block_size : i * block_size + block_size]
        # The label is taken before any filtering of the block.
        flag = 'T' in block["Flag"].values
        if attack_type == "Drop Attack":
            # For this dataset only the rows flagged 'R' are handed to the detector.
            block = block.loc[block["Flag"] == 'R']
        pred = detect(block[["Timestamp", "ID", "DLC"]].values.tolist())

        if pred and flag:
            TP += 1
        elif pred and not flag:
            FP += 1
        elif not pred and flag:
            FN += 1
        else:
            TN += 1

    print(attack_type + ": ")

    # A dataset without attack blocks, without clean blocks, or shorter than one
    # block leaves a denominator at zero; report NaN instead of raising
    # ZeroDivisionError and aborting the remaining attack types.

    # Accuracy
    accuracy = _safe_ratio(TP + TN, TP + TN + FP + FN)

    # True positive rate (recall)
    TPR = _safe_ratio(TP, TP + FN)

    # False positive rate
    FPR = _safe_ratio(FP, FP + TN)

    print("accuracy = " + str(accuracy))
    print("TPR = " + str(TPR))
    print("FPR = " + str(FPR) + '\n')