In [None]:
import pandas as pd

# Read the text log and split every line into whitespace-separated fields.
# The file is closed as soon as its lines are in memory; parsing happens afterwards.
with open("normal_run_data.txt", 'r') as file:
    lines = file.readlines()

timestamp_list = []
id_list = []
dlc_list = []
for line in lines:
    parts = line.strip().split()  # split on runs of whitespace
    if not parts:
        # Blank line (e.g. a trailing newline at end of file): nothing to parse.
        continue
    # Field positions used: 1 -> timestamp, 3 -> ID, 6 -> DLC.
    # A non-blank line with fewer fields still raises, so malformed input is not hidden.
    timestamp_list.append(float(parts[1]))
    id_list.append(parts[3])
    dlc_list.append(int(parts[6]))

In [None]:
# Build one rule row per distinct ID, in order of first appearance.
# Each row starts as [ID, DLC of the first message carrying that ID].
rules = []
seen_ids = set()  # O(1) membership test instead of rebuilding the ID list per message
for msg_id, msg_dlc in zip(id_list, dlc_list):
    if msg_id not in seen_ids:
        seen_ids.add(msg_id)
        rules.append([msg_id, msg_dlc])

In [None]:
import numpy as np

# IDs that are always recorded as non-periodic, whatever their timing looks like.
APERIODIC_IDS = ('02b0', '05f0')
# Coefficient of variation (std / mean, in percent) above which an ID is non-periodic.
CV_THRESHOLD_PERCENT = 20

# Group the timestamps by ID in a single pass over the log, preserving log order,
# instead of rescanning the whole log once per rule.
timestamps_by_id = {}
for msg_id, msg_timestamp in zip(id_list, timestamp_list):
    timestamps_by_id.setdefault(msg_id, []).append(msg_timestamp)

for rule in rules:
    msg_id = rule[0]
    period = 0  # 0 marks a non-periodic ID

    if msg_id not in APERIODIC_IDS:
        # Differences between consecutive timestamps of this ID.
        time_intervals = np.diff(timestamps_by_id.get(msg_id, []))

        # An ID seen only once has no interval at all; mean/std would be NaN,
        # so it is recorded as non-periodic instead.
        if time_intervals.size > 0:
            # Mean of the intervals
            mean = np.mean(time_intervals)
            # Standard deviation of the intervals
            std = np.std(time_intervals)
            # The mean must be positive for the coefficient of variation to be defined.
            if mean > 0 and (std / mean) * 100 <= CV_THRESHOLD_PERCENT:
                period = mean  # periodic message: store its mean interval

    # Store exactly one period entry after [ID, DLC]; slice assignment keeps the
    # row at three fields even if this cell is executed more than once.
    rule[2:] = [period]

In [None]:
# Split the rule rows into three parallel lists: ID, DLC and period (0 = non-periodic).
rules_id = []
rules_dlc = []
rules_mean = []
for rule_row in rules:
    rules_id.append(rule_row[0])
    rules_dlc.append(rule_row[1])
    rules_mean.append(rule_row[2])

In [None]:
def read_dataset(dataset, header="infer"):
    """Load a CSV log and normalise it to a fixed 12-column layout.

    Rows whose DLC is below 8 carry fewer DATA fields, so after parsing their
    flag value sits in column ``DATA[DLC]`` rather than in ``Flag``. This
    function moves that value into ``Flag`` and fills every unused DATA column
    (index >= DLC) with the string ``"00"``.

    Parameters
    ----------
    dataset : str
        Path of the CSV file to read.
    header : int, None or "infer", default "infer"
        Forwarded to ``pandas.read_csv``. With the default, pandas consumes the
        first line of the file as a header row before the columns are renamed.
        NOTE(review): if the files have no header line, that first record is
        silently dropped - pass ``header=None`` in that case (confirm against
        the actual files).

    Returns
    -------
    pandas.DataFrame
        Frame with columns Timestamp, ID, DLC, DATA[0]..DATA[7], Flag.

    Raises
    ------
    ValueError
        If any DLC value is not an integer in the range 0..8.
    """
    df = pd.read_csv(dataset, header=header)

    # Rename the columns
    df.columns = ["Timestamp", "ID", "DLC", "DATA[0]","DATA[1]","DATA[2]","DATA[3]","DATA[4]","DATA[5]","DATA[6]","DATA[7]","Flag"]

    dlc = df["DLC"]
    invalid_dlc = ~dlc.isin(list(range(9)))
    if invalid_dlc.any():
        raise ValueError(
            f"{dataset}: DLC must be an integer between 0 and 8, "
            f"found {dlc[invalid_dlc].unique().tolist()}"
        )

    # Set the Flag column. This must happen before the DATA columns are padded,
    # because for DLC < 8 the flag value is still stored in DATA[DLC].
    flag = df["Flag"]
    for n in range(8):
        # Keep the current value unless DLC == n, in which case take DATA[n].
        flag = flag.where(dlc != n, df[f"DATA[{n}]"])
    df["Flag"] = flag

    # Set the DATA columns: keep DATA[i] only when the row uses more than i fields.
    for i in range(8):
        column = f"DATA[{i}]"
        df[column] = df[column].where(dlc > i, "00")

    print(dataset + " ok!")
    return df

# Load the three CSV logs, in this order: DoS, Fuzzy, RPM.
df_DoS, df_Fuzzy, df_RPM = [
    read_dataset(csv_name)
    for csv_name in ("DoS_dataset.csv", "Fuzzy_dataset.csv", "RPM_dataset.csv")
]

In [None]:
def process_dataset(df, dataset, rule_ids=None, rule_periods=None, max_rows=2000000):
    """Fill gaps in periodic message streams and write the result to a CSV file.

    For every periodic ID, consecutive rows with Flag == 'R' are compared. When
    the time between two of them is about ``n`` periods (``n`` = rounded ratio,
    ``n`` > 1), ``n - 1`` copies of the later row are inserted, spaced one
    period apart going backwards from the later row's timestamp. Rows with any
    other Flag value are kept but ignored when measuring gaps.
    NOTE(review): presumably 'R' marks regular (non-injected) traffic - confirm.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns Timestamp, ID, DLC, DATA[0]..DATA[7], Flag, in
        that order.
    dataset : str
        Path of the CSV file to write.
    rule_ids : sequence, optional
        IDs to consider. Defaults to the notebook-level ``rules_id``.
    rule_periods : sequence of float, optional
        Period of each ID in ``rule_ids``; 0 marks a non-periodic ID. Defaults
        to the notebook-level ``rules_mean``.
    max_rows : int or None, default 2000000
        Only the first ``max_rows`` rows of ``df`` are processed; ``None``
        processes all rows.

    Returns
    -------
    None
        The result is written to ``dataset``, sorted by Timestamp.
    """
    if rule_ids is None:
        rule_ids = rules_id
    if rule_periods is None:
        rule_periods = rules_mean

    columns = ["Timestamp", "ID", "DLC", "DATA[0]","DATA[1]","DATA[2]","DATA[3]","DATA[4]","DATA[5]","DATA[6]","DATA[7]","Flag"]
    timestamp_col, id_col, flag_col = 0, 1, 11

    source = df if max_rows is None else df.head(max_rows)
    data_list = source.values.tolist()

    # Keep only usable periods. `period > 0` drops non-periodic IDs (0) and also
    # NaN periods, which would otherwise make round() raise below.
    periods = {}
    for rule_id, period in zip(rule_ids, rule_periods):
        if period > 0 and rule_id not in periods:
            periods[rule_id] = period

    # Handle missing messages in one pass over the rows. Filler rows are
    # collected per ID so they can be appended in rule order afterwards.
    fillers = {rule_id: [] for rule_id in periods}
    last_seen = {}  # ID -> timestamp of its previous 'R' row (absent until first seen)
    for row in data_list:
        msg_id = row[id_col]
        period = periods.get(msg_id)
        if period is None or row[flag_col] != 'R':
            continue

        previous = last_seen.get(msg_id)
        if previous is not None:
            elapsed_periods = round((row[timestamp_col] - previous) / period)
            # elapsed_periods <= 1 means no message is missing: the range is empty.
            for k in range(1, elapsed_periods):
                filler = row.copy()
                filler[timestamp_col] = row[timestamp_col] - k * period
                fillers[msg_id].append(filler)
        last_seen[msg_id] = row[timestamp_col]

    for filler_rows in fillers.values():
        data_list.extend(filler_rows)

    new_df = pd.DataFrame(data_list, columns=columns)
    # A stable sort keeps rows with equal timestamps in their existing relative order.
    new_df = new_df.sort_values(by="Timestamp", kind="mergesort")
    new_df.to_csv(dataset, index=False)
    print(dataset + " ok!")

# Gap-fill each loaded frame and write it to its output file, in this order.
for loaded_frame, output_csv in (
    (df_DoS, "new_DoS_dataset.csv"),
    (df_Fuzzy, "new_Fuzzy_dataset.csv"),
    (df_RPM, "Spoofing_dataset.csv"),
):
    process_dataset(loaded_frame, output_csv)