### 读取所有数据

In [37]:
import pandas as pd

# Sanity check on a single capture file: how many rows carry each label value?
file_path = 'Deal_max_engine_coolant_temp_attack_masquerade.csv'

# Read without a header row, so the columns are numbered 0..N-1.
df = pd.read_csv(file_path, header=None)

# The 10th column (position 9) is treated as the label column.
label_column = df[df.columns[9]]

# Count the rows per distinct label value and show the tally.
label_counts = label_column.value_counts()
print(label_counts)


0.0    57098
1.0       43
Name: 9, dtype: int64


In [20]:
import pandas as pd
import numpy as np
import os

# Directory that holds the per-capture CSV files
folder_path = '.'

# Capture files to merge
file_names = ['Deal_reverse_light_off_attack_1_masquerade.csv', 
              'Deal_max_speedometer_attack_3_masquerade.csv', 
              'Deal_reverse_light_off_attack_2_masquerade.csv', 
              'Deal_correlated_signal_attack_2_masquerade.csv', 
              'Deal_max_engine_coolant_temp_attack_masquerade.csv', 
              'Deal_max_speedometer_attack_1_masquerade.csv', 
              'Deal_max_speedometer_attack_2_masquerade.csv', 
              'Deal_reverse_light_off_attack_3_masquerade.csv', 
              'Deal_correlated_signal_attack_3_masquerade.csv', 
              'Deal_reverse_light_on_attack_2_masquerade.csv', 
              'Deal_reverse_light_on_attack_1_masquerade.csv', 
              'Deal_correlated_signal_attack_1_masquerade.csv', 
              'Deal_reverse_light_on_attack_3_masquerade.csv']

# Column names applied to every file: nine feature columns ID0..ID8 followed by
# the label column. Built once because it is the same for every file.
column_names = [f'ID{i}' for i in range(9)] + ['label']

# Collect the per-file frames and concatenate them once after the loop.
# Calling pd.concat inside the loop copies all previously accumulated rows
# again on every iteration.
frames = []
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path, header=None)  # files are read as header-less
    df.columns = column_names
    # Attack type = the filename tokens between the leading 'Deal' and the last
    # two tokens, e.g. 'Deal_reverse_light_off_attack_1_masquerade.csv' gives
    # 'reverse_light_off_attack'. The un-numbered coolant file therefore loses
    # its '_attack' token and becomes 'max_engine_coolant_temp'.
    attack_type = '_'.join(file_name.split('_')[1:-2])
    # Rows whose label equals 1 get the file's attack type, all others 'normal'.
    df['attack_type'] = np.where(df['label'] == 1, attack_type, 'normal')
    frames.append(df)

combined_df = pd.concat(frames, ignore_index=True)

# Number of rows per attack type
attack_type_counts = combined_df['attack_type'].value_counts()

# Show the per-class row counts
print(attack_type_counts)


normal                      1423358
max_speedometer_attack        11624
reverse_light_on_attack        7930
correlated_signal_attack       5453
reverse_light_off_attack       5440
max_engine_coolant_temp          43
Name: attack_type, dtype: int64


In [21]:
# The per-row class is carried by 'attack_type', so the binary 'label' column
# is removed. errors='ignore' keeps the cell re-runnable once the column is
# already gone (a plain drop would raise KeyError on the second run).
combined_df = combined_df.drop(columns='label', errors='ignore')
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only appended a stray "None" line to the output.
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1453848 entries, 0 to 1453847
Data columns (total 10 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   ID0          1453848 non-null  int64 
 1   ID1          1453848 non-null  int64 
 2   ID2          1453848 non-null  int64 
 3   ID3          1453848 non-null  int64 
 4   ID4          1453848 non-null  int64 
 5   ID5          1453848 non-null  int64 
 6   ID6          1453848 non-null  int64 
 7   ID7          1453848 non-null  int64 
 8   ID8          1453848 non-null  int64 
 9   attack_type  1453848 non-null  object
dtypes: int64(9), object(1)
memory usage: 110.9+ MB
None


将 'attack_type' 列重命名为 'Label'，作为统一的类别列

查看合并以后的类别情况。

In [22]:
# Rename the 'attack_type' column to 'Label', then count the rows in each class.
combined_df = combined_df.rename({'attack_type': 'Label'}, axis='columns')
label_counts = combined_df['Label'].value_counts()
# Show the per-class row counts
print(label_counts)

normal                      1423358
max_speedometer_attack        11624
reverse_light_on_attack        7930
correlated_signal_attack       5453
reverse_light_off_attack       5440
max_engine_coolant_temp          43
Name: Label, dtype: int64


In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
combined_df.describe()

Unnamed: 0,ID0,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8
count,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0
mean,745.637,47.77223,61.2955,61.71081,59.34673,66.13537,55.86354,71.01236,79.75526
std,546.3976,64.5177,77.59191,81.94715,78.65313,83.4482,75.10708,83.41113,81.36489
min,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,263.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,651.0,16.0,22.0,12.0,7.0,16.0,9.0,25.0,57.0
75%,1176.0,82.0,89.0,105.0,119.0,124.0,120.0,129.0,136.0
max,1788.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0


数据归一化

In [26]:
from sklearn.preprocessing import MinMaxScaler

# Columns to rescale: ID0 through ID8.
data_columns = [f'ID{i}' for i in range(9)]

# Fit a min-max scaler on those columns and write the scaled values back
# into combined_df.
scaler = MinMaxScaler()
combined_df[data_columns] = scaler.fit_transform(combined_df[data_columns])

In [27]:
# Summary statistics again, to inspect the columns after the min-max scaling.
combined_df.describe()

Unnamed: 0,ID0,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8
count,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0
mean,0.41506,0.1873421,0.2403745,0.2420032,0.2327323,0.2593544,0.2190727,0.2784798,0.3127657
std,0.3066204,0.2530106,0.304282,0.3213614,0.3084436,0.3272479,0.2945376,0.3271025,0.319078
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.14422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.3619529,0.0627451,0.08627451,0.04705882,0.02745098,0.0627451,0.03529412,0.09803922,0.2235294
75%,0.6565657,0.3215686,0.3490196,0.4117647,0.4666667,0.4862745,0.4705882,0.5058824,0.5333333
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [28]:
# Preview the first rows of the scaled frame, including the 'Label' column.
combined_df.head()

Unnamed: 0,ID0,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,Label
0,0.94725,0.015686,0.25098,0.015686,0.494118,0.121569,0.752941,0.082353,0.258824,normal
1,0.161055,0.564706,0.0,0.176471,0.87451,0.12549,0.219608,0.364706,0.376471,normal
2,0.984287,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,normal
3,0.14422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,normal
4,0.10101,0.023529,0.658824,0.066667,0.768627,0.062745,0.0,0.031373,0.384314,normal


对数据进行放缩，将其转为[0,255]的形式

In [29]:
from sklearn.preprocessing import QuantileTransformer
# Transform all non-object (numeric) columns into the scale of [0,1]
numeric_features = combined_df.dtypes[combined_df.dtypes != 'object'].index
# QuantileTransformer estimates its quantiles from a random subsample when the
# data has more rows than its `subsample` limit, so an unseeded transformer
# produces slightly different values on every run. A fixed random_state makes
# this cell reproducible.
scaler = QuantileTransformer(random_state=0)
combined_df[numeric_features] = scaler.fit_transform(combined_df[numeric_features])

In [30]:
# Stretch the numeric feature values by a factor of 255, taking them from the
# [0,1] scale to the [0,255] scale.
combined_df[numeric_features] = combined_df[numeric_features] * 255

In [31]:
# Summary statistics after the quantile transform and the x255 rescaling.
combined_df.describe()

Unnamed: 0,ID0,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8
count,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0,1453848.0
mean,127.4876,110.8493,114.4654,118.475,112.0808,119.2311,111.9934,115.8819,116.2013
std,73.26597,91.47107,88.92862,85.41853,90.48398,84.64022,90.65422,87.63717,87.30299
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,63.17568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,128.521,128.0105,127.8829,129.542,129.4144,127.1171,126.2237,127.6276,127.4362
75%,187.7402,191.4414,191.6967,191.6967,191.952,191.6967,191.5691,191.3138,191.0586
max,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0


In [32]:
# 查看 'Label' 列的所有唯一值及其出现次数
# Count the rows for every distinct value of the 'Label' column
label_counts = combined_df['Label'].value_counts()

# Show the per-class row counts
print(label_counts)

normal                      1423358
max_speedometer_attack        11624
reverse_light_on_attack        7930
correlated_signal_attack       5453
reverse_light_off_attack       5440
max_engine_coolant_temp          43
Name: Label, dtype: int64


对数据进行分类

In [39]:
# One feature-only frame per class: keep the rows carrying the given label,
# then drop the 'Label' column. The original row index is retained.
df0, df1, df2, df3, df4, df5 = (
    combined_df[combined_df['Label'] == class_name].drop(['Label'], axis=1)
    for class_name in (
        'normal',
        'max_speedometer_attack',
        'reverse_light_on_attack',
        'correlated_signal_attack',
        'reverse_light_off_attack',
        'max_engine_coolant_temp',
    )
)


In [42]:
# Summary statistics for the 'max_engine_coolant_temp' rows only (df5).
df5.describe()

Unnamed: 0,ID0,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8
count,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0
mean,192.973,0.0,0.0,0.0,143.7087,81.55405,255.0,0.0,0.0
std,2.875807e-14,0.0,0.0,0.0,2.875807e-14,1.437904e-14,0.0,0.0,0.0
min,192.973,0.0,0.0,0.0,143.7087,81.55405,255.0,0.0,0.0
25%,192.973,0.0,0.0,0.0,143.7087,81.55405,255.0,0.0,0.0
50%,192.973,0.0,0.0,0.0,143.7087,81.55405,255.0,0.0,0.0
75%,192.973,0.0,0.0,0.0,143.7087,81.55405,255.0,0.0,0.0
max,192.973,0.0,0.0,0.0,143.7087,81.55405,255.0,0.0,0.0


## 将数据集转为9 * 9 * 3的图像形式
1. 每个类别按顺序每27行数据（27行 × 9列 = 243个值）生成一张图像
2. 对于末尾不足27行的剩余数据，改用该类别的前27行数据再生成一张图像
3. 对选取的数据reshape为 9，9，3的形式

In [35]:
# import numpy as np
# from PIL import Image
# import os

# # 假设 combined_df 是你的原始 DataFrame
# labels = [
#     'normal', 
#     'max_speedometer_attack', 
#     'reverse_light_on_attack', 
#     'correlated_signal_attack', 
#     'reverse_light_off_attack', 
#     'max_engine_coolant_temp'
# ]
# image_counts = []

# needed_pixels = 9 * 9 * 3  # 每张图片需要的像素数
# rows_needed = 27  # 每张图片需要的行数

# for idx, label in enumerate(labels):
#     df = combined_df[combined_df['Label'] == label].drop(['Label'], axis=1)
#     image_path = f"./train/{idx}/"
#     os.makedirs(image_path, exist_ok=True)
    
#     image_count = 0  # 初始化当前类别的图像计数器
#     num_images = len(df) // rows_needed  # 计算可能生成的图片数量
    
#     # 处理所有完整的图片集
#     for i in range(num_images):
#         data_block = df.iloc[i*rows_needed:(i+1)*rows_needed].values.flatten()
#         if len(data_block) < needed_pixels:
#             data_block = np.tile(data_block, needed_pixels // len(data_block) + 1)[:needed_pixels]
        
#         image_data = data_block.reshape((9, 9, 3))
#         image_data = np.array(image_data, dtype=np.uint8)
#         new_image = Image.fromarray(image_data)
#         new_image.save(os.path.join(image_path, f'{image_count}.png'))
        
#         image_count += 1
    
#     # Handle the trailing rows that do not fill a complete block of rows_needed (27) rows
#     remainder = len(df) % rows_needed
#     if remainder != 0:
#         # If rows are left over, reuse the class's first rows_needed (27) rows as a substitute
#         data_block = df.iloc[:rows_needed].values.flatten()
#         image_data = data_block.reshape((9, 9, 3))
#         image_data = np.array(image_data, dtype=np.uint8)
#         new_image = Image.fromarray(image_data)
#         new_image.save(os.path.join(image_path, f'{image_count}.png'))
#         image_count += 1
    
#     image_counts.append((label, image_count))

# # 输出每个类别的图像数量
# for label, count in image_counts:
#     print(f"Category: {label} - Images: {count}")


Category: normal - Images: 52717
Category: max_speedometer_attack - Images: 431
Category: reverse_light_on_attack - Images: 294
Category: correlated_signal_attack - Images: 202
Category: reverse_light_off_attack - Images: 202
Category: max_engine_coolant_temp - Images: 2


In [44]:
import numpy as np
from PIL import Image
import os

# Class names found in combined_df['Label']. The position in this list is used
# as the numeric class id and as the sub-folder name under ./train/.
labels = [
    'normal', 
    'max_speedometer_attack', 
    'reverse_light_on_attack', 
    'correlated_signal_attack', 
    'reverse_light_off_attack', 
    'max_engine_coolant_temp'
]
image_counts = []

needed_pixels = 9 * 9 * 3  # values per image: 9 x 9 pixels x 3 channels = 243
rows_needed = 27  # rows per image: 27 rows x 9 feature columns = 243 values


def _save_rows_as_image(rows, out_file):
    """Write one block of feature rows to ``out_file`` as a 9 x 9 x 3 image.

    The block is flattened row by row. A block holding fewer than
    ``needed_pixels`` values is repeated end-to-end and cut to exactly
    ``needed_pixels`` values, so a class with fewer than ``rows_needed`` rows
    no longer makes the reshape fail. Values are cast to uint8 before saving.

    ``rows`` must contain at least one row.
    """
    data_block = rows.values.flatten()
    if len(data_block) < needed_pixels:
        data_block = np.tile(data_block, needed_pixels // len(data_block) + 1)[:needed_pixels]
    image_data = data_block.reshape((9, 9, 3)).astype(np.uint8)
    Image.fromarray(image_data).save(out_file)


for idx, label in enumerate(labels):
    df = combined_df[combined_df['Label'] == label].drop(['Label'], axis=1)
    image_path = f"./train/{idx}/"
    os.makedirs(image_path, exist_ok=True)

    if label == 'max_engine_coolant_temp':
        # Sliding window with stride 1: every run of rows_needed consecutive
        # rows becomes an image, so neighbouring images share all but one row.
        # NOTE(review): these heavily overlapping images are later split at
        # random into train/test, which can put near-identical images on both
        # sides - confirm this is acceptable.
        block_starts = range(len(df) - rows_needed + 1)
        reuse_first_block = False
    else:
        # Non-overlapping blocks of rows_needed rows: one image per full block.
        block_starts = range(0, len(df) - rows_needed + 1, rows_needed)
        # Leftover rows that do not fill a block are not used themselves;
        # instead one extra image is built from the class's first rows_needed
        # rows (a duplicate of image 0 whenever the class has a full block).
        reuse_first_block = len(df) % rows_needed != 0

    image_count = 0  # images written for the current class
    for start in block_starts:
        _save_rows_as_image(df.iloc[start:start + rows_needed],
                            os.path.join(image_path, f'{image_count}.png'))
        image_count += 1

    if reuse_first_block:
        _save_rows_as_image(df.iloc[:rows_needed],
                            os.path.join(image_path, f'{image_count}.png'))
        image_count += 1

    image_counts.append((label, image_count))

# Report the number of images written per class
for label, count in image_counts:
    print(f"Category: {label} - Images: {count}")


Category: normal - Images: 52717
Category: max_speedometer_attack - Images: 431
Category: reverse_light_on_attack - Images: 294
Category: correlated_signal_attack - Images: 202
Category: reverse_light_off_attack - Images: 202
Category: max_engine_coolant_temp - Images: 17


## 数据划分，转换后的数据划分为测试数据和训练数据
1. 将处理好的数据分割，分割一部分作为测试数据
2. 其中测试数据占每个类别图像总数的30%（按类别随机抽取）

In [45]:
import os
import random
import shutil
from random import sample

# Base directories: images are moved out of ./train into ./test
base_dir = './train'
test_dir = './test'
os.makedirs(test_dir, exist_ok=True)  # make sure the test directory exists

# Class sub-folder names
labels = ['0', '1', '2', '3', '4', '5']

# Fixed seed so that the same images are selected on every fresh run.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

# NOTE: this cell MOVES files, so running it a second time takes another 30%
# of what is left in ./train. Regenerate the images before re-running it.
for label in labels:
    class_dir = os.path.join(base_dir, label)  # directory of this class
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # sample pick the same files regardless of filesystem ordering.
    images = sorted(os.listdir(class_dir))
    test_sample = sample(images, k=int(len(images) * 0.3))  # 30%, rounded down
    
    # Matching class directory inside the test set
    test_class_dir = os.path.join(test_dir, label)
    os.makedirs(test_class_dir, exist_ok=True)
    
    # Move the selected images into the test directory
    for image in test_sample:
        src_path = os.path.join(class_dir, image)
        dst_path = os.path.join(test_class_dir, image)
        shutil.move(src_path, dst_path)  # use shutil.copy(src_path, dst_path) to copy instead of move

# Completion message
print("Test datasets have been created successfully.")


Test datasets have been created successfully.
