In [29]:
import numpy as np
import pandas as pd

df = pd.read_csv('./1029_labeled.csv')
df

Unnamed: 0,Time,Source,Destination,Protocol,Length,ID,Data,Same Data,Strange Data,Entropy,Dos Attack,Fuzzing Attack,Replaying Attack,label
0,0.000000,0.0,0.0,CAN,32,168,8000080012504000,0,0,3.339354,Normal,Normal,Normal,0
1,0.001220,0.0,0.0,CAN,32,588,00c6000040001000,0,0,3.339354,Normal,Normal,Normal,0
2,0.002423,0.0,0.0,CAN,32,589,000000ff9c000000,0,0,3.339354,Normal,Normal,Normal,0
3,0.003685,0.0,0.0,CAN,32,178,0000000000000000,0,0,3.339354,Normal,Normal,Normal,0
4,0.011932,0.0,0.0,CAN,32,101,9000000000000100,0,0,3.339354,Normal,Normal,Normal,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163956,641.001177,0.0,0.0,CAN,32,147,0000000000000000,0,0,3.741446,Normal,Normal,Normal,0
163957,641.010075,0.0,0.0,CAN,32,112,c000001f04800000,0,0,3.741446,Normal,Normal,Normal,0
163958,641.012440,0.0,0.0,CAN,32,188,98a0000000000000,0,0,3.741446,Normal,Normal,Normal,0
163959,641.014382,0.0,0.0,CAN,32,141,3000000000000000,0,0,3.741446,Normal,Normal,Normal,0


In [30]:
# 1번 features - IAT(Inter-arrival Time) : CAN ID 그룹 내 시간 차이

# 1. Timestamp와 ID를 기준으로 정렬 (IAT 계산을 위한 준비)
df = df.sort_values(by=['ID', 'Time']).reset_index(drop=True)

# 2. ID별로 그룹화하고 Inter-arrival Time 계산
df['IAT'] = df.groupby('ID')['Time'].diff()

# 3. NaN 값 처리 (첫 번째 메시지의 IAT는 이전 값이 없으므로 NaN이 됨)
df['IAT'] = df['IAT'].fillna(0)  # 0으로 채우기

# 결과 확인
print(df.head())
df.to_csv('1029_labeled_1.csv', index = False, encoding = 'utf-8')

       Time  Source  Destination Protocol  Length ID              Data  \
0  0.129171     0.0          0.0      CAN      32  0  0000000000000000   
1  1.177590     0.0          0.0      CAN      32  0  0000000000000000   
2  2.226080     0.0          0.0      CAN      32  0  0000000000000000   
3  3.274480     0.0          0.0      CAN      32  0  0000000000000000   
4  4.322968     0.0          0.0      CAN      32  0  0000000000000000   

   Same Data  Strange Data   Entropy Dos Attack Fuzzing Attack  \
0          0             0  3.921928     Normal         Normal   
1          0             0  3.446439     Normal         Normal   
2          0             0  3.121928     Normal         Normal   
3          0             0  3.921928     Normal         Normal   
4          0             0  3.239354     Normal         Normal   

  Replaying Attack  label       IAT  
0           Normal      0  0.000000  
1           Normal      0  1.048419  
2           Normal      0  1.048490  
3     

In [32]:
# IAT 이상치 판단 기준: 0.09 이하 또는 2 이상
df['IAT_Anomaly'] = ((df['IAT'] <= 0.09) | (df['IAT'] >= 2)).astype(int)

# 결과 저장
df.to_csv('1029_labeled_2.csv', index=False, encoding='utf-8')

# 결과 확인
print(df[['ID', 'Time', 'IAT', 'IAT_Anomaly']].head())


  ID      Time       IAT  IAT_Anomaly
0  0  0.129171  0.000000            1
1  0  1.177590  1.048419            0
2  0  2.226080  1.048490            0
3  0  3.274480  1.048400            0
4  0  4.322968  1.048488            0


In [33]:
# 2번 features - ID별 메시지 빈도 계산

# ID별 메시지 빈도 계산
message_frequency = df['ID'].value_counts()

# 데이터프레임에 빈도 열 추가
df['Message_Frequency'] = df['ID'].map(message_frequency)

# 결과 저장
df.to_csv('1029_labeled_3.csv', index=False, encoding='utf-8')

# 결과 확인
print(df[['ID', 'Message_Frequency']].head())


  ID  Message_Frequency
0  0                612
1  0                612
2  0                612
3  0                612
4  0                612


In [40]:
# 메시지 빈도 이상치 : 메시지 빈도의 평균보다 2000회 이상 2000회 이하로 나왔을 때

# 조건에 따라 1 또는 0 반환하는 열 추가
df['Frequency_Anomaly'] = ((df['Message_Frequency'] < 500) | (df['Message_Frequency'] > 10000)).astype(int)

# 결과 저장
df.to_csv('1029_labeled_4.csv', index=False, encoding='utf-8')

# 결과 확인
print(df[['ID', 'Message_Frequency', 'Frequency_Anomaly']].head())

  ID  Message_Frequency  Frequency_Anomaly
0  0                612                  0
1  0                612                  0
2  0                612                  0
3  0                612                  0
4  0                612                  0


In [7]:
# 3번 features

In [9]:
# 4번 features

In [11]:
# 5번 features

In [13]:
# 6번 features