In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to imported project code are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from cmath import nan
import pywt
import torch
import pywt
from pandarallel import pandarallel
import tqdm
from functools import partial
import glob
import json
from pathlib import Path
from numpy.lib.stride_tricks import as_strided
# pandarallel.initialize(progress_bar=True)

## Label attack data

In [3]:
# Folder holding the attack capture logs and their metadata file.
data_folder = '../../../Data/road/attacks/'
# Join with pathlib: data_folder already ends in '/', so plain string
# concatenation produced a doubled separator.
meta_file = Path(data_folder) / 'capture_metadata.json'
# Context manager so the file handle is closed as soon as parsing finishes.
with open(meta_file) as meta_fp:
    meta_data = json.load(meta_fp)
# One row per top-level key of the JSON file (the capture name); the columns
# are the fields stored under that key.
meta_df = pd.DataFrame.from_dict(meta_data, orient='index')
meta_df

Unnamed: 0,description,elapsed_sec,injection_data_str,injection_id,injection_interval,modified,on_dyno
accelerator_attack_drive_1,start from park; drive; car accelerates until ...,86.46291,,,,False,True
accelerator_attack_drive_2,start from park; drive; car accelerates until ...,72.868275,,,,False,True
accelerator_attack_reverse_1,start from park; reverse; car accelerates unti...,86.13449,,,,False,True
accelerator_attack_reverse_2,start from park; reverse; car accelerates unti...,105.438372,,,,False,True
correlated_signal_attack_1,start from driving; accelerate; start injectin...,33.101852,595945450000FFFF,0x6e0,"[9.191851, 30.050109]",False,True
correlated_signal_attack_1_masquerade,start from driving; accelerate; start injectin...,33.101852,595945450000FFFF,0x6e0,"[9.191851, 30.050109]",True,True
correlated_signal_attack_2,start from driving; accelerate; start injectin...,28.226893,595945450000FFFF,0x6e0,"[6.830477, 28.225908]",False,True
correlated_signal_attack_2_masquerade,start from driving; accelerate; start injectin...,28.226893,595945450000FFFF,0x6e0,"[6.830477, 28.225908]",True,True
correlated_signal_attack_3,start from driving; accelerate; start injectin...,16.963905,595945450000FFFF,0x6e0,"[4.318482, 16.95706]",False,True
correlated_signal_attack_3_masquerade,start from driving; accelerate; start injectin...,16.963905,595945450000FFFF,0x6e0,"[4.318482, 16.95706]",True,True


In [4]:
# attack_name = 'max_speedometer_attack_1'
def read_data(attack_name: str):
    """Load one capture log into a frame of relative timestamps, ids and payloads.

    The file ``<data_folder>/<attack_name>.log`` is read as space-separated
    columns ``timestamp interface frame``. The timestamp text carries one
    enclosing character on each side, and the frame is ``<id>#<data>``.

    Returns a DataFrame with the columns ``timestamp`` (offset from the first
    row), ``id``, ``data`` and ``label`` (initialised to False).
    """
    log_path = Path(data_folder) / f'{attack_name}.log'
    raw = pd.read_csv(log_path, sep=" ", header=None, names=['timestamp', 'interface', 'frame'])

    # Strip the first and last character around the timestamp before parsing it.
    seconds = raw['timestamp'].apply(lambda stamp: float(stamp[1:-1]))
    # Split each frame once; both derived columns reuse the pieces.
    pieces = raw['frame'].apply(lambda frame: frame.split('#'))

    return pd.DataFrame({
        'timestamp': seconds - seconds.iloc[0],
        'id': pieces.apply(lambda parts: parts[0]),
        'data': pieces.apply(lambda parts: parts[1]),
        'label': False,
    })

In [5]:
def read_info_attack(attack_name:str):
    """Look up the injection window, id and payload recorded for one attack.

    Reads the row for ``attack_name`` from the module-level ``meta_df`` and
    prints the injection interval.

    Returns ``(start_injected, end_injected, injection_id, injection_data)``.
    ``injection_id`` has its first two characters removed, is upper-cased and
    is left-padded with zeros to at least three characters.
    """
    info = meta_df.loc[attack_name]
    start_injected, end_injected = info.injection_interval
    print(f'The attack {attack_name} starts from {start_injected} to {end_injected}')
    # e.g. '0x6e0' -> '6E0'
    injection_id = info.injection_id[2:].upper().zfill(3)
    injection_data = info.injection_data_str
    return start_injected, end_injected, injection_id, injection_data

In [6]:
def find_change_byte(data_str):
    """Return the position of the first character in ``data_str`` that is not ``'X'``.

    Returns -1 when every character is ``'X'``, which includes the empty string.
    """
    return next((pos for pos, char in enumerate(data_str) if char != 'X'), -1)

In [7]:
def label_data(df, start_injected, end_injected, injection_id, injection_data):
    """Mark the injected frames of one capture by setting ``label`` to True.

    A row is labelled when its ``timestamp`` lies in
    ``[start_injected, end_injected]``, its ``id`` equals ``injection_id`` and
    its ``data`` matches ``injection_data``, where every ``'X'`` in
    ``injection_data`` stands for one unconstrained payload character.

    The whole payload pattern is compared, not only its first non-wildcard
    byte. Frames that merely share that first byte with the injected payload
    are therefore not labelled. A pattern made only of ``'X'`` constrains
    nothing, so every frame of ``injection_id`` inside the window is labelled.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``timestamp``, ``id``, ``data`` and ``label``.
        It is modified in place.
    start_injected, end_injected : float
        Inclusive bounds of the injection window, on the scale of ``df.timestamp``.
    injection_id : str
        Id of the injected frames, formatted like the ``id`` column.
    injection_data : str
        Injected payload, with ``'X'`` for unconstrained characters.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``label`` updated.

    Raises
    ------
    TypeError
        If ``injection_data`` is not a string (for example NaN when no payload
        is recorded for the capture).
    """
    if not isinstance(injection_data, str):
        raise TypeError(
            f'injection_data must be a string, got {type(injection_data).__name__}'
        )

    # 'X' is the wildcard marker; '.' is its regex equivalent. All other
    # characters are kept, so every specified character must match.
    regex_pattern = injection_data.replace('X', '.')

    time_mask = (df.timestamp >= start_injected) & (df.timestamp <= end_injected)
    # na=False: rows with a missing payload can never be injected frames and
    # must not poison the boolean mask.
    payload_mask = df.data.str.match(regex_pattern, na=False)
    id_mask = df.id == injection_id

    df.loc[time_mask & payload_mask & id_mask, 'label'] = True
    return df

In [8]:
def convert_canid_bits(cid):
    """Encode a hexadecimal id as an array of bits, most significant bit first.

    ``str(cid)`` is parsed as base-16. If that fails with ``ValueError`` (for
    example when the id arrives as a float such as ``123.0``), the fractional
    part is dropped and the remaining decimal digits are re-read as hex.

    Parameters
    ----------
    cid : str or number
        The id to encode.

    Returns
    -------
    numpy.ndarray
        1-D integer array of 0/1 values, left-padded with zeros to 29 entries
        (longer if the value needs more than 29 bits).

    Raises
    ------
    ValueError
        If ``cid`` cannot be interpreted by either parsing route.
    """
    try:
        value = int(str(cid), 16)
    except ValueError:
        # Only a failed hex parse triggers the fallback. Unrelated errors are
        # no longer swallowed by a bare except.
        value = int(str(int(float(cid))), 16)
    bit_string = bin(value)[2:].zfill(29)
    return np.array([int(bit) for bit in bit_string])

In [9]:
def sliding_window(data, win=29, s=1):
    """Return overlapping windows over a 1-D array as rows of a 2-D strided view.

    Row ``i`` holds ``data[i*s : i*s + win]``. Only complete windows are
    produced, giving ``(len(data) - win) // s + 1`` rows. If ``data`` is
    shorter than one window the result has shape ``(0, win)``.

    Parameters
    ----------
    data : array_like
        1-D sequence to window; converted with ``np.asarray``.
    win : int
        Number of elements per window.
    s : int
        Step, in elements, between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        A view created with ``as_strided``. Rows share memory with ``data``
        and with each other, so copy the result before writing to it.
    """
    data = np.asarray(data)
    n_items = len(data)
    # Clamp at zero: a negative count would make as_strided raise on short input.
    n_windows = max((n_items - win) // s + 1, 0)
    # Step by the array's real byte stride rather than itemsize, so that
    # non-contiguous views (e.g. arr[::2]) are windowed over their own
    # elements and not over neighbouring memory.
    step = data.strides[0]
    return as_strided(data, shape=(n_windows, win), strides=(step * s, step))

In [10]:
def preprocess_data(attack_name, output_path, window_size=29, stride=10):
    """Turn one attack capture into windowed id-bit sequences and save them.

    Steps: read the capture log, label the injected frames, encode every id
    as a bit array, slide a window over the frame sequence, and label a window
    1 if any frame in it is labelled. Windows labelled 0 are written to
    ``Normal_<attack_name>.npz`` and windows labelled 1 to
    ``<attack_name>.npz``. Both archives hold the arrays ``X`` and ``y``.

    Parameters
    ----------
    attack_name : str
        Capture name; used for the log file, the metadata lookup and the
        output file names.
    output_path : str or pathlib.Path
        Destination folder; created if it does not exist.
    window_size : int, optional
        Number of consecutive frames per window (default 29).
    stride : int, optional
        Step, in frames, between the starts of consecutive windows (default 10).

    Returns
    -------
    numpy.ndarray
        Length-2 array ``[n_normal_windows, n_attack_windows]``. Both entries
        are always present, so a capture with only one class cannot be
        broadcast into the wrong slot when callers accumulate the counts.
    """
    df = read_data(attack_name)
    start_injected, end_injected, injection_id, injection_data = read_info_attack(attack_name)
    df = label_data(df, start_injected, end_injected, injection_id, injection_data)
    df['can_id'] = df.id.apply(convert_canid_bits)
    df['label'] = df['label'].astype(int)

    can_id_sequences = sliding_window(df.can_id.to_numpy(), win=window_size, s=stride)
    # Each window is a row of per-frame bit arrays; stack them into one
    # numeric array per window.
    can_id_sequences = np.array([np.stack(x) for x in can_id_sequences]).astype('int8')
    label_sequences = sliding_window(df.label.to_numpy(), win=window_size, s=stride)
    # A window counts as an attack as soon as one of its frames is labelled.
    labels = np.any(label_sequences, axis=1).astype('int8')
    normal_mask = labels == 0
    attack_mask = ~normal_mask

    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)
    file_name = f'{attack_name}.npz'
    # Save normal
    print('Save to: ', output_path / ('Normal_' + file_name))
    np.savez_compressed(output_path / ('Normal_' + file_name),
                        X=can_id_sequences[normal_mask], y=labels[normal_mask])
    # Save attack
    print('Save to: ', output_path / (file_name))
    np.savez_compressed(output_path / file_name, X=can_id_sequences[attack_mask], y=labels[attack_mask])
    # bincount with minlength=2 always yields [count_0, count_1]. np.unique
    # dropped the missing class and returned a length-1 array, which then
    # broadcast silently into both slots of the caller's accumulator.
    return np.bincount(labels, minlength=2)

In [11]:
# Names of all captures described in the metadata (the index of meta_df).
attacks_list = list(meta_df.index)

In [12]:
# Keep only the captures whose name contains 'masquerade'.
masquerade_attacks = list(filter(lambda name: 'masquerade' in name, attacks_list))
masquerade_attacks

['correlated_signal_attack_1_masquerade',
 'correlated_signal_attack_2_masquerade',
 'correlated_signal_attack_3_masquerade',
 'max_engine_coolant_temp_attack_masquerade',
 'max_speedometer_attack_1_masquerade',
 'max_speedometer_attack_2_masquerade',
 'max_speedometer_attack_3_masquerade',
 'reverse_light_off_attack_1_masquerade',
 'reverse_light_off_attack_2_masquerade',
 'reverse_light_off_attack_3_masquerade',
 'reverse_light_on_attack_1_masquerade',
 'reverse_light_on_attack_2_masquerade',
 'reverse_light_on_attack_3_masquerade']

In [16]:
# This cell was executed out of order (In [16]) and sits above the cell that
# assigns output_path. Define the path here so the notebook survives
# Restart & Run All instead of raising NameError.
output_path = '../../../Data/road/Preprocessed_Data/'
Path(output_path).mkdir(parents=True, exist_ok=True)

In [13]:
# Preprocess every masquerade capture and accumulate the per-label window
# counts returned by preprocess_data.
output_path = '../../../Data/road/Preprocessed_Data/'
label_cnt = np.zeros(2, dtype=int)
for a in masquerade_attacks:
    counts = preprocess_data(attack_name=a, output_path=output_path)
    np.add(label_cnt, counts, out=label_cnt)

The attack correlated_signal_attack_1_masquerade starts from 9.191851 to 30.050109
Save to:  ../../../Data/road/Preprocessed_Data/Normal_correlated_signal_attack_1_masquerade.npz
Save to:  ../../../Data/road/Preprocessed_Data/correlated_signal_attack_1_masquerade.npz
The attack correlated_signal_attack_2_masquerade starts from 6.830477 to 28.225908
Save to:  ../../../Data/road/Preprocessed_Data/Normal_correlated_signal_attack_2_masquerade.npz
Save to:  ../../../Data/road/Preprocessed_Data/correlated_signal_attack_2_masquerade.npz
The attack correlated_signal_attack_3_masquerade starts from 4.318482 to 16.95706
Save to:  ../../../Data/road/Preprocessed_Data/Normal_correlated_signal_attack_3_masquerade.npz
Save to:  ../../../Data/road/Preprocessed_Data/correlated_signal_attack_3_masquerade.npz
The attack max_engine_coolant_temp_attack_masquerade starts from 19.979078 to 24.170183
Save to:  ../../../Data/road/Preprocessed_Data/Normal_max_engine_coolant_temp_attack_masquerade.npz
Save to: 

In [14]:
# Window counts accumulated over all masquerade captures by the loop above.
# Presumably ordered [normal, attack] — confirm against the order returned by
# preprocess_data.
label_cnt

array([87928, 69081])

## Train test split

In [17]:
# Add the directory two levels above this notebook to the import path so the
# project's `src` package can be imported from here.
import sys
sys.path.append('../../')
from src.preprocessing import train_test_split as tts


In [18]:
# Settings for the train/test split. Input and output share the preprocessed
# data folder. sid and f hold the same values that are passed to tts.main as
# split_id and test_fraction.
indir = outdir = '../../../Data/road/Preprocessed_Data/'
sid, f = 1, 0.3

In [19]:
# Use the sid / f settings from the configuration cell instead of repeating
# their literal values, so that changing the configuration changes the split.
tts.main(indir, outdir, masquerade_attacks, split_id=sid, test_fraction=f)

Processing:  ../../../Data/road/Preprocessed_Data/correlated_signal_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3092it [00:00, 3613.35it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1324it [00:00, 3673.71it/s]


Train size: 3092, Test size: 1324
Processing:  ../../../Data/road/Preprocessed_Data/correlated_signal_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3175it [00:00, 4063.07it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1360it [00:00, 3979.91it/s]


Train size: 3175, Test size: 1360
Processing:  ../../../Data/road/Preprocessed_Data/correlated_signal_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


1987it [00:00, 3942.28it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


851it [00:00, 4182.25it/s]


Train size: 1987, Test size: 851
Processing:  ../../../Data/road/Preprocessed_Data/max_engine_coolant_temp_attack_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


86it [00:00, 4685.40it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


36it [00:00, 2050.14it/s]


Train size: 86, Test size: 36
Processing:  ../../../Data/road/Preprocessed_Data/max_speedometer_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3746it [00:01, 3671.85it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1605it [00:00, 4290.85it/s]


Train size: 3746, Test size: 1605
Processing:  ../../../Data/road/Preprocessed_Data/max_speedometer_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


4791it [00:00, 5108.99it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2052it [00:00, 5940.87it/s]


Train size: 4791, Test size: 2052
Processing:  ../../../Data/road/Preprocessed_Data/max_speedometer_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


9881it [00:02, 4465.05it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


4234it [00:01, 3676.09it/s]


Train size: 9881, Test size: 4234
Processing:  ../../../Data/road/Preprocessed_Data/reverse_light_off_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


1084it [00:00, 3971.79it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


464it [00:00, 5422.97it/s]


Train size: 1084, Test size: 464
Processing:  ../../../Data/road/Preprocessed_Data/reverse_light_off_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3728it [00:00, 4307.38it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1597it [00:00, 3679.79it/s]


Train size: 3728, Test size: 1597
Processing:  ../../../Data/road/Preprocessed_Data/reverse_light_off_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3758it [00:01, 3694.15it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1610it [00:00, 3180.32it/s]


Train size: 3758, Test size: 1610
Processing:  ../../../Data/road/Preprocessed_Data/reverse_light_on_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3185it [00:00, 4521.75it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1365it [00:00, 3882.44it/s]


Train size: 3185, Test size: 1365
Processing:  ../../../Data/road/Preprocessed_Data/reverse_light_on_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


6020it [00:01, 4293.32it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2580it [00:00, 4390.90it/s]


Train size: 6020, Test size: 2580
Processing:  ../../../Data/road/Preprocessed_Data/reverse_light_on_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3829it [00:00, 4877.08it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1641it [00:00, 4876.97it/s]


Train size: 3829, Test size: 1641
Processing:  ../../../Data/road/Preprocessed_Data/Normal_correlated_signal_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


2450it [00:00, 4161.50it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1049it [00:00, 4282.03it/s]


Train size: 2450, Test size: 1049
Processing:  ../../../Data/road/Preprocessed_Data/Normal_correlated_signal_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


1550it [00:00, 3970.24it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


664it [00:00, 4315.19it/s]


Train size: 1550, Test size: 664
Processing:  ../../../Data/road/Preprocessed_Data/Normal_correlated_signal_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


852it [00:00, 4048.43it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


364it [00:00, 3474.02it/s]


Train size: 852, Test size: 364
Processing:  ../../../Data/road/Preprocessed_Data/Normal_max_engine_coolant_temp_attack_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


4245it [00:01, 4187.43it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1819it [00:00, 4261.66it/s]


Train size: 4245, Test size: 1819
Processing:  ../../../Data/road/Preprocessed_Data/Normal_max_speedometer_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


11020it [00:02, 4010.45it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


4722it [00:01, 4611.34it/s]


Train size: 11020, Test size: 4722
Processing:  ../../../Data/road/Preprocessed_Data/Normal_max_speedometer_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


5201it [00:01, 4129.41it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2229it [00:00, 4379.94it/s]


Train size: 5201, Test size: 2229
Processing:  ../../../Data/road/Preprocessed_Data/Normal_max_speedometer_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


4639it [00:01, 4482.96it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1988it [00:00, 4917.65it/s]


Train size: 4639, Test size: 1988
Processing:  ../../../Data/road/Preprocessed_Data/Normal_reverse_light_off_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3622it [00:01, 3400.39it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1551it [00:00, 3585.99it/s]


Train size: 3622, Test size: 1551
Processing:  ../../../Data/road/Preprocessed_Data/Normal_reverse_light_off_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


3079it [00:00, 3225.53it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


1319it [00:00, 4133.26it/s]


Train size: 3079, Test size: 1319
Processing:  ../../../Data/road/Preprocessed_Data/Normal_reverse_light_off_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


5931it [00:01, 3458.08it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2541it [00:00, 3822.76it/s]


Train size: 5931, Test size: 2541
Processing:  ../../../Data/road/Preprocessed_Data/Normal_reverse_light_on_attack_1_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


6001it [00:01, 3528.82it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2571it [00:00, 3984.92it/s]


Train size: 6001, Test size: 2571
Processing:  ../../../Data/road/Preprocessed_Data/Normal_reverse_light_on_attack_2_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


6037it [00:01, 3603.91it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2586it [00:00, 4269.49it/s]


Train size: 6037, Test size: 2586
Processing:  ../../../Data/road/Preprocessed_Data/Normal_reverse_light_on_attack_3_masquerade.npz
Start writing to:  ../../../Data/road/Preprocessed_Data/1/train


6929it [00:02, 3131.80it/s]


Start writing to:  ../../../Data/road/Preprocessed_Data/1/val


2969it [00:00, 3870.16it/s]

Train size: 6929, Test size: 2969



