In [1]:
def make_can_df(log_filepath):

    """

    Puts candump data into a dataframe with columns 'time', 'aid', and 'data'

    """

    can_df = pd.read_fwf(

        log_filepath, delimiter = ' '+ '#' + '('+')',

        skiprows = 0,skipfooter=0,

        usecols = [0,2,3],

        dtype = {0:'float64', 1:str, 2: str},

        names = ['time','aid', 'data'] )

 

    can_df.aid = can_df.aid.apply(lambda x: int(x,16))

    can_df.data = can_df.data.apply(lambda x: x.zfill(16)) #pad with 0s on the left for data with dlc < 8

    can_df.time = can_df.time - can_df.time.min()

    return can_df[can_df.aid<=0x700]

In [64]:
import pandas as pd
csa1_df = make_can_df('road/attacks/correlated_signal_attack_1.log')
csa1m_df = make_can_df('road/attacks/correlated_signal_attack_1_masquerade.log')
csa2_df = make_can_df('road/attacks/correlated_signal_attack_2.log')
csa2m_df = make_can_df('road/attacks/correlated_signal_attack_2_masquerade.log')
csa3_df = make_can_df('road/attacks/correlated_signal_attack_3.log')
csa3m_df = make_can_df('road/attacks/correlated_signal_attack_3_masquerade.log')
mecta_df = make_can_df('road/attacks/max_engine_coolant_temp_attack.log') 
fa1_df= make_can_df('road/attacks/fuzzing_attack_1.log')
fa2_df= make_can_df('road/attacks/fuzzing_attack_2.log')
fa3_df= make_can_df('road/attacks/fuzzing_attack_3.log')
mectam_df = make_can_df('road/attacks/max_engine_coolant_temp_attack_masquerade.log')
msa1_df = make_can_df('road/attacks/max_speedometer_attack_1.log')
msa1m_df = make_can_df('road/attacks/max_speedometer_attack_1_masquerade.log')
msa2_df = make_can_df('road/attacks/max_speedometer_attack_2.log')
msa2m_df = make_can_df('road/attacks/max_speedometer_attack_2_masquerade.log')
msa3_df = make_can_df('road/attacks/max_speedometer_attack_3.log')
msa3m_df = make_can_df('road/attacks/max_speedometer_attack_3_masquerade.log')
rloffa1_df = make_can_df('road/attacks/reverse_light_off_attack_1.log')
rloffa1m_df = make_can_df('road/attacks/reverse_light_off_attack_1_masquerade.log')
rloffa2_df = make_can_df('road/attacks/reverse_light_off_attack_2.log')
rloffa2m_df = make_can_df('road/attacks/reverse_light_off_attack_2_masquerade.log')
rloffa3_df = make_can_df('road/attacks/reverse_light_off_attack_3.log')
rloffa3m_df = make_can_df('road/attacks/reverse_light_off_attack_3_masquerade.log')
rlona1_df = make_can_df('road/attacks/reverse_light_on_attack_1_masquerade.log')
rlona1m_df = make_can_df('road/attacks/reverse_light_on_attack_1_masquerade.log')
rlona2_df = make_can_df('road/attacks/reverse_light_on_attack_2_masquerade.log')
rlona2m_df = make_can_df('road/attacks/reverse_light_on_attack_2_masquerade.log')
rlona3_df = make_can_df('road/attacks/reverse_light_on_attack_3_masquerade.log')
rlona3m_df = make_can_df('road/attacks/reverse_light_on_attack_3_masquerade.log')

In [3]:
import re
import pandas as pd
def label_and_format_dataframe(df: pd.DataFrame, injection_data_str: str, output_csv: str = None):
    """
    Formats a parsed CAN DataFrame and labels messages based on a wildcard-aware injection pattern.

    Args:
        df (pd.DataFrame): Input DataFrame with columns ['time', 'aid', 'data']
        injection_data_str (str): Injection string with possible 'X' wildcards (e.g., '59XX45XX0000FFFF')
        output_csv (str, optional): Path to save the labeled output CSV (if given)

    Returns:
        pd.DataFrame: Labeled and formatted DataFrame
    """

    # Copy input DataFrame and standardize
    df = df.copy()
    df['data'] = df['data'].str.upper().str.zfill(16)

    # Split into DATA[0] to DATA[7]
    for i in range(8):
        df[f'DATA[{i}]'] = df['data'].str[i*2:i*2+2].apply(lambda x: int(x, 16))

    # Add DLC = 8
    df['DLC'] = 8

    # Build regex pattern with wildcard support
    inj = injection_data_str.upper().zfill(16)
    pattern = ''.join(['..' if 'X' in inj[i:i+2] else inj[i:i+2] for i in range(0, len(inj), 2)])
    regex = re.compile(f"^{pattern}$")

    # Assign 1 for malicious (matches), 0 for benign
    df['Flag'] = df['data'].apply(lambda x: 1 if regex.match(x) else 0)

    # Rename columns
    df.rename(columns={'time': 'Timestamp', 'aid': 'CAN ID'}, inplace=True)

    # Reorder columns
    ordered_cols = ['Timestamp', 'CAN ID', 'DLC'] + [f'DATA[{i}]' for i in range(8)] + ['Flag']
    df = df[ordered_cols]
    df.drop('Timestamp', axis=1, inplace=True)
    display(df['Flag'].value_counts())
    # Save if requested
    if output_csv:
        df.to_csv(output_csv, index=False)
        print(f" Saved labeled file to: {output_csv}")

    return df


In [None]:
df_injections = {
    "csa1":      (csa1_df, "595945450000FFFF"),
    "csa1m":     (csa1m_df, "595945450000FFFF"),
    "csa2":      (csa2_df, "595945450000FFFF"),
    "csa2m":     (csa2m_df, "595945450000FFFF"),
    "csa3":      (csa3_df, "595945450000FFFF"),
    "csa3m":     (csa3m_df, "595945450000FFFF"),
    "fa1":       (fa1_df, "FFFFFFFFFFFFFFFF"),
    "fa2":       (fa2_df, "FFFFFFFFFFFFFFFF"),
    "fa3":       (fa3_df, "FFFFFFFFFFFFFFFF"),
    "msa1":      (msa1_df, "XXXXXXXXXXFFXXXX"),
    "msa1m":     (msa1m_df, "XXXXXXXXXXFFXXXX"),
    "msa2":      (msa2_df, "XXXXXXXXXXFFXXXX"),
    "msa2m":     (msa2m_df, "XXXXXXXXXXFFXXXX"),
    "msa3":      (msa3_df, "XXXXXXXXXXFFXXXX"),
    "msa3m":     (msa3m_df, "XXXXXXXXXXFFXXXX"),
    "mecta":  (mecta_df, "XXXXXXXXXXFFXXXX"),
    "mectam": (mectam_df, "XXXXXXXXXXFFXXXX"),
    "rloffa1":   (rloffa1_df, "XXXX04XXXXXXXXXX"),
    "rloffa1m":  (rloffa1m_df, "XXXX04XXXXXXXXXX"),
    "rloffa2":   (rloffa1_df, "XXXX04XXXXXXXXXX"),
    "rloffa2m":  (rloffa1m_df, "XXXX04XXXXXXXXXX"),
    "rloffa3":   (rloffa1_df, "XXXX04XXXXXXXXXX"),
    "rloffa3m":  (rloffa1m_df, "XXXX04XXXXXXXXXX"),
    "rlona1":    (rlona2_df, "XXXX0CXXXXXXXXXX"),
    "rlona1m":   (rlona2m_df, "XXXX0CXXXXXXXXXX"),
    "rlona2":    (rlona2_df, "XXXX0CXXXXXXXXXX"),
    "rlona2m":   (rlona2m_df, "XXXX0CXXXXXXXXXX"),
    "rlona3":    (rlona2_df, "XXXX0CXXXXXXXXXX"),
    "rlona3m":   (rlona2m_df, "XXXX0CXXXXXXXXXX"),
}

# Loop through and apply the function
for name, (df, inj_str) in df_injections.items():
    output_path = f"road/preprocessed/{name}.csv"
    label_and_format_dataframe(df, inj_str, output_csv=output_path)

Flag
0    74151
1     2087
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/csa1.csv


Flag
0    72065
1     2087
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/csa1m.csv


Flag
0    63259
1     2141
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/csa2.csv


Flag
0    61119
1     2141
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/csa2m.csv


Flag
0    38002
1     1265
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/csa3.csv


Flag
0    36738
1     1265
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/csa3m.csv


Flag
0    45656
1      592
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/fa1.csv


Flag
0    29964
1      353
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/fa2.csv


Flag
0    12287
1      116
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/fa3.csv


Flag
0    194553
1      5555
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/msa1.csv


Flag
0    192109
1      5555
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/msa1m.csv


Flag
0    133104
1      3776
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/msa2.csv


Flag
0    129964
1      3776
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/msa2m.csv


Flag
0    192559
1      7890
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/msa3.csv


Flag
0    186452
1      7890
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/msa3m.csv


Flag
0    57932
1       88
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/mecta_df.csv


Flag
0    57890
1       88
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/mectam_df.csv


Flag
0    60119
1     3535
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rloffa1.csv


Flag
0    59447
1     3535
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rloffa1m.csv


Flag
0    60119
1     3535
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rloffa2.csv


Flag
0    59447
1     3535
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rloffa2m.csv


Flag
0    60119
1     3535
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rloffa3.csv


Flag
0    59447
1     3535
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rloffa3m.csv


Flag
0    156318
1      5065
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rlona1.csv


Flag
0    156318
1      5065
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rlona1m.csv


Flag
0    156318
1      5065
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rlona2.csv


Flag
0    156318
1      5065
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rlona2m.csv


Flag
0    156318
1      5065
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rlona3.csv


Flag
0    156318
1      5065
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/rlona3m.csv


In [67]:
import glob
import os
# Path to the folder containing your CSV files
csv_folder = 'road/preprocessed'  # Update this
# Read all CSV files in the folder
csv_files = glob.glob(os.path.join(csv_folder, "*.csv"))
# Read and concatenate all files
attack_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)
# Save to a single merged CSV file
attack_df.to_csv("road/preprocessed/merged_attack_data.csv", index=False)


In [2]:
import glob
import os
import pandas as pd

# Path to the folder containing CSV files
csv_folder = 'road/preprocessed'

# Read all CSV files in the folder
csv_files = glob.glob(os.path.join(csv_folder, "*.csv"))

# Filter: exclude files ending with 'm.csv'
csv_files = [f for f in csv_files if not os.path.basename(f).rstrip(".csv").endswith("m")]

# Read and concatenate all files
attack_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# Save to a single merged CSV file
attack_df.to_csv("road/preprocessed/merged/attack_data_without_masquerade.csv", index=False)

print(f"Merged {len(csv_files)} files into attack_data_without_masquerade.csv")


Merged 16 files into attack_data_without_masquerade.csv


In [4]:
csv_files = [f for f in csv_files if not os.path.basename(f).rstrip(".csv").endswith("m")]
print(csv_files)

['road/preprocessed\\csa1.csv', 'road/preprocessed\\csa2.csv', 'road/preprocessed\\csa3.csv', 'road/preprocessed\\fa1.csv', 'road/preprocessed\\fa2.csv', 'road/preprocessed\\fa3.csv', 'road/preprocessed\\mecta.csv', 'road/preprocessed\\msa1.csv', 'road/preprocessed\\msa2.csv', 'road/preprocessed\\msa3.csv', 'road/preprocessed\\rloffa1.csv', 'road/preprocessed\\rloffa2.csv', 'road/preprocessed\\rloffa3.csv', 'road/preprocessed\\rlona1.csv', 'road/preprocessed\\rlona2.csv', 'road/preprocessed\\rlona3.csv']


In [68]:
display(attack_df['Flag'].value_counts())

Flag
0    5987637
1     199807
Name: count, dtype: int64

In [4]:
addbl_df=make_can_df('road/ambient/ambient_dyno_drive_basic_long.log')
addbs_df=make_can_df('road/ambient/ambient_dyno_drive_basic_short.log')
addba_df=make_can_df('road/ambient/ambient_dyno_drive_benign_anomaly.log')
addel_df=make_can_df('road/ambient/ambient_dyno_drive_extended_long.log')
addes_df=make_can_df('road/ambient/ambient_dyno_drive_extended_short.log')
addri_df=make_can_df('road/ambient/ambient_dyno_drive_radio_infotainment.log')
addw_df=make_can_df('road/ambient/ambient_dyno_drive_winter.log')
adeab_df=make_can_df('road/ambient/ambient_dyno_exercise_all_bits.log')
adiri_df=make_can_df('road/ambient/ambient_dyno_idle_radio_infotainment.log')
adr_df=make_can_df('road/ambient/ambient_dyno_reverse.log')
ahsdd_df=make_can_df('road/ambient/ambient_highway_street_driving_diagnostics.log')
ahsdl_df=make_can_df('road/ambient/ambient_highway_street_driving_long.log')

In [5]:
df_injections = {
    "addbl":      (addbl_df, "595945450000FFFF"),
    "addbs":     (addbs_df, "595945450000FFFF"),
    "addba":      (addba_df, "595945450000FFFF"),
    "addel":     (addel_df, "595945450000FFFF"),
    "addes":      (addes_df, "595945450000FFFF"),
    "addri":     (addri_df, "595945450000FFFF"),
    "addw":       (addw_df, "595945450000FFFF"),
    "adeab":       (adeab_df, "595945450000FFFF"),
    "adiri":       (adiri_df, "595945450000FFFF"),
    "adr":      (adr_df, "595945450000FFFF"),
    "ahsdd":     (ahsdd_df, "595945450000FFFF"),
    "ahsdl":      (ahsdl_df, "595945450000FFFF"),
}

for name, (df, inj_str) in df_injections.items():
    output_path = f"road/preprocessed/ambient/{name}.csv"
    label_and_format_dataframe(df, inj_str, output_csv=output_path)

Flag
0    2802432
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addbl.csv


Flag
0    996482
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addbs.csv


Flag
0    720929
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addba.csv


Flag
0    1335049
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addel.csv


Flag
0    741572
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addes.csv


Flag
0    874018
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addri.csv


Flag
0    106939
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/addw.csv


Flag
0    4136020
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/adeab.csv


Flag
0    1473098
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/adiri.csv


Flag
0    115287
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/adr.csv


Flag
0    1053809
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/ahsdd.csv


Flag
0    8264936
Name: count, dtype: int64

 Saved labeled file to: road/preprocessed/ambient/ahsdl.csv


In [6]:
import glob
import os
# Path to the folder containing your CSV files
csv_folder = 'road/preprocessed/ambient'  # Update this
# Read all CSV files in the folder
csv_files = glob.glob(os.path.join(csv_folder, "*.csv"))
# Read and concatenate all files
attack_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)
# Save to a single merged CSV file
attack_df.to_csv("road/preprocessed/merged_normal_data.csv", index=False)

In [5]:
import pandas as pd
import glob
import os

# Paths
csv_folder = "road/preprocessed"
save_folder = "road/preprocessed/multiclass"
os.makedirs(save_folder, exist_ok=True)  # create folder if not exists

# Mapping substrings in filenames → custom flag
attack_map = {
    "rlona": 6,
    "rloffa": 5,
    "mecta": 4,
    "msa": 3,
    "fa": 2,
    "csa": 1
}


# Get all CSVs in folder
csv_files = glob.glob(os.path.join(csv_folder, "*.csv"))

for f in csv_files:
    df = pd.read_csv(f)

    # find which attack this file belongs to
    for key, flag_val in attack_map.items():
        if key in os.path.basename(f).lower():   # case-insensitive check
            df.loc[df["Flag"] == 1, "Flag"] = flag_val   # overwrite flag column
            save_path = os.path.join(save_folder, os.path.basename(f))
            df.to_csv(save_path, index=False)
            print(f"Assigned flag={flag_val} to {os.path.basename(f)}, saved -> {save_path}")
            break


Assigned flag=1 to csa1.csv, saved -> road/preprocessed/multiclass\csa1.csv
Assigned flag=1 to csa1m.csv, saved -> road/preprocessed/multiclass\csa1m.csv
Assigned flag=1 to csa2.csv, saved -> road/preprocessed/multiclass\csa2.csv
Assigned flag=1 to csa2m.csv, saved -> road/preprocessed/multiclass\csa2m.csv
Assigned flag=1 to csa3.csv, saved -> road/preprocessed/multiclass\csa3.csv
Assigned flag=1 to csa3m.csv, saved -> road/preprocessed/multiclass\csa3m.csv
Assigned flag=2 to fa1.csv, saved -> road/preprocessed/multiclass\fa1.csv
Assigned flag=2 to fa2.csv, saved -> road/preprocessed/multiclass\fa2.csv
Assigned flag=2 to fa3.csv, saved -> road/preprocessed/multiclass\fa3.csv
Assigned flag=4 to mecta.csv, saved -> road/preprocessed/multiclass\mecta.csv
Assigned flag=4 to mectam.csv, saved -> road/preprocessed/multiclass\mectam.csv
Assigned flag=3 to msa1.csv, saved -> road/preprocessed/multiclass\msa1.csv
Assigned flag=3 to msa1m.csv, saved -> road/preprocessed/multiclass\msa1m.csv
Assi

In [6]:
import glob
import os
import pandas as pd

# Path to the folder containing CSV files
csv_folder = 'road/preprocessed/multiclass'

# Read all CSV files in the folder
csv_files = glob.glob(os.path.join(csv_folder, "*.csv"))

# Filter: exclude files ending with 'm.csv'
csv_files = [f for f in csv_files if not os.path.basename(f).rstrip(".csv").endswith("m")]

# Read and concatenate all files
attack_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# Save to a single merged CSV file
attack_df.to_csv("road/preprocessed/merged/multiclass_attack_data_without_masquerade.csv", index=False)

print(f"Merged {len(csv_files)} files into multiclass_attack_data_without_masquerade.csv")


Merged 16 files into multiclass_attack_data_without_masquerade.csv
