In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
sys.path.append(os.path.abspath('../../src/'))
sys.path.append(os.path.abspath('../../'))

In [2]:
import numpy as np
from preprocessing.train_test_split import train_test_split, resampling_data
import itertools
from pathlib import Path
import random

from src.preprocessing.train_test_split import Writer
from src.preprocessing.train_test_split import write_to_file

## Generate train/val/test files

In [3]:
# Root data directory; the split cell reads '<data_dir>/<car model>/<class>.npz'.
data_dir = '../../../Data/LISA/'
# Car-model sub-directory names looked up under data_dir.
car_models = ['Kia', 'BMW', 'Tesla']
# Class names; each one is the stem of an .npz archive inside a car-model directory.
classes = ['Normal', 'Fuzzy', 'Replay']

In [4]:
# Split names, paired positionally with the index arrays returned by
# train_test_split (the recorded output shows ~70% / 10% / 20% sizes).
split_names = ['train', 'val', 'test']
data_root = Path(data_dir)

for cm, c in itertools.product(car_models, classes):
    file_name = data_root / f'{cm}/{c}.npz'
    # np.load on an .npz returns an NpzFile that keeps the archive open;
    # the context manager closes it once both arrays have been read.
    with np.load(file_name) as data:
        X, y = data['X'], data['y']
    print(f'Car: {cm} - Class {c} size = {len(X)}')
    indices_lists = train_test_split(len(X), test_fraction=0.2, val_fraction=0.1)
    # A distinct loop variable avoids rebinding the list that is being zipped.
    for split_name, indices in zip(split_names, indices_lists):
        print(f'{split_name} size: ', len(indices))
        X_subset = X[indices]
        y_subset = y[indices]
        save_file = data_root / f'{cm}/{split_name}_{c}.npz'
        # Writing is currently disabled; uncomment to persist the split to disk.
        # np.savez_compressed(save_file, X=X_subset, y=y_subset)

Car: Kia - Class Normal size = 80658
train size:  56462
val size:  8065
test size:  16131
Car: Kia - Class Fuzzy size = 35868
train size:  25109
val size:  3586
test size:  7173
Car: Kia - Class Replay size = 1282
train size:  898
val size:  128
test size:  256
Car: BMW - Class Normal size = 43194
train size:  30237
val size:  4319
test size:  8638
Car: BMW - Class Fuzzy size = 80938
train size:  56658
val size:  8093
test size:  16187
Car: BMW - Class Replay size = 38493
train size:  26946
val size:  3849
test size:  7698
Car: Tesla - Class Normal size = 157987
train size:  110592
val size:  15798
test size:  31597
Car: Tesla - Class Fuzzy size = 34960
train size:  24472
val size:  3496
test size:  6992
Car: Tesla - Class Replay size = 5998
train size:  4200
val size:  599
test size:  1199


# Generate train/val/test folders

In [3]:
def create_folder_from_npz_file(car_model, in_dir, out_dir, dir_type, size, attack_normal_ratio):
    """Resample one split of a car model's data and set up a writer for it.

    Args:
        car_model: Name of the car model; also used as the sub-directory
            created under ``out_dir``.
        in_dir: Input directory, forwarded to ``resampling_data``.
        out_dir: Base output directory (``str`` or path-like).
        dir_type: Split name, forwarded to ``resampling_data`` as
            ``file_type`` and used in the writer's ``type_name``.
        size: Requested sample count, forwarded as ``N_samples``.
        attack_normal_ratio: Forwarded unchanged to ``resampling_data``.

    Returns:
        None. The call to ``write_to_file`` is currently disabled, so the
        function only prints a summary and constructs the ``Writer``.
    """
    # Resample data
    class_dist, _class_size, Xs, ys = resampling_data(
        car_model=car_model,
        in_dir=in_dir,
        file_type=dir_type,
        N_samples=size,
        attack_normal_ratio=attack_normal_ratio,
    )
    X = np.concatenate(Xs)
    y = np.concatenate(ys)
    print('Resample data info')
    print('Distribution: ', class_dist)
    print('Size: ', [len(Xi) for Xi in Xs])
    print('Total size: ', len(X))
    # Join with pathlib rather than str.format: this accepts Path objects and
    # does not break when out_dir itself contains '{' or '}'.
    out_path = Path(out_dir) / car_model
    writer = Writer(outdir=out_path, type_name=f'{dir_type}_{size}')
    # Writing is currently disabled; uncomment to persist the resampled data.
    # return write_to_file(writer, X, y)


In [4]:
in_dir = '../../../Data/LISA/'
out_dir = '../../../Data/LISA/'
car_model = ['Kia', 'BMW', 'Tesla']

# Dedicated, seeded generator so that re-running the cell (or the whole
# notebook) draws the same attack/normal ratios every time.
# NOTE(review): resampling_data may use its own randomness — seed it separately if so.
RATIO_SEED = 0
ratio_rng = random.Random(RATIO_SEED)

# Requested sample count per split; enable the extra splits as needed.
split_sizes = {
    'train': 50000,
    # 'test': 10000,
    # 'val': 10000,
}

for cm in car_model:
    print('Model: ', cm)
    # Generate the attack/normal ratio (shared by every split of this car model)
    attack_normal_ratio = ratio_rng.uniform(0.2, 0.3)
    for dir_type, size in split_sizes.items():
        create_folder_from_npz_file(car_model=cm, in_dir=in_dir, out_dir=out_dir,
                                    dir_type=dir_type, size=size,
                                    attack_normal_ratio=attack_normal_ratio)

Model:  Kia
Resample data info
Distribution:  [0.76230784 0.11884608 0.11884608]
Size:  [38115  5942  5942]
Total size:  44955
Model:  BMW
Resample data info
Distribution:  [0.77154821 0.1142259  0.1142259 ]
Size:  [38577  5711  5711]
Total size:  41659
Model:  Tesla
Resample data info
Distribution:  [0.75256223 0.12371889 0.12371889]
Size:  [37628  6185  6185]
Total size:  48013
