# Idea
- Get first timestamp
- Resample Dataset to same timewindow
- Merge?

In [1]:
DEVICE_SOURCE = "Xi_sensor_1"

TIME_WINDOW = "30s"

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
import pickle

In [3]:
import sys
repo_base_path = Path("./../").resolve()
assert str(repo_base_path).endswith("csg_is"), f"{repo_base_path} is not a valid path to the CSG_IS repository" 

sys.path.append(str(repo_base_path))

from py_dataset import get_all_files_df
from py_dataset import read_in_files
from py_dataset import feature_plotting
from py_dataset import feature_selection
from py_dataset import net_feature_engineering
from py_dataset import entropy_feature_engineering
from py_dataset import sys_func

from py_dataset.classes import DataSource

In [4]:
feature_path = repo_base_path / "data_merg" /"top_260_features.pkl"

with open(feature_path, "rb") as f:
    SELECTED_FEATURES = pickle.load(f)

In [5]:
data_path = Path('/media/<User>/DC/MAP_CreationOfNewDatasetsForDFL/code&data/0_raw_collected_data/')
assert data_path.exists()

df = get_all_files_df.main(data_path)
df.head(1)

txt file found, will drop /media/<User>/DC/MAP_CreationOfNewDatasetsForDFL/code&data/0_raw_collected_data/Heqing/device1/3_thetick_4h/collections.txt
txt file found, will drop /media/<User>/DC/MAP_CreationOfNewDatasetsForDFL/code&data/0_raw_collected_data/Heqing/device2/1_normal/note.txt
                                           file_path
0  /media/<User>/DC/MAP_CreationOfNewDatasetsForDFL/...
RangeIndex(start=0, stop=622, step=1)
filetype
csv    530
zip     92
Name: count, dtype: int64
                                           file_path filetype  filesize_bytes
0  /media/<User>/DC/MAP_CreationOfNewDatasetsForDFL/...      csv           49419
data_source
SYS_data        92
KERN_data       91
FLS_data        91
RES_data        91
block_data      91
network_data    91
entropy_data    75
Name: count, dtype: int64


Unnamed: 0,file_path,filetype,filesize_bytes,researcher_name,device_name,label,data_source,file_name,device
0,/media//DC/MAP_CreationOfNewDatasetsForDFL/...,csv,49419,Heqing,device1,1_normal,block_data,e4_5f_01_68_35_e6.csv,Heqing_device1


In [6]:
single_dev = df[df["device"] == DEVICE_SOURCE]
assert len(single_dev["device"].unique()) == 1
print(single_dev.shape)

df = single_dev

(61, 9)


### Get First TimeStamp

In [7]:
def prepare_files(row, resample_time_window):
    if row["filetype"] != "csv" and row["filetype"] != "zip":
        return None

    min_t, max_t, df_t = read_in_files.get_all_timestamps(row['file_path'], row["filetype"],
                                            resample_time_window=resample_time_window)

    row["min_t"] = min_t
    row["max_t"] = max_t

    return row

rows = []
for k, row in df.iterrows():
    x = prepare_files(row, resample_time_window=None)

    rows.append(x)

df_timestamps = pd.DataFrame(rows)

! Dropped rows in DataFrames: (1248,)
timestamp   NaN
Name: 1, dtype: float64
timestamp   NaN
Name: 3, dtype: float64
timestamp   NaN
Name: 5, dtype: float64
timestamp   NaN
Name: 7, dtype: float64
timestamp   NaN
Name: 9, dtype: float64
timestamp   NaN
Name: 11, dtype: float64
timestamp   NaN
Name: 13, dtype: float64
timestamp   NaN
Name: 15, dtype: float64
timestamp   NaN
Name: 17, dtype: float64
timestamp   NaN
Name: 19, dtype: float64
timestamp   NaN
Name: 21, dtype: float64
timestamp   NaN
Name: 23, dtype: float64
timestamp   NaN
Name: 25, dtype: float64
timestamp   NaN
Name: 27, dtype: float64
timestamp   NaN
Name: 29, dtype: float64
timestamp   NaN
Name: 31, dtype: float64
timestamp   NaN
Name: 33, dtype: float64
timestamp   NaN
Name: 35, dtype: float64
timestamp   NaN
Name: 37, dtype: float64
timestamp   NaN
Name: 39, dtype: float64
timestamp   NaN
Name: 41, dtype: float64
timestamp   NaN
Name: 43, dtype: float64
timestamp   NaN
Name: 45, dtype: float64
timestamp   NaN
Name: 47

In [8]:
START = df_timestamps["min_t"].min()
f'Timestamps between: {df_timestamps["min_t"].min()} and {df_timestamps["max_t"].max()}'

'Timestamps between: 2023-11-03 09:13:38 and 2023-11-10 19:13:35'

### Resample files

In [9]:
def only_return_selected_columns(df: pd.DataFrame) -> pd.DataFrame:
    selected_cols = SELECTED_FEATURES & set(df.columns)
    return df[list(selected_cols)]

def create_window_sliced_depending_on_type(df: pd.DataFrame, start_timestamp: pd.Timestamp) -> pd.DataFrame:
    data_type = DataSource.from_columns(df.columns.tolist())
    print(f"Data type: {data_type}")

    match data_type:
        case (
            DataSource.FileSystem | DataSource.KernelEvents | DataSource.ResourceUsageEvents | DataSource.ResourceUsageEvents_RP4  | DataSource.BlockInputOutputEvents
        ):
            df = only_return_selected_columns(df)
            df = feature_plotting.test_transform_all_to_numeric_columns(df.copy())
            return df.resample(TIME_WINDOW, origin=start_timestamp).mean()
        case DataSource.NetRecords:
            mean_df = net_feature_engineering.create_mean_net_df_single_label(df, time_window=TIME_WINDOW, start=start_timestamp)
            return only_return_selected_columns(mean_df)
        case DataSource.Entropy:
            if "entropy_file_count" not in SELECTED_FEATURES:
                return None
            return entropy_feature_engineering.resample_high_entropy_grouping_by_label_single_label(df, time_window=TIME_WINDOW, start=start_timestamp)
            
        case DataSource.SystemCalls:
            raise NotImplementedError("SystemCalls not implemented yet")
        

def check_if_keep_source(columns):
    data_type = DataSource.from_columns(columns.tolist())
    if data_type in [DataSource.SystemCalls, DataSource.Entropy, DataSource.NetRecords]:
        return True

    chosen_cols_from_source = data_type.value & SELECTED_FEATURES

    return any(chosen_cols_from_source)

In [10]:

df_mean_collection: list[pd.DataFrame] = []

for source, group_df in df.groupby(["data_source"]):
    dfs_same_source = []

    checked_source = False
    for k, row in group_df.iterrows():
        if row["filetype"] != "csv":
            print(f"Skipping {row['file_path']}")
            continue


        df_t = read_in_files.get_all_columns(row['file_path'], row["filetype"])
        if not checked_source:
            if not check_if_keep_source(df_t.columns):
                print(f"Skipping {source}")
                break
            checked_source = True

        try:
            df_mean = create_window_sliced_depending_on_type(df_t, start_timestamp=START)
        except Exception as ex:
            print(f"Problem in file {row['file_path']}")
            raise ex
        
        if df_mean is None:
            print(f"Skipping {row['file_path']}")
            continue
        
        df_mean = df_mean.copy()
        print(f"Shape: {df_mean.shape}")
        df_mean["label"] = row["label"]
        df_mean["device"] = row["device"]
        df_mean["data_source"] = row["data_source"]

        # print(f"Start and End of Index Timestamp: {df_mean.index[0]} - {df_mean.index[-1]}")
        dfs_same_source.append(df_mean)

    if len(dfs_same_source) == 0:
        print(f"Skipping {source}")
        continue
    same_df = pd.concat(dfs_same_source, axis=0)
    print(f"same_df shape: {same_df.shape}")
    df_mean_collection.append(same_df)
    # break

len(df_mean_collection)

Data type: DataSource.FileSystem
Shape: (480, 54)
Data type: DataSource.FileSystem
Shape: (481, 54)
Data type: DataSource.FileSystem
Shape: (481, 54)
Data type: DataSource.FileSystem
Shape: (480, 54)
Data type: DataSource.FileSystem
Shape: (480, 54)
Data type: DataSource.FileSystem
Shape: (481, 54)
Data type: DataSource.FileSystem
Shape: (482, 54)
Data type: DataSource.FileSystem
Shape: (482, 54)
Data type: DataSource.FileSystem
Shape: (482, 54)
same_df shape: (4329, 57)
Data type: DataSource.KernelEvents
Shape: (481, 56)
Data type: DataSource.KernelEvents
Shape: (481, 56)
Data type: DataSource.KernelEvents
Shape: (481, 56)
Data type: DataSource.KernelEvents
Shape: (481, 56)
Data type: DataSource.KernelEvents
Shape: (481, 56)
Data type: DataSource.KernelEvents
Shape: (481, 56)
Data type: DataSource.KernelEvents
Shape: (482, 56)
Data type: DataSource.KernelEvents
Shape: (482, 56)
Data type: DataSource.KernelEvents
Shape: (483, 56)
same_df shape: (4333, 59)
Data type: DataSource.Resource

5

#### Remove Dublicate Columns and Concatenate

=> some dont work => if Index error use merge manually

In [11]:
try:
    pd.concat(df_mean_collection, axis=1)
except Exception as ex:
    print(ex)

In [12]:
from collections import defaultdict

cols = defaultdict(int)
for dfs in df_mean_collection:
    for k in dfs.columns.tolist():
        cols[k] +=1

print("Dublicates:", [k for k, v in cols.items() if v > 1])

to_ignore = ["data_source", "label", "device"]

infos = pd.DataFrame(columns=to_ignore)

for dfs in df_mean_collection:
    for k in dfs.columns.tolist():
        if k in to_ignore:
            continue

        if cols[k] > 1:
            print(k)

            dfs.rename(columns={k: f'{k}_{dfs["data_source"].iloc[0]}'}, inplace=True)

    assert dfs["device"].nunique() == 1
    assert dfs["label"].nunique() == 9
    assert dfs["data_source"].nunique() == 1
    
    infos = pd.concat([infos,dfs[to_ignore]], axis=0)

    # for t,row in dfs.iterrows():
        # infos.append({"device": row["device"], "label": row["label"], "data_source": row["data_source"], "timestamp": t})
    
    dfs.drop(columns=to_ignore, inplace=True)

Dublicates: ['jbd2:jbd2_checkpoint_stats', 'writeback:writeback_dirty_inode_enqueue', 'block:block_dirty_buffer', 'jbd2:jbd2_start_commit', 'writeback:global_dirty_state', 'writeback:writeback_dirty_page', 'writeback:writeback_dirty_inode', 'block:block_bio_remap', 'writeback:sb_clear_inode_writeback', 'ext4:ext4_writepages_result', 'ext4:ext4_es_lookup_extent_enter', 'block:block_touch_buffer', 'seconds', 'ext4:ext4_releasepage', 'writeback:writeback_mark_inode_dirty', 'ext4:ext4_free_inode', 'block:block_bio_backmerge', 'filemap:mm_filemap_delete_from_page_cache', 'ext4:ext4_journal_start', 'block:block_unplug', 'filemap:mm_filemap_add_to_page_cache', 'ext4:ext4_unlink_enter', 'block:block_getrq', 'jbd2:jbd2_handle_start', 'ext4:ext4_da_update_reserve_space', 'jbd2:jbd2_handle_stats', 'label', 'device', 'data_source', 'kmem:kmem_cache_alloc', 'kmem:kfree', 'timer:timer_start', 'page-faults', 'fib:fib_table_lookup', 'task:task_newtask', 'gpio:gpio_value', 'tcp:tcp_probe', 'kmem:mm_pag

  infos = pd.concat([infos,dfs[to_ignore]], axis=0)


In [13]:
df_all = pd.concat(df_mean_collection[:-1], axis=1).merge(df_mean_collection[-1], how='outer', left_index=True, right_index=True)
df_all.sample(2)

Unnamed: 0_level_0,ext4:ext4_mark_inode_dirty,jbd2:jbd2_checkpoint_stats_FLS_data,writeback:writeback_dirty_inode_enqueue_FLS_data,ext4:ext4_discard_preallocations,ext4:ext4_es_lookup_extent_exit,block:block_dirty_buffer_FLS_data,jbd2:jbd2_start_commit_FLS_data,ext4:ext4_da_reserve_space,jbd2:jbd2_commit_logging,jbd2:jbd2_end_commit,...,writeback:global_dirty_state_RES_data,ext4:ext4_da_update_reserve_space_RES_data,iowritetime,jbd2:jbd2_handle_stats_RES_data,iTLB-load-misses,write_ops,util,write_kbs,(OLD) AverageLength,(OLD) VarianceLength
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-03 14:10:38,0.0,0.0,2.0,0.0,348.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,423630.666667,0.0,0.0,0.0,530.260674,413085.508473
2023-11-04 16:51:38,39.5,0.5,9.5,8.0,179.0,9.5,0.5,4.0,0.5,0.5,...,1.0,3.5,36.0,24.5,617948.0,10.266667,10.44,1.6,699.466667,483845.303808


In [14]:
df_all = df_all.merge(infos["label"], how='outer', left_index=True, right_index=True)

In [15]:
df_all.shape, len(SELECTED_FEATURES)

((21664, 189), 260)

In [16]:
df_all

Unnamed: 0_level_0,ext4:ext4_mark_inode_dirty,jbd2:jbd2_checkpoint_stats_FLS_data,writeback:writeback_dirty_inode_enqueue_FLS_data,ext4:ext4_discard_preallocations,ext4:ext4_es_lookup_extent_exit,block:block_dirty_buffer_FLS_data,jbd2:jbd2_start_commit_FLS_data,ext4:ext4_da_reserve_space,jbd2:jbd2_commit_logging,jbd2:jbd2_end_commit,...,ext4:ext4_da_update_reserve_space_RES_data,iowritetime,jbd2:jbd2_handle_stats_RES_data,iTLB-load-misses,write_ops,util,write_kbs,(OLD) AverageLength,(OLD) VarianceLength,label
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.000000,0.500000,0.000000,885030.000000,1.066667,5.333333,0.200000,1012.877778,446149.452447,1_normal
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.000000,0.500000,0.000000,885030.000000,1.066667,5.333333,0.200000,1012.877778,446149.452447,1_normal
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.000000,0.500000,0.000000,885030.000000,1.066667,5.333333,0.200000,1012.877778,446149.452447,1_normal
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.000000,0.500000,0.000000,885030.000000,1.066667,5.333333,0.200000,1012.877778,446149.452447,1_normal
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.000000,0.500000,0.000000,885030.000000,1.066667,5.333333,0.200000,1012.877778,446149.452447,1_normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-10 19:12:38,23.5,1.0,4.0,4.0,389.5,2.5,0.5,2.0,0.5,0.5,...,1.000000,27.500000,7.000000,660202.500000,4.933333,8.533333,0.633333,1155.276569,359463.056745,4_bashlite
2023-11-10 19:13:08,,,,,,,,,,,...,1.333333,11.666667,8.666667,744738.666667,2.000000,8.223333,0.333333,1244.150943,297652.776999,4_bashlite
2023-11-10 19:13:08,,,,,,,,,,,...,1.333333,11.666667,8.666667,744738.666667,2.000000,8.223333,0.333333,1244.150943,297652.776999,4_bashlite
2023-11-10 19:13:08,,,,,,,,,,,...,1.333333,11.666667,8.666667,744738.666667,2.000000,8.223333,0.333333,1244.150943,297652.776999,4_bashlite


# Add Sys Call Data

In [17]:
def read_in_preprocessed_Xi_sensor_1():
    file_path = Path('/media/<User>/DC/IS_Data_Exploration_and_Feature_Engineering_for_an_IoT_Device_Behavior_Fingerprinting_Dataset/sys_system_calls_Xi_sensor_1/merged_data.npz')
    assert file_path.exists(), 'Data not found'
    vocab_file = Path("/media/<User>/DC/IS_Data_Exploration_and_Feature_Engineering_for_an_IoT_Device_Behavior_Fingerprinting_Dataset/sys_system_calls_Xi_sensor_1/vocabulary.pkl")
    assert vocab_file.exists(), f"Vocab file at {vocab_file} does not exist."

    with np.load(file_path, allow_pickle=True) as npz:
        print(npz.files)
        X = npz['arr_0']
    print(X.shape)

    vectorizer = sys_func.create_CountVectorizer(vocab_file=vocab_file)
    cols = vectorizer.get_feature_names_out()
    cols = np.append(cols, [['timestamp'], ['label']])
    print (cols.shape)
    df = pd.DataFrame(X, columns=cols)
    # df.set_index('timestamp', inplace=True)
    read_in_files._clean_and_set_index_timestamp(df)

    label_encoder = sys_func.create_label_encoder_for_labels()
    df['label'] = label_encoder.inverse_transform(df['label'].values.astype(int))

    vectors = df.copy()
    print(vectors.shape)
    
    return vectors

sys_df = read_in_preprocessed_Xi_sensor_1()
# sys_df = only_return_selected_columns(sys_df)
# sys_df_mean = sys_df.resample(TIME_WINDOW, origin=START).mean()

['arr_0']
(10932, 151)
Loaded set: ('accept', 'accept4', 'access', 'add_key', 'adjtimex', 'bind', 'brk', 'capget', 'capset', 'chdir', 'chmod', 'chown32', 'clock_gettime', 'clone', 'close', 'connect', 'dup', 'dup2', 'dup3', 'epoll_create1', 'epoll_ctl', 'epoll_wait', 'eventfd2', 'execve', 'exit', 'exit_group', 'faccessat', 'fadvise64_64', 'fallocate', 'fchdir', 'fchmod', 'fchmodat', 'fchown32', 'fcntl64', 'fgetxattr', 'flistxattr', 'fsetxattr', 'fstat64', 'fstatat64', 'fstatfs64', 'fsync', 'ftruncate64', 'futex', 'getcwd', 'getdents64', 'getegid32', 'geteuid32', 'getgid32', 'getgroups32', 'getpeername', 'getpgid', 'getpgrp', 'getpid', 'getppid', 'getrandom', 'getresgid32', 'getresuid32', 'getsid', 'getsockname', 'getsockopt', 'gettid', 'getuid32', 'getxattr', 'inotify_add_watch', 'inotify_rm_watch', 'ioctl', 'kcmp', 'keyctl', 'kill', 'lchown32', 'lgetxattr', 'llseek', 'lstat64', 'madvise', 'mkdir', 'mmap2', 'mount', 'mprotect', 'mremap', 'munmap', 'newselect', 'openat', 'pause', 'perf_e

In [18]:
sys_df.head(2)

Unnamed: 0_level_0,accept,accept4,access,add_key,adjtimex,bind,brk,capget,capset,chdir,...,umount2,uname,unlink,unlinkat,utimensat,wait4,waitid,write,writev,label
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-03 09:13:38,0.0,6.5e-05,0.012323,0.0,0.0,0.000178,0.01351,0.000393,0.000119,3.9e-05,...,0.0,0.003081,3.5e-05,0.0,0.0,0.004878,0.000162,0.007734,4e-05,1_normal
2023-11-03 09:13:49,0.0,0.000452,0.018115,0.0,0.0,0.0,0.018761,0.000962,0.000368,6e-05,...,0.0,0.004517,0.000107,0.0,0.0,0.00814,0.000251,0.016876,6.2e-05,1_normal


In [19]:
sys_df = only_return_selected_columns(sys_df)
sys_df_mean = sys_df.resample(TIME_WINDOW, origin=START).mean()

In [20]:
sys_df_mean.head(2)

Unnamed: 0_level_0,epoll_ctl,capget,getppid,getuid32,fstat64,newselect,setsockopt,getsockopt,setitimer,getpgrp,...,llseek,accept4,set_robust_list,dup3,poll,waitid,setsid,prlimit64,geteuid32,access
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-03 09:13:38,0.002622,0.000725,0.000329,0.008785,0.237478,0.060513,0.013041,0.003451,0.000125,0.0,...,0.329985,0.000366,0.001689,4.9e-05,0.004999,0.000292,8.1e-05,7.8e-05,0.009541,0.01626
2023-11-03 09:14:08,0.003254,0.000804,0.000415,0.008568,0.278861,0.057846,0.017073,0.004966,0.000129,0.0,...,0.350089,0.000461,0.002058,4.8e-05,0.006252,0.000368,0.000113,0.000109,0.009519,0.016408


In [21]:
all_df1 = df_all.merge(sys_df_mean, how='outer', left_index=True, right_index=True)
print(f"all_df1 shape: {all_df1.shape}")

all_df1 shape: (38686, 288)


In [22]:
all_df1.head(4)

Unnamed: 0_level_0,ext4:ext4_mark_inode_dirty,jbd2:jbd2_checkpoint_stats_FLS_data,writeback:writeback_dirty_inode_enqueue_FLS_data,ext4:ext4_discard_preallocations,ext4:ext4_es_lookup_extent_exit,block:block_dirty_buffer_FLS_data,jbd2:jbd2_start_commit_FLS_data,ext4:ext4_da_reserve_space,jbd2:jbd2_commit_logging,jbd2:jbd2_end_commit,...,llseek,accept4,set_robust_list,dup3,poll,waitid,setsid,prlimit64,geteuid32,access
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.329985,0.000366,0.001689,4.9e-05,0.004999,0.000292,8.1e-05,7.8e-05,0.009541,0.01626
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.329985,0.000366,0.001689,4.9e-05,0.004999,0.000292,8.1e-05,7.8e-05,0.009541,0.01626
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.329985,0.000366,0.001689,4.9e-05,0.004999,0.000292,8.1e-05,7.8e-05,0.009541,0.01626
2023-11-03 09:13:38,0.0,1.0,5.5,0.0,267.5,3.0,0.5,0.0,0.5,0.5,...,0.329985,0.000366,0.001689,4.9e-05,0.004999,0.000292,8.1e-05,7.8e-05,0.009541,0.01626


In [23]:
spath = repo_base_path / "training" / "data" / f"all_df_{TIME_WINDOW}_{DEVICE_SOURCE}.csv"

all_df1.to_csv(str(spath))