In [1]:
from base import params
import glob
import os

### Initialize Directories


In [2]:
# Create every working directory the pipeline writes into; exist_ok=True makes
# this cell safe to re-run when the directories are already present.
for directory in (params.edge_dir, params.fog_dir, params.raw_dir):
    os.makedirs(directory, exist_ok=True)

### Download ToN_IoT Datasets
1. Downloads ToN_IoT Processed_datasets from https://research.unsw.edu.au/projects/toniot-datasets
2. Extracts "Processed_IoT_datasets", "Processed_Linux_datasets", and "Processed_Windows_datasets" into "data_src/ToN_IoT/"

In [3]:
# Show which raw ToN_IoT CSV files are present before any cleaning runs.
# Each entry pairs the label used in the heading with the path prefix that is
# concatenated directly with '*.csv' to form the glob pattern.
raw_sources = (
    ("IoT", params.raw_edge),
    ("Linux", params.raw_linux),
    ("Windows", params.raw_windows),
)
for label, raw_prefix in raw_sources:
    print(f"List of Processed {label} Datasets:")
    # A plain loop instead of a list comprehension: the prints are side effects,
    # so there is no list worth building (or suppressing with a trailing ';').
    for csv_path in glob.glob(f'{raw_prefix}*.csv'):
        print(csv_path)

List of Processed IoT Datasets:
data_src/ToN_IoT/Processed_IoT_dataset/IoT_Motion_Light.csv
data_src/ToN_IoT/Processed_IoT_dataset/IoT_GPS_Tracker.csv
data_src/ToN_IoT/Processed_IoT_dataset/IoT_Garage_Door.csv
data_src/ToN_IoT/Processed_IoT_dataset/IoT_Weather.csv
data_src/ToN_IoT/Processed_IoT_dataset/IoT_Fridge.csv
data_src/ToN_IoT/Processed_IoT_dataset/IoT_Modbus.csv
data_src/ToN_IoT/Processed_IoT_dataset/IoT_Thermostat.csv
List of Processed Linux Datasets:
data_src/ToN_IoT/Processed_Linux_dataset/linux_memory1.csv
data_src/ToN_IoT/Processed_Linux_dataset/linux_memory2.csv
data_src/ToN_IoT/Processed_Linux_dataset/linux_disk_2.csv
data_src/ToN_IoT/Processed_Linux_dataset/linux_disk_1.csv
data_src/ToN_IoT/Processed_Linux_dataset/Linux_process_2.csv
data_src/ToN_IoT/Processed_Linux_dataset/Linux_process_1.csv
List of Processed Windows Datasets:
data_src/ToN_IoT/Processed_Windows_dataset/windows7_dataset.csv
data_src/ToN_IoT/Processed_Windows_dataset/windows10_dataset.csv


### Cleaning: Edge Datasets
Edge Devices included:
1. Fridge
2. Garage Door
3. Modbus
4. Motion Light
5. Thermostat
6. Weather

In [4]:
# Edge cleaning stage: build the cleaner from the project-local _cleaning_edge
# module and run its whole pipeline in a single call.
from _cleaning_edge import cleaning_edge_dataset
preps = cleaning_edge_dataset()
# Per the recorded output below, this saves one Edge_IoT_<device>.csv per device
# under datastream/ToN_IoT_Edge_dataset/ and reports each file's data length.
preps.Run_at_once()

Dataset Fridge saved into datastream/ToN_IoT_Edge_dataset/Edge_IoT_Fridge.csv. Data length: 452,028
Dataset Garage_Door saved into datastream/ToN_IoT_Edge_dataset/Edge_IoT_Garage_Door.csv. Data length: 179,681
Dataset Modbus saved into datastream/ToN_IoT_Edge_dataset/Edge_IoT_Modbus.csv. Data length: 281,037
Dataset Motion_Light saved into datastream/ToN_IoT_Edge_dataset/Edge_IoT_Motion_Light.csv. Data length: 222,351
Dataset Thermostat saved into datastream/ToN_IoT_Edge_dataset/Edge_IoT_Thermostat.csv. Data length: 389,638
Dataset Weather saved into datastream/ToN_IoT_Edge_dataset/Edge_IoT_Weather.csv. Data length: 593,295


### Make Data Fusion on Edge Dataset
A new Edge dataset is created through data fusion, combining all the features available at the same timestamp. Each Edge device will have its own dataset and ground truth.

In [5]:
# Edge fusion stage: build the fusion runner from the project-local
# _edge_fusion_procedures module and run its whole pipeline in a single call.
from _edge_fusion_procedures import data_fusion_at_edge
fusion = data_fusion_at_edge()
# Per the recorded output below, this saves one Edge_Fusion_<device>.csv per
# device under datastream/ToN_IoT_Edge_dataset/.
# NOTE(review): the recorded Motion_Light length is 222,673 here but 222,351 in
# the cleaning output, while every other device's length is unchanged - confirm
# this difference is expected.
fusion.Run_at_once()

Dataset Edge FRIDGE saved to datastream/ToN_IoT_Edge_dataset/Edge_Fusion_Fridge.csv. Data Length:452,028.
Dataset Edge GARAGE_DOOR saved to datastream/ToN_IoT_Edge_dataset/Edge_Fusion_Garage_Door.csv. Data Length:179,681.
Dataset Edge MODBUS saved to datastream/ToN_IoT_Edge_dataset/Edge_Fusion_Modbus.csv. Data Length:281,037.
Dataset Edge MOTION_LIGHT saved to datastream/ToN_IoT_Edge_dataset/Edge_Fusion_Motion_Light.csv. Data Length:222,673.
Dataset Edge THERMOSTAT saved to datastream/ToN_IoT_Edge_dataset/Edge_Fusion_Thermostat.csv. Data Length:389,638.
Dataset Edge WEATHER saved to datastream/ToN_IoT_Edge_dataset/Edge_Fusion_Weather.csv. Data Length:593,295.


### Cleaning: Fog Datasets

In [None]:
# Fog cleaning stage: build the cleaner from the project-local _cleaning_fog
# module and run its whole pipeline in a single call.
# NOTE(review): this cell has no execution count or recorded output in the saved
# notebook, so its results are not shown here.
from _cleaning_fog import cleaning_fog_dataset
# Rebinds `preps`, which the Edge cleaning cell also assigns; after this line the
# name refers to the Fog cleaner.
preps = cleaning_fog_dataset()
preps.Run_at_once()

### Make Data Fusion on Fog Dataset
A new Fog dataset is created through data fusion, combining all the features available at the same timestamp. These Fog datasets inherit their ground-truth labels from the Edge datasets.

Note:
The Windows7 and Windows10 datasets are no longer used as Fog datasets, since they contain far less data than the labels available in the Edge ground truth.

In [None]:
# Fog fusion stage: build the fusion runner from the project-local
# _fog_fusion_procedures module and run its whole pipeline in a single call.
# NOTE(review): this cell has no execution count or recorded output in the saved
# notebook, so its results are not shown here.
from _fog_fusion_procedures import data_fusion_at_fog
# Rebinds `fusion`, which the Edge fusion cell also assigns; after this line the
# name refers to the Fog fusion runner.
fusion = data_fusion_at_fog()
fusion.Run_at_once()