# Make Synthetic Virtual Sensors Dataset

In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import dataLoader
from utils import utils
from utils import DataManagement
import os

In [35]:
# Column that gets blanked out in every "virtual sensor" variant of a dataset.
SENSOR_TO_REMOVE = "out_c3"

# Folder holding the simulated source files.
_DATASET_ROOT = "data/simulated_dataset"

# File stems (relative to _DATASET_ROOT, without the ".h5" suffix), grouped by
# perturbation family; each family contributes ten files.
_FILE_STEMS = (
    [f"amplitude_shift/{level}.0_{level}.0" for level in range(1, 11)]
    + [f"output_noise/{0.25 * step:.2f}" for step in range(1, 11)]
    + [f"time_shift/{shift}" for shift in range(0, 20, 2)]
)

# Maps each dataset path to the column to blank out, or None to leave the
# dataset untouched. The two cases alternate, starting with SENSOR_TO_REMOVE.
SYNTHETIC_DATASETS = {
    f"{_DATASET_ROOT}/{stem}.h5": (SENSOR_TO_REMOVE if position % 2 == 0 else None)
    for position, stem in enumerate(_FILE_STEMS)
}

CONTENT_DATASET_PATH = "data/simulated_dataset/01 - Source Domain.h5"

# NOTE: the spelling is kept as-is on purpose; this string is the on-disk path.
SAVE_FOLDER = "data/synthetic_virtual_senssors"


In [38]:
def get_v_s_dataframe(dataset_path:str, col_to_remove=None) -> pd.DataFrame:
    """Load a dataset and optionally blank out one of its columns.

    Parameters
    ----------
    dataset_path : str
        Path forwarded to ``dataLoader.load_dataframe``.
    col_to_remove : optional
        Label of the column to overwrite with ``0.``. When ``None`` (the
        default) the loaded frame is returned unchanged.

    Returns
    -------
    pd.DataFrame
        The loaded frame; if ``col_to_remove`` was given, that column holds
        ``0.`` in every row.
    """
    # The second positional argument is passed through verbatim; its meaning
    # is defined by dataLoader.load_dataframe.
    df = dataLoader.load_dataframe(dataset_path, False)

    if col_to_remove is not None:
        # Despite the parameter name, the column is kept and zero-filled
        # rather than dropped, so the set of columns stays the same.
        # NOTE(review): if the label is absent, pandas item assignment would
        # add a new all-zero column instead of failing - confirm that is
        # acceptable for every dataset passed in.
        df[col_to_remove] = 0.

    return df

def make_n_save(path:str, senssor_to_remove:str):
    """Build the virtual-sensor variant of one dataset and write it to disk.

    The dataset at ``path`` is loaded through ``get_v_s_dataframe`` (which
    zero-fills ``senssor_to_remove`` when it is not ``None``), split with
    ``DataManagement.train_valid_split``, and stored as three HDF5 files
    under ``SAVE_FOLDER`` using the key ``'data'``:

    * ``<name>.h5``        - the full frame
    * ``<name>_train.h5``  - the first element returned by the split
    * ``<name>_valid.h5``  - the second element returned by the split

    ``<name>`` is whatever ``utils.get_name(path)`` returns. The output
    directory mirrors the input path with its first component and its file
    name stripped; it is created if it does not exist yet.
    """
    # Directories between the leading path component and the file name.
    inner_dirs = path.split('/')[1:-1]
    sub_path = "/".join(inner_dirs)
    stem = utils.get_name(path)

    out_dir = f'{SAVE_FOLDER}/{sub_path}'
    os.makedirs(out_dir, exist_ok=True)

    full_frame = get_v_s_dataframe(path, senssor_to_remove)
    train_frame, valid_frame = DataManagement.train_valid_split(full_frame)

    # Write order is the same as before: full frame, then train, then valid.
    outputs = (
        ("", full_frame),
        ("_train", train_frame),
        ("_valid", valid_frame),
    )
    for suffix, frame in outputs:
        frame.to_hdf(f"{out_dir}/{stem}{suffix}.h5", key='data')
    

In [39]:
# Process every synthetic dataset in declaration order, then finish with the
# content (source-domain) dataset, which always has SENSOR_TO_REMOVE zeroed.
datasets_to_process = list(SYNTHETIC_DATASETS.items())
datasets_to_process.append((CONTENT_DATASET_PATH, SENSOR_TO_REMOVE))

for dataset_path, zeroed_sensor in datasets_to_process:
    make_n_save(dataset_path, zeroed_sensor)