# Notebook to create the dataset for the Grid Search

This notebook creates the tabular dataset from the cycle data, using only the selected subset of signals.


Input: 
- <cycle_order.csv> the file with the list of cycles and their labels
- <selected> the name of the signal selection under evaluation, loaded from <"selected/"+selected+".pkl">
- <DeltaT> the window duration: default value = 60 minutes
- <strategy> the strategy for the feature extraction process: default value = ad-hoc

Output: 
- <"dataset/"+selected+".pkl"> the dataset for the Grid Search

In [1]:
import pandas as pd
import pathlib
import pickle
import sys
import os 

# Project root: the parent of the current working directory, so the relative
# paths below only resolve when the notebook is run from its own folder.
ParentPath = str(pathlib.Path().absolute().parent)

# Make the project-local classes importable before importing from them.
sys.path.insert(1, ParentPath+'/classes/public/')
from makerDatasetSpecialized import MakerDatasetSpec


# Output folder for the generated dataset. exist_ok=True replaces the separate
# existence check and makes the cell safe to re-run.
os.makedirs('dataset', exist_ok=True)

# Path of the cycles
fpath = ParentPath+'/data/'

# List of the cycles and their labels
file_label = pd.read_csv(fpath+'cycle_order.csv', sep=",")


# Window duration, in minutes
DeltaT = 60
# Name of the signal selection under evaluation
selected = "All"

# NOTE: unpickling can execute arbitrary code; only load selection files
# produced by this project. The context manager closes the file handle,
# which the previous bare open() call left open.
with open("selected/"+selected+".pkl", "rb") as selection_file:
    signalsSelected = pickle.load(selection_file)


# MakerDatasetSpec: transforms cycle data into a tabular dataset usable by
# state-of-the-art classifiers.
#
# Parameters:
#   file_label:   the file containing the cycles and their labels, ordered by acquisition time
#   fpath:        the path where the cycles are stored
#   minuteWindow: how frequently a new row is created in the tabular dataset.
#                 Default = 60 minutes, i.e., the full cycle
maker = MakerDatasetSpec(file_label, fpath, minuteWindow = DeltaT)

# makeDataset implements the different feature extraction strategies.
# If different from ad-hoc, specify it by using the argument strategy = <strategy>.
# List of implemented strategies:
#   "ad-hoc"
#   []  - list with a subset of the ad-hoc features
#   "tsfel-all"
#   "tsfel-all-corr"
#   "tsfel-statistical"
#   "tsfel-temporal"
#   "vest"
#
# Returns:
#   Xnp: the table with all features for each cycle
#   Ynp: the label vector
#   Id:  the list of cycle ids
Xnp, Ynp, Id = maker.makeDataset(signals=signalsSelected)

# Assemble the final table: ExpID first, then the feature columns, Label last.
df = pd.DataFrame.from_records(Xnp)
df.columns = maker.features
df.insert(loc=0, column='ExpID', value=Id)
df['Label']=Ynp

# Write through a context manager so the file is flushed and closed even if
# pickling fails; the previous bare open() never closed the handle.
with open("dataset/"+selected+".pkl", "wb") as dataset_file:
    pickle.dump(df, dataset_file)