## Data format



Format different data to TSload DataFrame (TSdf).



### Initialization



In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from TSbench.TSdata import LoaderTSdf, DataFormat

Initialize a loader



In [1]:
path = "data/example_data_format/data"
datatype = "DataToFormat"
permission = "overwrite"  # Overwrite is used for repeated execution
loader = LoaderTSdf(path=path, datatype=datatype, permission=permission)
loader.restart_dataset() # for fresh re-run

### Format with default value



#### From DataFrame



In [1]:
ID = "FromDataFrame"
d = {"feature0": np.arange(10), "feature1": np.arange(10,20)}
df = pd.DataFrame(data=d)

TSdf = DataFormat.df_to_TSdf(df, ID=ID)
loader.add_data(df, ID=ID, collision="overwrite")
print(loader.get_df())

#+begin_example
                             feature0  feature1
ID            timestamp dim                    
FromDataFrame 0         0           0        10
              1         0           1        11
              2         0           2        12
              3         0           3        13
              4         0           4        14
              5         0           5        15
              6         0           6        16
              7         0           7        17
              8         0           8        18
              9         0           9        19
#+end_example

In [1]:
loader.metadata

IDs              features split_pattern
datatype                                                         
DataToFormat  [FromDataFrame]  [feature0, feature1]            []

#### From an array



In [1]:
arr_feature0 = np.arange(10)
TSdf = DataFormat.np_to_TSdf(arr_feature0, ID=ID)
loader.add_data(df, ID=ID, collision="overwrite")
print(loader.get_df())

#+begin_example
                             feature0  feature1
ID            timestamp dim                    
FromDataFrame 0         0           0        10
              1         0           1        11
              2         0           2        12
              3         0           3        13
              4         0           4        14
              5         0           5        15
              6         0           6        16
              7         0           7        17
              8         0           8        18
              9         0           9        19
#+end_example

#### From a dictionary of multiple features



In [1]:
arr_feature1 = np.arange(10)
dict_features = {"feature0" : arr_feature0, "feature1" : arr_feature1}
TSdf = DataFormat.dict_to_TSdf(dict_features, ID=ID)
loader.add_data(df, ID=ID, collision="overwrite")
print(loader.get_df())

#+begin_example
                             feature0  feature1
ID            timestamp dim                    
FromDataFrame 0         0           0        10
              1         0           1        11
              2         0           2        12
              3         0           3        13
              4         0           4        14
              5         0           5        15
              6         0           6        16
              7         0           7        17
              8         0           8        18
              9         0           9        19
#+end_example

### Format with custom value



#### Create timeseries information



Timestamp



In [1]:
T = 10 # number of timestamp
timestamp = list(pd.date_range(start='2021-01-01', periods=T).strftime('%Y-%m-%d %X'))
print("Timestamp: ", timestamp)

Timestamp:  ['2021-01-01 00:00:00', '2021-01-02 00:00:00', '2021-01-03 00:00:00', '2021-01-04 00:00:00', '2021-01-05 00:00:00', '2021-01-06 00:00:00', '2021-01-07 00:00:00', '2021-01-08 00:00:00', '2021-01-09 00:00:00', '2021-01-10 00:00:00']

Dimension



In [1]:
# dimension
dim_label = ["FirstDimension"]
print("Dimension label: ", dim_label)

Dimension label:  ['FirstDimension']

Features in different format



In [1]:
ID = "FromDataFrame"
d = {"feature0": np.arange(10), "feature1": np.arange(10,20)}
df = pd.DataFrame(data=d)

arr_feature0 = np.arange(10)
arr_feature1 = np.arange(10,20)
dict_features = {"feature0" : arr_feature0, "feature1" : arr_feature1}

#### Format for multiple features



In [1]:
TSdf1 = DataFormat.df_to_TSdf(
    df, ID=ID, timestamp=timestamp, dim_label=dim_label
)

TSdf2 = DataFormat.dict_to_TSdf(
    dict_features, ID=ID, timestamp=timestamp, dim_label=dim_label
)

# Vizualize
loader.add_data(ID=ID, data=TSdf1, collision="overwrite")

# assert they are equal removing indexed columns for order problem
assert loader.get_df().equals(TSdf2)

In [1]:
loader.write()