## Use case



This is an example of how to use most of the methods in the package.
Most of the cases shown are advanced.

This notebook uses data from `example_multiprocess`; make sure to run it first.



### Initialization



In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from TSload import TSloader, DataFormat

In [2]:
# Path passed to TSloader as the location of the dataset used in this notebook.
path = "data/example_use_case/data"
# Datatype name passed to every TSloader constructed in this notebook.
datatype = "simulated"
# NOTE(review): `split` is not referenced anywhere else in the visible part of
# this notebook — confirm whether it is still needed.
split = ["0", "1"]
permission = "overwrite"  # Overwrite is used for repeated execution
loader = TSloader(path, datatype, permission=permission)

### Data operations



#### Add datatype



In [3]:
# Two IDs ("name1", "name2") with five rows each; timestamps are the strings "0".."9".
d = {
    "ID": np.repeat(["name1", "name2"], 5),
    "timestamp": [str(t) for t in range(10)],
    "feature1": list(range(10)),
    "feature2": list(range(10, 20)),
}
df = pd.DataFrame(d)
loader.initialize_datatype(df=df)
display(loader.df)

Unnamed: 0_level_0,Unnamed: 1_level_0,feature1,feature2
ID,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
name1,0,0,10
name1,1,1,11
name1,2,2,12
name1,3,3,13
name1,4,4,14
name2,5,5,15
name2,6,6,16
name2,7,7,17
name2,8,8,18
name2,9,9,19


#### Add ID



In [4]:
ID = "added_ID"
# Five rows for the new ID: string timestamps "0".."4" plus the two feature columns.
d = {
    "timestamp": [str(t) for t in range(5)],
    "feature1": list(range(5)),
    "feature2": list(range(10, 15)),
}
df = pd.DataFrame(d)
loader.add_ID(df, ID=ID)
display(loader.metadata)  # in memory

Unnamed: 0_level_0,split,IDs,features,start,test,test2
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
simulated,[],"[name1, name2, added_ID]","[feature2, feature1]",[2016-01-01],[0],[1]


#### Add feature



In [5]:
feature = "added_feature"
# One new column keyed by the string timestamps "0".."9", attached to ID "added_ID".
d = {
    "timestamp": [str(t) for t in range(10)],
    feature: list(range(10)),
}
df = pd.DataFrame(d)
loader.add_feature(df, ID="added_ID", feature=feature)
loader.metadata

Unnamed: 0_level_0,split,IDs,features,start,test,test2
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
simulated,[],"[name1, name2, added_ID]","[feature2, feature1]",[2016-01-01],[0],[1]


#### Remove data



In [6]:
# Build a second loader on the same path and datatype, then show its DataFrame.
# NOTE(review): `loader.write()` is only called further down in this notebook, so
# what this loader holds presumably depends on what an earlier run left on
# disk — confirm this is intended for a fresh Restart & Run All.
empty_loader = TSloader(path, datatype, permission=permission)
empty_loader.df

Unnamed: 0_level_0,Unnamed: 1_level_0,feature1,feature2,added_feature
ID,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
name1,0,0.0,10.0,
name1,1,1.0,11.0,
name1,2,2.0,12.0,
name1,3,3.0,13.0,
name1,4,4.0,14.0,
name2,5,5.0,15.0,
name2,6,6.0,16.0,
name2,7,7.0,17.0,
name2,8,8.0,18.0,
name2,9,9.0,19.0,


In [7]:
# Remove the datatype through the second loader, then check that its DataFrame
# has no rows left.
empty_loader.rm_datatype()
assert len(empty_loader.df) == 0

### Metadata operations



#### Add metadata



In [8]:
# Set `start`, then add the same value again, then add list values that contain
# duplicates.
# The rendered output lists `start` once and `test` / `test2` as single values,
# so repeated values appear to be collapsed — TODO confirm against TSload.
loader.overwrite_metadata(start="2016-01-01")
loader.add_metadata(start="2016-01-01")
loader.add_metadata(test=["0", "0"], test2=["1", "1"])
loader.metadata

Unnamed: 0_level_0,split,IDs,features,start,test,test2
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
simulated,[],"[name1, name2, added_ID]","[feature2, feature1]",[2016-01-01],[0],[1]


Don't forget to write the changes to disk.



In [9]:
# Write the loader's changes to disk.
loader.write()

### Dataset operations



*Execution order here is important.*



#### Instantiate



In [10]:
# Locations used by the dataset operations in this section.
data_path = "data/example_use_case/data"
multiprocess_path = "data/example_multiprocess"
copy_path = "data/example_use_case/copy"
move_path = "data/example_use_case/move"
merge_path = "data/example_use_case/example_merge"

permission = "overwrite"

# One loader per source dataset, both for the same datatype.
data_loader = TSloader(data_path, datatype, permission=permission)
multiprocess_loader = TSloader(multiprocess_path, datatype, permission=permission)

# Show each loader's metadata under an underlined title.
print("Use case metadata", "-----------------", sep="\n")
display(data_loader.metadata)
print()
print("Multiprocess metadata", "---------------------", sep="\n")
display(multiprocess_loader.metadata)

Use case metadata
-----------------


Unnamed: 0_level_0,split,IDs,features,start,test,test2
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
simulated,[],"[name1, name2, added_ID]","[feature2, feature1]",[2016-01-01],[0],[1]



Multiprocess metadata
---------------------


Unnamed: 0,split,IDs,features
splitted_data,"[split1, split0]","[name1, name2]","[feature1, feature0]"
simulated,[],[],[]


##### Copy the data to \`copy\_path\`



In [11]:
# Copy the dataset to copy_path.
data_loader.copy_dataset(copy_path)

##### Move data to \`move\_path\`



In [12]:
# Move the dataset to move_path; the following section treats move_path as the
# loader's current path.
data_loader.move_dataset(move_path)

##### Remove data from loader's path (\`move\_path\`) and set its path back



In [13]:
# Delete the dataset at the loader's current path, then point the loader back
# at data_path.
data_loader.rm_dataset()
data_loader.set_path(data_path)

##### Merge datasets



In [14]:
# Merge both datasets into merge_path and display the metadata of the returned loader.
loaders_to_merge = [data_loader, multiprocess_loader]
merge_loader = DataFormat.merge_dataset(loaders_to_merge, merge_path)
print("Dataset are merged, here is the metadata")
display(merge_loader.metadata)

Dataset are merged, here is the metadata


Unnamed: 0,split,IDs,features
splitted_data,"[split0, split1]","[name1, name2]","[feature1, feature0]"
simulated,[],"[name1, name2, added_ID]","[feature2, feature1]"
