## Multiprocess



### Initialization



In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from TSload import TSloader, LoadersProcess

In [2]:
# Settings shared by the TSloader instances created in this notebook.
path = "data/example_multiprocess"  # first positional argument of every TSloader below
split = ["split0", "split1"]  # passed as `split=` when the metaloader is created
permission = "overwrite"  # Overwrite is used for repeated execution

In [3]:
# Build two small example frames that share the same IDs but carry different
# feature columns and non-overlapping timestamps.
datatype = "splitted_data"
d = {
    "ID": np.repeat(["name1", "name2"], 5),  # five rows per ID
    "timestamp": list(map(str, range(10))),
    "feature0": list(range(10)),
    "feature1": list(range(10)),
}

# df1 keeps feature0 only; its timestamps stay as the strings "0".."9".
df1 = pd.DataFrame(data=d).drop(columns="feature1")

# df2 keeps feature1 only and is built directly from `d`; its timestamps are
# converted to integers and shifted to 10..19.
# NOTE(review): df1 timestamps are str while df2 timestamps are int —
# confirm that this dtype difference is intended.
df2 = (
    pd.DataFrame(data=d)
    .drop(columns="feature0")
    .assign(timestamp=lambda frame: frame["timestamp"].map(int) + 10)
)
# Show both example frames in the cell output.
display(df1)
display(df2)

Unnamed: 0,ID,timestamp,feature0
0,name1,0,0
1,name1,1,1
2,name1,2,2
3,name1,3,3
4,name1,4,4
5,name2,5,5
6,name2,6,6
7,name2,7,7
8,name2,8,8
9,name2,9,9


Unnamed: 0,ID,timestamp,feature1
0,name1,10,0
1,name1,11,1
2,name1,12,2
3,name1,13,3
4,name1,14,4
5,name2,15,5
6,name2,16,6
7,name2,17,7
8,name2,18,8
9,name2,19,9


### Multiprocessing writing



##### Set the splitting scheme



In [4]:
# Create a loader for `datatype` that declares the split names, then call
# write() on it before any per-split loader is created.
# NOTE(review): write() presumably persists the splitting scheme/metadata
# under `path` — confirm against the TSload documentation.
metaloader = TSloader(path, datatype, split=split, permission=permission)
metaloader.write()

In [5]:
# Inspect the metadata currently held by the metaloader.
metaloader.metadata

Unnamed: 0_level_0,split,IDs,features
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
splitted_data,"[split0, split1]","[name2, name1]","[feature1, feature0]"


##### Initialize the loaders



In [6]:
# One loader per split, both created with parallel=True.
# NOTE(review): subsplit_indices presumably index into `split`
# (0 -> "split0", 1 -> "split1") — confirm against the TSload documentation.
loader1 = TSloader(path, datatype, subsplit_indices=[0],
                   permission=permission, parallel=True)
loader2 = TSloader(path, datatype, subsplit_indices=[1],
                   permission=permission, parallel=True)

# Pass df1 to the first loader and df2 to the second.
loader1.initialize_datatype(df1)
loader2.initialize_datatype(df2)

##### Write



In [7]:
# Run two passes over both loaders through LoadersProcess: the first prints
# each loader's frame, the second calls write() on each loader. A pass is
# started and joined before the next one begins.
for task in (
    lambda loader: print(loader.df),
    lambda loader: loader.write(),
):
    p = LoadersProcess([loader1, loader2], task)
    p.start()
    p.join()

                 feature0
ID    timestamp          
name1 0                 0
      1                 1
      2                 2
      3                 3
      4                 4
name2 5                 5
      6                 6
      7                 7
      8                 8
      9                 9
                 feature1
ID    timestamp          
name1 10                0
      11                1
      12                2
      13                3
      14                4
name2 15                5
      16                6
      17                7
      18                8
      19                9


In [8]:
# Merge the metadata on the metaloader after the write pass and show the result.
# NOTE(review): rm=False presumably keeps the per-loader metadata instead of
# removing it after the merge — confirm against the TSload documentation.
metaloader.merge_metadata(rm=False)
display(metaloader.metadata)

Unnamed: 0_level_0,split,IDs,features
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
splitted_data,"[split1, split0]","[name1, name2]","[feature1, feature0]"


### Multiprocessing loading



In [9]:
# Open the dataset without selecting a sub-split and look at its metadata.
loader1 = TSloader(path, datatype, permission=permission, parallel=False)
loader1.metadata

Unnamed: 0_level_0,split,IDs,features
datatype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
splitted_data,"[split1, split0]","[name1, name2]","[feature1, feature0]"


In [10]:
# Re-create one loader per split for the loading pass, this time with
# parallel=False; `loader1` from the previous cell is replaced here.
loader1 = TSloader(path, datatype, subsplit_indices=[0],
                   permission=permission, parallel=False)
loader2 = TSloader(path, datatype, subsplit_indices=[1],
                   permission=permission, parallel=False)

In [11]:
# Run two passes over both loaders through LoadersProcess: the first calls
# load() on each loader, the second prints each loader's frame. A pass is
# started and joined before the next one begins.
for task in (
    lambda loader: loader.load(),
    lambda loader: print(loader.df),
):
    p = LoadersProcess([loader1, loader2], task)
    p.start()
    p.join()

                 feature0
ID    timestamp          
name1 0                 0
      1                 1
      2                 2
      3                 3
      4                 4
name2 5                 5
      6                 6
      7                 7
      8                 8
      9                 9
                 feature1
ID    timestamp          
name1 10                0
      11                1
      12                2
      13                3
      14                4
name2 15                5
      16                6
      17                7
      18                8
      19                9
