In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt, timedelta as td

import json
import random
import string

from copy import deepcopy as dc
import os

from TSFEFS import *


# Merge Test Preparation
#  - Create a base tsfefs, with timestamps, and columns "day" and "num".
#  - Create an "another", for both df and tsfefs.
#  - The "another" should have a dummy seq_col.
#  - The "another" will have columns "day", "num", "day_square", and "day_root".

In [2]:
# Metadata for the "base" store: sequenced on the "time" column, whose
# values are formatted with second resolution.
dict_meta_base = dict(
    piece_name_len=8,
    seq_col="time",
    datetime_format="%Y-%m-%d %H:%M:%S",
    max_row_per_piece=200000,
    colnames=["time", "day", "num"],
    cache_config=dict(rows_in_cache=None, len_of_cache=3),
)

# Metadata for the "another" store: sequenced on a placeholder "dummy"
# column (year-only format) and carrying the derived day_* columns.
dict_meta_another = dict(
    piece_name_len=8,
    seq_col="dummy",
    datetime_format="%Y",
    max_row_per_piece=200000,
    colnames=["dummy", "day", "num", "day_square", "day_root"],
    cache_config=dict(rows_in_cache=None, len_of_cache=3),
)


In [3]:
# Build the base frame: 10**6 consecutive one-second timestamps starting at
# 2020-01-01 00:00:00, plus two integer columns derived from each timestamp.
seq_col = dict_meta_base["seq_col"]
dtf_base = dict_meta_base["datetime_format"]

ts = "2020-01-01 00:00:00"
ts = dt.strptime(ts, dtf_base)

tss = [ ts + td(seconds=i) for i in range(10**6) ]
print(min(tss),max(tss))

df_base = pd.DataFrame({seq_col:tss})

# Use the vectorized .dt accessors instead of six strftime()/int() round-trips
# per row; the explicit int64 cast keeps the dtype the row-wise version
# produced regardless of the pandas version's accessor dtype.
df_base["day"] = df_base[seq_col].dt.day.astype("int64")
# "num" is the sum of all calendar/clock fields of the timestamp.
df_base["num"] = (
    df_base[seq_col].dt.year
    + df_base[seq_col].dt.month
    + df_base[seq_col].dt.day
    + df_base[seq_col].dt.hour
    + df_base[seq_col].dt.minute
    + df_base[seq_col].dt.second
).astype("int64")


2020-01-01 00:00:00 2020-01-12 13:46:39


In [4]:
# Build the lookup frame to merge against: one row per distinct (day, num)
# pair of the base frame, a constant "dummy" timestamp column, and two
# columns derived from "day".
dtf_another = "%Y"

# Column selection + drop_duplicates already returns a new frame, so the
# 1M-row base frame does not need to be deep-copied first.
df_another = df_base[["day","num"]].drop_duplicates().reset_index(drop=True)
df_another["dummy"] = dt.strptime("2000", dtf_another)

# Vectorized powers replace the per-row lambdas.
df_another["day_square"] = df_another["day"] ** 2
df_another["day_root"] = df_another["day"] ** 0.5
print(df_another)


      day   num      dummy  day_square  day_root
0       1  2022 2000-01-01           1  1.000000
1       1  2023 2000-01-01           1  1.000000
2       1  2024 2000-01-01           1  1.000000
3       1  2025 2000-01-01           1  1.000000
4       1  2026 2000-01-01           1  1.000000
...   ...   ...        ...         ...       ...
1688   12  2159 2000-01-01         144  3.464102
1689   12  2160 2000-01-01         144  3.464102
1690   12  2161 2000-01-01         144  3.464102
1691   12  2162 2000-01-01         144  3.464102
1692   12  2163 2000-01-01         144  3.464102

[1693 rows x 5 columns]


In [5]:
# Create the "base" store from dict_meta_base and load the 10**6-row frame into it.
tsfefs_base = TSFEFS.create(dict_meta_base, "base")
tsfefs_base.import_dataframe(df_base)
# NOTE(review): presumably executes the actions queued by the import — confirm against TSFEFS.take_actions.
tsfefs_base.take_actions(max_level=3)
tsfefs_base.maintain_cache()
# Print the store's metadata summary (pieces, ranges, row counts, ...).
tsfefs_base.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: base
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000]

In [6]:
# Create the "another" store from dict_meta_another and load the lookup frame into it.
tsfefs_another = TSFEFS.create(dict_meta_another, "another")
tsfefs_another.import_dataframe(df_another)
# NOTE(review): presumably executes the actions queued by the import — confirm against TSFEFS.take_actions.
tsfefs_another.take_actions(max_level=3)
tsfefs_another.maintain_cache()
# Print the store's metadata summary (pieces, ranges, row counts, ...).
tsfefs_another.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: another
pieces: ['cqtqoxpm']
types: ['csv']
fr: 2000-01-01 00:00:00 , to: 2000-01-01 00:00:00
frs: ['2000']
tos: ['2000']
seq_col: dummy
piece_name_len: 8
colnames: ['dummy', 'day', 'num', 'day_square', 'day_root'] , row_cnt: 1693
row_cnts: [1693]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy']
datetime_format: %Y
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


# Merge w DataFrame Test Plan
# 1. Merge tsfefs base with df_another, on "day", target "day_square".
# 2. Merge tsfefs base with df_another, on "day", target ["day_square","day_root"].
# 3. Merge tsfefs base with df_another, on ["day","num"], target "day_square".
# 4. Merge tsfefs base with df_another, on ["day","num"], target ["day_square","day_root"].
# 5. Merge tsfefs base with df_another, on ["day","num"], target = None.


In [7]:
# One separate clone of the base store per DataFrame-merge scenario (tests 1-5),
# created in the same order as before.
(
    tsfefs_merge1,
    tsfefs_merge2,
    tsfefs_merge3,
    tsfefs_merge4,
    tsfefs_merge5,
) = [
    tsfefs_base.clone(tsfefs_base.path, clone_name)
    for clone_name in ("merge1", "merge2", "merge3", "merge4", "merge5")
]


# 1. Merge tsfefs base with df_another, on "day", target "day_square".


In [8]:
# Test 1: merge the df_another DataFrame into the merge1 clone on "day",
# with "day_square" as the single target column.
tsfefs_merge1 = tsfefs_merge1.merge(df_another,"day",target="day_square")
tsfefs_merge1.take_actions(max_level=3)
# tsfefs_merge1.maintain_cache()
# Print the store's metadata summary; colnames should now include the target column.
tsfefs_merge1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge1
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,

In [9]:
# Export the merge1 store and print the result to inspect the merged column.
print(tsfefs_merge1.export_dataframe())


                      time  day   num  day_square
0      2020-01-01 00:00:00    1  2022           1
1      2020-01-01 00:00:01    1  2023           1
2      2020-01-01 00:00:02    1  2024           1
3      2020-01-01 00:00:03    1  2025           1
4      2020-01-01 00:00:04    1  2026           1
...                    ...  ...   ...         ...
999995 2020-01-12 13:46:35   12  2127         144
999996 2020-01-12 13:46:36   12  2128         144
999997 2020-01-12 13:46:37   12  2129         144
999998 2020-01-12 13:46:38   12  2130         144
999999 2020-01-12 13:46:39   12  2131         144

[1000000 rows x 4 columns]


# 2. Merge tsfefs base with df_another, on "day", target ["day_square","day_root"].


In [10]:
# Test 2: merge the df_another DataFrame into the merge2 clone on "day",
# with a list of target columns ("day_square" and "day_root").
tsfefs_merge2 = tsfefs_merge2.merge(df_another,"day",target=["day_square","day_root"])
tsfefs_merge2.take_actions(max_level=3)
# tsfefs_merge2.maintain_cache()
# Print the store's metadata summary; colnames should now include both target columns.
tsfefs_merge2.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge2
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square', 'day_root'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100

In [11]:
# Export the merge2 store and print the result to inspect the merged columns.
print(tsfefs_merge2.export_dataframe())


                      time  day   num  day_square  day_root
0      2020-01-01 00:00:00    1  2022           1  1.000000
1      2020-01-01 00:00:01    1  2023           1  1.000000
2      2020-01-01 00:00:02    1  2024           1  1.000000
3      2020-01-01 00:00:03    1  2025           1  1.000000
4      2020-01-01 00:00:04    1  2026           1  1.000000
...                    ...  ...   ...         ...       ...
999995 2020-01-12 13:46:35   12  2127         144  3.464102
999996 2020-01-12 13:46:36   12  2128         144  3.464102
999997 2020-01-12 13:46:37   12  2129         144  3.464102
999998 2020-01-12 13:46:38   12  2130         144  3.464102
999999 2020-01-12 13:46:39   12  2131         144  3.464102

[1000000 rows x 5 columns]


# 3. Merge tsfefs base with df_another, on ["day","num"], target "day_square".


In [12]:
# Test 3: merge the df_another DataFrame into the merge3 clone on the
# composite key ["day","num"], with "day_square" as the single target column.
tsfefs_merge3 = tsfefs_merge3.merge(df_another,["day","num"],target="day_square")
tsfefs_merge3.take_actions(max_level=3)
# tsfefs_merge3.maintain_cache()
# Print the store's metadata summary; colnames should now include the target column.
tsfefs_merge3.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge3
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,

In [13]:
# Export the merge3 store and print the result to inspect the merged column.
print(tsfefs_merge3.export_dataframe())


                      time  day   num  day_square
0      2020-01-01 00:00:00    1  2022           1
1      2020-01-01 00:00:01    1  2023           1
2      2020-01-01 00:00:02    1  2024           1
3      2020-01-01 00:00:03    1  2025           1
4      2020-01-01 00:00:04    1  2026           1
...                    ...  ...   ...         ...
999995 2020-01-12 13:46:35   12  2127         144
999996 2020-01-12 13:46:36   12  2128         144
999997 2020-01-12 13:46:37   12  2129         144
999998 2020-01-12 13:46:38   12  2130         144
999999 2020-01-12 13:46:39   12  2131         144

[1000000 rows x 4 columns]


# 4. Merge tsfefs base with df_another, on ["day","num"], target ["day_square","day_root"].


In [14]:
# Test 4: merge the df_another DataFrame into the merge4 clone on the
# composite key ["day","num"], with a list of target columns.
tsfefs_merge4 = tsfefs_merge4.merge(df_another,["day","num"],target=["day_square","day_root"])
tsfefs_merge4.take_actions(max_level=3)
# tsfefs_merge4.maintain_cache()
# Print the store's metadata summary; colnames should now include both target columns.
tsfefs_merge4.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge4
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square', 'day_root'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100

In [15]:
# Export the merge4 store and print the result to inspect the merged columns.
print(tsfefs_merge4.export_dataframe())


                      time  day   num  day_square  day_root
0      2020-01-01 00:00:00    1  2022           1  1.000000
1      2020-01-01 00:00:01    1  2023           1  1.000000
2      2020-01-01 00:00:02    1  2024           1  1.000000
3      2020-01-01 00:00:03    1  2025           1  1.000000
4      2020-01-01 00:00:04    1  2026           1  1.000000
...                    ...  ...   ...         ...       ...
999995 2020-01-12 13:46:35   12  2127         144  3.464102
999996 2020-01-12 13:46:36   12  2128         144  3.464102
999997 2020-01-12 13:46:37   12  2129         144  3.464102
999998 2020-01-12 13:46:38   12  2130         144  3.464102
999999 2020-01-12 13:46:39   12  2131         144  3.464102

[1000000 rows x 5 columns]


# 5. Merge tsfefs base with df_another, on ["day","num"], target = None.


In [16]:
# Test 5: merge the df_another DataFrame into the merge5 clone on the
# composite key ["day","num"] without naming a target.
# NOTE(review): the printed colnames show every non-key column of df_another
# being added (including "dummy") — confirm this is the intended target=None behavior.
tsfefs_merge5 = tsfefs_merge5.merge(df_another,["day","num"],target=None)
tsfefs_merge5.take_actions(max_level=3)
# tsfefs_merge5.maintain_cache()
# Print the store's metadata summary.
tsfefs_merge5.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge5
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_root', 'day_square', 'dummy'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 10

In [17]:
# Export the merge5 store and print the result to inspect the merged columns.
print(tsfefs_merge5.export_dataframe())


                      time  day   num  day_root  day_square   
0      2020-01-01 00:00:00    1  2022  1.000000           1  \
1      2020-01-01 00:00:01    1  2023  1.000000           1   
2      2020-01-01 00:00:02    1  2024  1.000000           1   
3      2020-01-01 00:00:03    1  2025  1.000000           1   
4      2020-01-01 00:00:04    1  2026  1.000000           1   
...                    ...  ...   ...       ...         ...   
999995 2020-01-12 13:46:35   12  2127  3.464102         144   
999996 2020-01-12 13:46:36   12  2128  3.464102         144   
999997 2020-01-12 13:46:37   12  2129  3.464102         144   
999998 2020-01-12 13:46:38   12  2130  3.464102         144   
999999 2020-01-12 13:46:39   12  2131  3.464102         144   

                      dummy  
0       2000-01-01 00:00:00  
1       2000-01-01 00:00:00  
2       2000-01-01 00:00:00  
3       2000-01-01 00:00:00  
4       2000-01-01 00:00:00  
...                     ...  
999995  2000-01-01 00:00:00  
999

In [18]:
# Clean up the five DataFrame-merge stores, last created first.
for finished_store in (
    tsfefs_merge5,
    tsfefs_merge4,
    tsfefs_merge3,
    tsfefs_merge2,
    tsfefs_merge1,
):
    finished_store.remove()


# Merge w TSFEFS Test Plan
# 6. Merge tsfefs base with tsfefs_another, on "day", target "day_square".
# 7. Merge tsfefs base with tsfefs_another, on "day", target ["day_square","day_root"].
# 8. Merge tsfefs base with tsfefs_another, on ["day","num"], target "day_square".
# 9. Merge tsfefs base with tsfefs_another, on ["day","num"], target ["day_square","day_root"].
# 10. Merge tsfefs base with tsfefs_another, on ["day","num"], target = None.


In [19]:
# One separate clone of the base store per TSFEFS-merge scenario (tests 6-10),
# created in the same order as before.
(
    tsfefs_merge6,
    tsfefs_merge7,
    tsfefs_merge8,
    tsfefs_merge9,
    tsfefs_merge10,
) = [
    tsfefs_base.clone(tsfefs_base.path, clone_name)
    for clone_name in ("merge6", "merge7", "merge8", "merge9", "merge10")
]


# 6. Merge tsfefs base with tsfefs_another, on "day", target "day_square".


In [20]:
# Test 6: merge the tsfefs_another store into the merge6 clone on "day",
# with "day_square" as the single target column.
tsfefs_merge6 = tsfefs_merge6.merge(tsfefs_another,"day",target="day_square")
tsfefs_merge6.take_actions(max_level=3)
# tsfefs_merge6.maintain_cache()
# Print the store's metadata summary; colnames should now include the target column.
tsfefs_merge6.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge6
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,

In [21]:
# Export the merge6 store and print the result to inspect the merged column.
print(tsfefs_merge6.export_dataframe())


                      time  day   num  day_square
0      2020-01-01 00:00:00    1  2022           1
1      2020-01-01 00:00:01    1  2023           1
2      2020-01-01 00:00:02    1  2024           1
3      2020-01-01 00:00:03    1  2025           1
4      2020-01-01 00:00:04    1  2026           1
...                    ...  ...   ...         ...
999995 2020-01-12 13:46:35   12  2127         144
999996 2020-01-12 13:46:36   12  2128         144
999997 2020-01-12 13:46:37   12  2129         144
999998 2020-01-12 13:46:38   12  2130         144
999999 2020-01-12 13:46:39   12  2131         144

[1000000 rows x 4 columns]


# 7. Merge tsfefs base with tsfefs_another, on "day", target ["day_square","day_root"].


In [22]:
# Test 7: merge the tsfefs_another store into the merge7 clone on "day",
# with a list of target columns ("day_square" and "day_root").
tsfefs_merge7 = tsfefs_merge7.merge(tsfefs_another,"day",target=["day_square","day_root"])
tsfefs_merge7.take_actions(max_level=3)
# tsfefs_merge7.maintain_cache()
# Print the store's metadata summary; colnames should now include both target columns.
tsfefs_merge7.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge7
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square', 'day_root'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100

In [23]:
# Export the merge7 store and print the result to inspect the merged columns.
print(tsfefs_merge7.export_dataframe())


                      time  day   num  day_square  day_root
0      2020-01-01 00:00:00    1  2022           1  1.000000
1      2020-01-01 00:00:01    1  2023           1  1.000000
2      2020-01-01 00:00:02    1  2024           1  1.000000
3      2020-01-01 00:00:03    1  2025           1  1.000000
4      2020-01-01 00:00:04    1  2026           1  1.000000
...                    ...  ...   ...         ...       ...
999995 2020-01-12 13:46:35   12  2127         144  3.464102
999996 2020-01-12 13:46:36   12  2128         144  3.464102
999997 2020-01-12 13:46:37   12  2129         144  3.464102
999998 2020-01-12 13:46:38   12  2130         144  3.464102
999999 2020-01-12 13:46:39   12  2131         144  3.464102

[1000000 rows x 5 columns]


# 8. Merge tsfefs base with tsfefs_another, on ["day","num"], target "day_square".


In [24]:
# Test 8: merge the tsfefs_another store into the merge8 clone on the
# composite key ["day","num"], with "day_square" as the single target column.
tsfefs_merge8 = tsfefs_merge8.merge(tsfefs_another,["day","num"],target="day_square")
tsfefs_merge8.take_actions(max_level=3)
# tsfefs_merge8.maintain_cache()
# Print the store's metadata summary; colnames should now include the target column.
tsfefs_merge8.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge8
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000,

In [25]:
# Export the merge8 store and print the result to inspect the merged column.
print(tsfefs_merge8.export_dataframe())


                      time  day   num  day_square
0      2020-01-01 00:00:00    1  2022           1
1      2020-01-01 00:00:01    1  2023           1
2      2020-01-01 00:00:02    1  2024           1
3      2020-01-01 00:00:03    1  2025           1
4      2020-01-01 00:00:04    1  2026           1
...                    ...  ...   ...         ...
999995 2020-01-12 13:46:35   12  2127         144
999996 2020-01-12 13:46:36   12  2128         144
999997 2020-01-12 13:46:37   12  2129         144
999998 2020-01-12 13:46:38   12  2130         144
999999 2020-01-12 13:46:39   12  2131         144

[1000000 rows x 4 columns]


# 9. Merge tsfefs base with tsfefs_another, on ["day","num"], target ["day_square","day_root"].


In [26]:
# Test 9: merge the tsfefs_another store into the merge9 clone on the
# composite key ["day","num"], with a list of target columns.
tsfefs_merge9 = tsfefs_merge9.merge(tsfefs_another,["day","num"],target=["day_square","day_root"])
tsfefs_merge9.take_actions(max_level=3)
# tsfefs_merge9.maintain_cache()
# Print the store's metadata summary; colnames should now include both target columns.
tsfefs_merge9.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge9
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_square', 'day_root'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 100

In [27]:
# Export the merge9 store and print the result to inspect the merged columns.
print(tsfefs_merge9.export_dataframe())


                      time  day   num  day_square  day_root
0      2020-01-01 00:00:00    1  2022           1  1.000000
1      2020-01-01 00:00:01    1  2023           1  1.000000
2      2020-01-01 00:00:02    1  2024           1  1.000000
3      2020-01-01 00:00:03    1  2025           1  1.000000
4      2020-01-01 00:00:04    1  2026           1  1.000000
...                    ...  ...   ...         ...       ...
999995 2020-01-12 13:46:35   12  2127         144  3.464102
999996 2020-01-12 13:46:36   12  2128         144  3.464102
999997 2020-01-12 13:46:37   12  2129         144  3.464102
999998 2020-01-12 13:46:38   12  2130         144  3.464102
999999 2020-01-12 13:46:39   12  2131         144  3.464102

[1000000 rows x 5 columns]


# 10. Merge tsfefs base with tsfefs_another, on ["day","num"], target = None.


In [28]:
# Test 10: merge the tsfefs_another store into the merge10 clone on the
# composite key ["day","num"] without naming a target.
# NOTE(review): the printed colnames add "day_root" and "day_square" but not
# "dummy", unlike the DataFrame variant of this test — confirm this difference is intended.
tsfefs_merge10 = tsfefs_merge10.merge(tsfefs_another,["day","num"],target=None)
tsfefs_merge10.take_actions(max_level=3)
# tsfefs_merge10.maintain_cache()
# Print the store's metadata summary.
tsfefs_merge10.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: merge10
pieces: ['yqxpqhcp', 'srsprklr', 'hdtyrhit', 'bikilqvk', 'jzvehbyr', 'snnvdzda', 'aiayjynw', 'yexxburl', 'tpndsmva', 'zdsxfldg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-12 13:46:39
frs: ['2020-01-01 00:00:00', '2020-01-02 03:46:40', '2020-01-03 07:33:20', '2020-01-04 11:20:00', '2020-01-05 15:06:40', '2020-01-06 18:53:20', '2020-01-07 22:40:00', '2020-01-09 02:26:40', '2020-01-10 06:13:20', '2020-01-11 10:00:00']
tos: ['2020-01-02 03:46:39', '2020-01-03 07:33:19', '2020-01-04 11:19:59', '2020-01-05 15:06:39', '2020-01-06 18:53:19', '2020-01-07 22:39:59', '2020-01-09 02:26:39', '2020-01-10 06:13:19', '2020-01-11 09:59:59', '2020-01-12 13:46:39']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num', 'day_root', 'day_square'] , row_cnt: 1000000
row_cnts: [100000, 100000, 100000, 100000, 100000, 100000, 10

In [29]:
# Export the merge10 store and print the result to inspect the merged columns.
print(tsfefs_merge10.export_dataframe())


                      time  day   num  day_root  day_square
0      2020-01-01 00:00:00    1  2022  1.000000           1
1      2020-01-01 00:00:01    1  2023  1.000000           1
2      2020-01-01 00:00:02    1  2024  1.000000           1
3      2020-01-01 00:00:03    1  2025  1.000000           1
4      2020-01-01 00:00:04    1  2026  1.000000           1
...                    ...  ...   ...       ...         ...
999995 2020-01-12 13:46:35   12  2127  3.464102         144
999996 2020-01-12 13:46:36   12  2128  3.464102         144
999997 2020-01-12 13:46:37   12  2129  3.464102         144
999998 2020-01-12 13:46:38   12  2130  3.464102         144
999999 2020-01-12 13:46:39   12  2131  3.464102         144

[1000000 rows x 5 columns]


In [30]:
# Clean up the five TSFEFS-merge stores, last created first.
for finished_store in (
    tsfefs_merge10,
    tsfefs_merge9,
    tsfefs_merge8,
    tsfefs_merge7,
    tsfefs_merge6,
):
    finished_store.remove()


In [31]:
# Finally remove the two source stores: the lookup store first, then the base.
for source_store in (tsfefs_another, tsfefs_base):
    source_store.remove()
