In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt, timedelta as td

import json
import random
import string

from copy import deepcopy as dc
import os

from TSFEFS import *


# Some of the imports / exports were already tested / used in the previous tests.
# Never mind; this time we test all of them deliberately.

# Import Test Strategy
#  - Create a base, which is a skin with empty content.
#  - Each test will clone the skin and import different contents.

In [2]:
# Metadata handed to TSFEFS.create below. "seq_col" and "datetime_format" are
# also read back by the data-generation cell to name and parse the time column.
dict_meta = dict(
    piece_name_len=8,
    seq_col="time",
    datetime_format="%Y-%m-%d %H:%M:%S",
    max_row_per_piece=20000,
    colnames=["time", "day", "num"],
    cache_config=dict(rows_in_cache=None, len_of_cache=3),
)


In [3]:
# Build the test dataframe: one row per second from `fr` to `to`, both end
# points included, plus two integer columns derived from the timestamp.
fr, to = "2020-01-01 00:00:00", "2020-01-02 00:00:00"
seq_col = dict_meta["seq_col"]
datetime_format = dict_meta["datetime_format"]
fr = dt.strptime(fr, datetime_format)
to = dt.strptime(to, datetime_format)
ts_gap = (to - fr)
# total_seconds() already folds days into seconds; the gap is a whole number
# of seconds, so int() loses nothing.
seconds = int(ts_gap.total_seconds())
# print(seconds)

tss = [fr + td(seconds=i) for i in range(seconds + 1)]
print(min(tss), max(tss))
df = pd.DataFrame({seq_col: tss})

# Vectorised datetime accessors replace the per-row strftime/int round trips.
# The explicit int64 cast keeps the dtype the original Python-int columns had.
df["day"] = df[seq_col].dt.day.astype("int64")
df["num"] = (
    df[seq_col].dt.year + df[seq_col].dt.month + df[seq_col].dt.day
    + df[seq_col].dt.hour + df[seq_col].dt.minute + df[seq_col].dt.second
).astype("int64")


2020-01-01 00:00:00 2020-01-02 00:00:00


In [4]:
# Create the "base" TSFEFS from dict_meta. It holds no data yet; the import
# tests below each work on a clone of it.
tsfefs_base = TSFEFS.create(dict_meta, "base")


# Import Test Plan
# 1. Import by dataframe
# 2. Import by srcfile.
# 3. Import by srcfolder.


In [5]:
# One clone of the base per import scenario, created in the same order
# (im1, im2, im3) and placed next to the base (same path).
tsfefs_im1, tsfefs_im2, tsfefs_im3 = [
    tsfefs_base.clone(tsfefs_base.path, clone_name)
    for clone_name in ("im1", "im2", "im3")
]


# 1. Import by dataframe

In [6]:
# Import the in-memory dataframe into im1, then try to print its state
# before any pending action has been taken.
tsfefs_im1.import_dataframe(df)
try:
    tsfefs_im1.print_info()
except Exception:
    # Only ordinary errors are reported here; KeyboardInterrupt / SystemExit
    # must still propagate, which a bare `except:` would prevent.
    print("Can't print unless it's updated")


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: im1
pieces: ['iazlzalk', 'ulqodnyl', 'mrnbwcwr', 'fuphqnho', 'jwgxtvdd', 'mofjdciu', 'mbvprhqh', 'mbzdlcjn', 'iyxkjcyb']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: [Timestamp('2020-01-01 00:00:00'), None, None, None, None, None, None, None, None]
tos: [Timestamp('2020-01-02 00:00:00'), None, None, None, None, None, None, None, None]
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401, None, None, None, None, None, None, None, None]
actions: ['update', 'update', 'update', 'update', 'update', 'update', 'update', 'update', 'update'] , action_params: [None, None, None, None, None, None, None, None, None]
cache: [1, 2, 3, 4, 5, 6, 7, 8, 0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy', 'Non-emtpy', 'Non-emtpy', 'Non-emtpy', 'Non-emtpy', 'Non-

In [7]:
# Run the pending actions (listed as 'update' in the previous output), refresh
# the cache, then print the state again; the recorded output shows the action
# list cleared and the rows split into pieces.
# NOTE(review): the meaning of max_level=3 is defined inside TSFEFS — confirm there.
tsfefs_im1.take_actions(max_level=3)
tsfefs_im1.maintain_cache()
tsfefs_im1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: im1
pieces: ['iazlzalk', 'ulqodnyl', 'mrnbwcwr', 'fuphqnho', 'jwgxtvdd', 'mofjdciu', 'mbvprhqh', 'mbzdlcjn', 'iyxkjcyb']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['', '', '', '', '', '', '', '', ''] , action_params: [None, None, None, Non

# 2. Import by srcfile.


In [8]:
# Write df to import2/import2.csv with the sequence column rendered as text in
# datetime_format. `df_` is kept as a name because later cells may read it.
os.makedirs("import2", exist_ok=True)  # no check-then-create race, safe on re-run
df_ = df.copy()
# Vectorised formatting instead of a per-row lambda.
df_[seq_col] = df_[seq_col].dt.strftime(datetime_format)
df_.to_csv("import2/import2.csv", index=False)


In [9]:
# Import the CSV written above into im2, then try to print its state before
# any pending action has been taken.
tsfefs_im2.import_srcfile("import2/import2.csv")
try:
    tsfefs_im2.print_info()
except Exception:
    # Only ordinary errors are reported here; KeyboardInterrupt / SystemExit
    # must still propagate, which a bare `except:` would prevent.
    print("Can't print unless it's updated")


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: im2
pieces: ['ygpqipov', 'inelfcud', 'ukliagir', 'zaedgwqo', 'yscpehuf', 'qittjdgo', 'qzomkqsh', 'fuqgqgly', 'mshbmsui']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: [Timestamp('2020-01-01 00:00:00'), None, None, None, None, None, None, None, None]
tos: [Timestamp('2020-01-02 00:00:00'), None, None, None, None, None, None, None, None]
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401, None, None, None, None, None, None, None, None]
actions: ['update', 'update', 'update', 'update', 'update', 'update', 'update', 'update', 'update'] , action_params: [None, None, None, None, None, None, None, None, None]
cache: [1, 2, 3, 4, 5, 6, 7, 8, 0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy', 'Non-emtpy', 'Non-emtpy', 'Non-emtpy', 'Non-emtpy', 'Non-

In [10]:
# Run the pending actions (listed as 'update' in the previous output), refresh
# the cache, then print the state again.
# NOTE(review): the meaning of max_level=3 is defined inside TSFEFS — confirm there.
tsfefs_im2.take_actions(max_level=3)
tsfefs_im2.maintain_cache()
tsfefs_im2.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: im2
pieces: ['ygpqipov', 'inelfcud', 'ukliagir', 'zaedgwqo', 'yscpehuf', 'qittjdgo', 'qzomkqsh', 'fuqgqgly', 'mshbmsui']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['', '', '', '', '', '', '', '', ''] , action_params: [None, None, None, Non

# 3. Import by srcfolder.

In [11]:
# Prepare the source folder for the folder import: two CSV files, the
# original day and the same rows with the sequence column shifted by one day.
os.makedirs("import3", exist_ok=True)  # no check-then-create race, safe on re-run

# File 1: the original data. Rebuilt from df here rather than relying on a
# `df_` left over from an earlier cell, so this cell runs on its own.
df_ = df.copy()
df_[seq_col] = df_[seq_col].dt.strftime(datetime_format)
df_.to_csv("import3/import3_1.csv", index=False)

# let df2 be all time shifted 1 day
# (only the sequence column moves; "day" and "num" keep their original values)
df2 = df.copy()
df2[seq_col] = df2[seq_col] + td(days=1)

# File 2: the shifted data, formatted the same way.
df_ = df2.copy()
df_[seq_col] = df_[seq_col].dt.strftime(datetime_format)
df_.to_csv("import3/import3_2.csv", index=False)


In [12]:
# Import every file of the import3 folder into im3, then try to print its state.
tsfefs_im3.import_srcfolder("import3")
try:
    tsfefs_im3.print_info()
except Exception:
    # Only ordinary errors are reported here; KeyboardInterrupt / SystemExit
    # must still propagate, which a bare `except:` would prevent.
    print("Can't print unless it's updated")


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: im3
pieces: ['ruzwiqno', 'vrbfqjpa']
types: ['csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-03 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-02 00:00:00']
tos: ['2020-01-02 00:00:00', '2020-01-03 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 172802
row_cnts: [86401, 86401]
actions: ['', ''] , action_params: [None, None]
cache: [] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: [None, None]
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


In [13]:
# Same follow-up as for im1 / im2. In the recorded run the folder import left
# no pending actions, and the printed state is the same before and after.
# NOTE(review): the meaning of max_level=3 is defined inside TSFEFS — confirm there.
tsfefs_im3.take_actions(max_level=3)
tsfefs_im3.maintain_cache()
tsfefs_im3.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: im3
pieces: ['ruzwiqno', 'vrbfqjpa']
types: ['csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-03 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-02 00:00:00']
tos: ['2020-01-02 00:00:00', '2020-01-03 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 172802
row_cnts: [86401, 86401]
actions: ['', ''] , action_params: [None, None]
cache: [] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: [None, None]
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


In [14]:
# Clean up after the import tests: call remove() on the three clones
# (presumably deleting them on disk — confirm in TSFEFS), then delete the
# source folders created above.
tsfefs_im3.remove()
tsfefs_im2.remove()
tsfefs_im1.remove()

# os.rmdir only removes empty directories, so the files go first.
for folder in ("import3", "import2"):
    for f in os.listdir(folder):
        os.remove(os.path.join(folder, f))
    os.rmdir(folder)



# Export Test Strategy
#  - The base will be filled (imported) with df.
#  - Each test will clone the base and export different contents.

In [15]:
# Fill the base with df, run the pending actions, refresh the cache and print
# the state; the export tests below each work on a clone of this filled base.
# NOTE(review): the meaning of max_level=3 is defined inside TSFEFS — confirm there.
tsfefs_base.import_dataframe(df)
tsfefs_base.take_actions(max_level=3)
tsfefs_base.maintain_cache()
tsfefs_base.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: base
pieces: ['abqjgvwv', 'ebcpyhzr', 'gokavuwy', 'gvrnasnr', 'kzttufxq', 'njilccgd', 'fvoazrvz', 'jyqpncdq', 'lbijsmsu']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['', '', '', '', '', '', '', '', ''] , action_params: [None, None, None, No

# Export Test Plan
# 1. Export to dataframe
# 2. Export to dstfile
# 3. Export to dstfolder


In [16]:
# One clone of the filled base per export scenario, created in the same order
# (ex1, ex2, ex3) and placed next to the base (same path).
tsfefs_ex1, tsfefs_ex2, tsfefs_ex3 = [
    tsfefs_base.clone(tsfefs_base.path, clone_name)
    for clone_name in ("ex1", "ex2", "ex3")
]


# 1. Export to dataframe


In [17]:
# Export ex1 as a dataframe and print the object's state afterwards.
# NOTE: this rebinds `df`; from here on it is the exported frame, not the
# generated input frame.
df = tsfefs_ex1.export_dataframe()
tsfefs_ex1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: ex1
pieces: ['abqjgvwv', 'ebcpyhzr', 'gokavuwy', 'gvrnasnr', 'kzttufxq', 'njilccgd', 'fvoazrvz', 'jyqpncdq', 'lbijsmsu']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['save', '', '', '', '', '', '', 'save', 'save'] , action_params: [None, Non

In [18]:
# Show the exported frame (pandas abbreviates the middle rows when printing).
print(df)


                     time  day   num
0     2020-01-01 00:00:00    1  2022
1     2020-01-01 00:00:01    1  2023
2     2020-01-01 00:00:02    1  2024
3     2020-01-01 00:00:03    1  2025
4     2020-01-01 00:00:04    1  2026
...                   ...  ...   ...
86396 2020-01-01 23:59:56    1  2160
86397 2020-01-01 23:59:57    1  2161
86398 2020-01-01 23:59:58    1  2162
86399 2020-01-01 23:59:59    1  2163
86400 2020-01-02 00:00:00    2  2023

[86401 rows x 3 columns]


# 2. Export to dstfile


In [19]:
# Destination for the single-file export: <cwd>/export2/export2.csv.
# The folder is created up front; exist_ok makes the cell safe to re-run.
fullpath = os.path.join(os.getcwd(), "export2")
os.makedirs(fullpath, exist_ok=True)
dstfile = os.path.join(fullpath, "export2.csv")
print(dstfile)


/Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS/export2/export2.csv


In [20]:
# Export ex2 to the single CSV file at dstfile, then print the object's state.
tsfefs_ex2.export_dstfile(dstfile)
tsfefs_ex2.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: ex2
pieces: ['abqjgvwv', 'ebcpyhzr', 'gokavuwy', 'gvrnasnr', 'kzttufxq', 'njilccgd', 'fvoazrvz', 'jyqpncdq', 'lbijsmsu']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['save', '', '', '', '', '', '', 'save', 'save'] , action_params: [None, Non

In [21]:
# Check the single-file export: list what landed in the export folder, then
# read the CSV back and report how many rows it holds.
exported_names = os.listdir(fullpath)
print(exported_names)
df = pd.read_csv(dstfile)
print(df.shape[0])


['export2.csv']
86401


# 3. Export to dstfolder

In [22]:
# Destination folder for the per-piece export: <cwd>/export3.
# It is not created here.
dstfolder = os.path.join(os.getcwd(), "export3")
print(dstfolder)


/Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS/export3


In [23]:
# Attempt the folder export while ex3 may still have pending actions; the
# state is printed first so the pending actions are visible in the output.
try:
    tsfefs_ex3.print_info()
    tsfefs_ex3.export_dstfolder(dstfolder)
except Exception:
    # Only ordinary errors are reported here; KeyboardInterrupt / SystemExit
    # must still propagate, which a bare `except:` would prevent.
    print()
    print("Can't export to folder with pending action(s).")


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: ex3
pieces: ['abqjgvwv', 'ebcpyhzr', 'gokavuwy', 'gvrnasnr', 'kzttufxq', 'njilccgd', 'fvoazrvz', 'jyqpncdq', 'lbijsmsu']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['save', '', '', '', '', '', '', 'save', 'save'] , action_params: [None, Non

In [24]:
# Run the pending actions first (the previous output lists 'save' entries),
# then export to dstfolder and print the state.
# NOTE(review): max_level=4 here versus 3 in the earlier cells — the levels are
# defined inside TSFEFS; confirm the difference is intended.
tsfefs_ex3.take_actions(max_level=4)
tsfefs_ex3.export_dstfolder(dstfolder)
tsfefs_ex3.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/1. test TSFEFS , name: ex3
pieces: ['abqjgvwv', 'ebcpyhzr', 'gokavuwy', 'gvrnasnr', 'kzttufxq', 'njilccgd', 'fvoazrvz', 'jyqpncdq', 'lbijsmsu']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['', '', '', '', '', '', '', '', ''] , action_params: [None, None, None, Non

In [25]:
# Check the folder export: list the exported files in name order, then read
# each one back and show its row count.
# Note: the order is plain string sorting of the file names.
names = sorted(os.listdir(dstfolder))
print(names)
print()
files = []
for name in names:
    files.append("%s/%s"%(dstfolder,name))
row_counts = []
for path in files:
    row_counts.append(len(pd.read_csv(path)))
print(row_counts)


['0. 2020-01-01 00:00:00 ~ 2020-01-01 02:46:39.csv', '1. 2020-01-01 02:46:40 ~ 2020-01-01 05:33:19.csv', '2. 2020-01-01 05:33:20 ~ 2020-01-01 08:19:59.csv', '3. 2020-01-01 08:20:00 ~ 2020-01-01 11:06:39.csv', '4. 2020-01-01 11:06:40 ~ 2020-01-01 13:53:19.csv', '5. 2020-01-01 13:53:20 ~ 2020-01-01 16:39:59.csv', '6. 2020-01-01 16:40:00 ~ 2020-01-01 19:26:39.csv', '7. 2020-01-01 19:26:40 ~ 2020-01-01 22:13:19.csv', '8. 2020-01-01 22:13:20 ~ 2020-01-02 00:00:00.csv']

[10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]


In [26]:
# Clean up after the export tests: call remove() on the three clones
# (presumably deleting them on disk — confirm in TSFEFS), then delete the
# export folders.
tsfefs_ex3.remove()
tsfefs_ex2.remove()
tsfefs_ex1.remove()

# Reuse the exact paths the exports were written to (dstfolder / fullpath)
# instead of re-deriving them relative to the current working directory.
# os.rmdir only removes empty directories, so the files go first.
for folder in (dstfolder, fullpath):
    for f in os.listdir(folder):
        os.remove(os.path.join(folder, f))
    os.rmdir(folder)


In [27]:
# Finally call remove() on the base itself, after all its clones are gone.
tsfefs_base.remove()
