In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt, timedelta as td

import json
import random
import string

from copy import deepcopy as dc
import os

from SafeTSFEFS import *


In [2]:
# Metadata that is serialised to the TSFEFS meta JSON file further down.
# Key order is kept stable because the dict is dumped as-is with json.dump.
#   seq_col / datetime_format : name of the timestamp column and the
#       strptime/strftime pattern used for it throughout this notebook.
#   colnames                  : columns of the test frames built below.
#   piece_name_len, max_row_per_piece, cache_config : consumed by TSFEFS
#       (presumably piece-file name length, row cap per piece and cache
#       sizing -- confirm against the TSFEFS implementation).
dict_meta = dict(
    piece_name_len=8,
    seq_col="time",
    datetime_format="%Y-%m-%d %H:%M:%S",
    max_row_per_piece=400000,
    colnames=["time", "day"],
    cache_config=dict(rows_in_cache=None, len_of_cache=3),
)


In [3]:
# Create the "read1" TSFEFS folder under the current working directory and
# write the meta JSON file into it.
path = os.getcwd()
name = "read1"
fullpath = "%s/%s.%s"%(path,name,TSFEFS.extension)
# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(fullpath, exist_ok=True)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open("%s/%s"%(fullpath,TSFEFS.meta_json_name),'w') as meta_file:
    json.dump(dict_meta, meta_file)


In [4]:
# Parse the inclusive test time range and compute its length in whole seconds.
datetime_format = dict_meta["datetime_format"]
fr, to = (
    dt.strptime(stamp, datetime_format)
    for stamp in ("2020-01-01 00:00:00", "2020-01-02 00:00:00")
)
ts_gap = to - fr
# Floor-dividing by a one-second timedelta yields days*86400 + seconds as an int.
seconds = ts_gap // td(seconds=1)



# Read Test Plan
# 1. Read from a .tsfefs folder whose df_index is non-empty and whose pieces conflict (their time ranges overlap).


In [5]:
# Build the two overlapping fixtures used by the read and write tests.
#   df  : one row per second over [fr, to], inclusive (hence seconds + 1 rows).
#   df2 : the same rows shifted forward by half a day, so its time range
#         overlaps the second half of df.
tss = [fr + td(seconds=i) for i in range(seconds + 1)]
print(min(tss), max(tss))
df = pd.DataFrame({dict_meta["seq_col"]: tss})
# Vectorised day-of-month instead of a per-row strftime/int round trip;
# the int64 cast keeps the dtype the per-row version produced.
df["day"] = df["time"].dt.day.astype("int64")

df2 = df.copy()
df2["time"] = df2["time"] + td(days=0.5)
df2["day"] = df2["time"].dt.day.astype("int64")

# Index frame describing the two piece files: name, type, first/last
# timestamp (formatted with datetime_format) and row count of each piece.
df_index = pd.DataFrame(
    {
        "piece": ["abcdabcd", "efghefgh"],
        "type": ["csv", "csv"],
        "fr": [piece["time"].min().strftime(datetime_format) for piece in (df, df2)],
        "to": [piece["time"].max().strftime(datetime_format) for piece in (df, df2)],
        "row_cnt": [len(df), len(df2)],
    }
)


2020-01-01 00:00:00 2020-01-02 00:00:00


In [6]:
# Materialise the "read1" fixture on disk: meta JSON, index CSV and the two
# (overlapping) piece CSVs that the read test loads afterwards.
path = os.getcwd()
name = "read1"
fullpath = "%s/%s.%s"%(path,name,TSFEFS.extension)
# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(fullpath, exist_ok=True)

# Context manager guarantees the meta file is flushed and closed before
# anything tries to read the folder.
with open("%s/%s"%(fullpath,TSFEFS.meta_json_name),'w') as meta_file:
    json.dump(dict_meta, meta_file)
df_index.to_csv("%s/%s"%(fullpath,TSFEFS.index_df_name), index=False)

# Each piece is written with its timestamps rendered as strings in
# datetime_format; the source frames df / df2 are left untouched.
for piece_name, piece_df in (("abcdabcd", df), ("efghefgh", df2)):
    df_ = piece_df.copy()
    df_["time"] = df_["time"].dt.strftime(datetime_format)
    df_.to_csv("%s/%s"%(fullpath,piece_name),index=False)


In [7]:
# Read test: construct the object with placeholder path/name arguments, then
# load the folder at fullpath with read() and dump the resulting state.
# In the recorded output, path/name reflect the folder that was read (not the
# placeholders) and the two overlapping input pieces are reported as three
# pieces with new names.
stsfefs_read1 = SafeTSFEFS("not_a_path","not_a_name")
stsfefs_read1.read(fullpath)
stsfefs_read1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: read1
pieces: ['uxzogdmj', 'fsjvqpub', 'fvcalzqv']
types: ['csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 12:00:00
frs: ['2020-01-01 00:00:00', '2020-01-02 00:00:01', '2020-01-01 12:00:00']
tos: ['2020-01-01 11:59:59', '2020-01-02 12:00:00', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day'] , row_cnt: 172802
row_cnts: [43200, 43200, 86402]
actions: ['', '', ''] , action_params: [None, None, None]
cache: [2, 1, 0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy', 'Non-emtpy', 'Non-emtpy']
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


# Write Test Plan
# 1. Create a SafeTSFEFS from `df`, add a conflicting DataFrame (`df2`), then write it to `write1`.


In [8]:
# Write test, step 1: build a SafeTSFEFS named "write1" under the current
# working directory, seeded with df, and print its initial state.
stsfefs_write1 = SafeTSFEFS(os.getcwd(), "write1", seq_col='time', datetime_format=datetime_format, df=df)
stsfefs_write1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: write1
pieces: ['veylvndm']
types: ['csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy']
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


In [9]:
# Write test, step 2: add the overlapping frame df2 via the in-place += operator.
# The recorded output shows a single piece holding 172802 rows (both frames'
# rows) spanning 2020-01-01 00:00:00 to 2020-01-02 12:00:00.
stsfefs_write1 += df2
stsfefs_write1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: write1
pieces: ['vzcisepb']
types: ['csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 12:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 12:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day'] , row_cnt: 172802
row_cnts: [172802]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy']
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


In [10]:
# Write test, step 3: call write() (presumably persists the pieces to disk --
# confirm against SafeTSFEFS.write) and print the state again; the recorded
# output is unchanged from the previous cell.
stsfefs_write1.write()
stsfefs_write1.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: write1
pieces: ['vzcisepb']
types: ['csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 12:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 12:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day'] , row_cnt: 172802
row_cnts: [172802]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: ['Non-emtpy']
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


In [11]:
# Clean-up: call remove() on both test objects (presumably deletes the
# "write1" and "read1" folders created by this notebook -- TODO confirm).
stsfefs_write1.remove()
stsfefs_read1.remove()
