In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt, timedelta as td

import json
import random
import string

from copy import deepcopy as dc
import os

from SafeTSFEFS import *


# Del Test Strategy
#  - Create a base with 3 columns and at least 5 dfs.
#  - Clone the base for each test case.


In [2]:
# Settings dictionary passed to TSFEFS.create; "seq_col" and "datetime_format"
# are also read when the sample data is generated.
dict_meta = dict(
    piece_name_len=8,
    seq_col="time",
    datetime_format="%Y-%m-%d %H:%M:%S",
    max_row_per_piece=20000,
    colnames=["time", "day", "num"],
    cache_config=dict(rows_in_cache=None, len_of_cache=3),
)


In [3]:
# Build the sample frame: one row per second over [fr, to], both ends inclusive.
fr, to = "2020-01-01 00:00:00", "2020-01-02 00:00:00"
fr = dt.strptime(fr, dict_meta["datetime_format"])
to = dt.strptime(to, dict_meta["datetime_format"])
ts_gap = (to - fr)
# total_seconds() already folds the day component in; both bounds are whole
# seconds, so the int() conversion loses nothing.
seconds = int(ts_gap.total_seconds())

tss = [fr + td(seconds=i) for i in range(seconds + 1)]
print(min(tss), max(tss))
seq_col = dict_meta["seq_col"]
df = pd.DataFrame({seq_col: tss})

# Derive the payload columns with the vectorised .dt accessor instead of
# formatting every timestamp with strftime and parsing the text back to int.
stamps = df[seq_col].dt
df["day"] = stamps.day.astype("int64")
# "num" is simply year + month + day + hour + minute + second of each timestamp.
df["num"] = (
    stamps.year + stamps.month + stamps.day
    + stamps.hour + stamps.minute + stamps.second
).astype("int64")


2020-01-01 00:00:00 2020-01-02 00:00:00


In [4]:
# Create a plain TSFEFS named "dummy" from the metadata and load the sample frame into it.
tsfefs_dummy = TSFEFS.create(dict_meta, "dummy")
tsfefs_dummy.import_dataframe(df)
# NOTE(review): take_actions / maintain_cache presumably apply pending piece
# actions and refresh the cache -- confirm against the TSFEFS implementation.
tsfefs_dummy.take_actions(max_level=3)
tsfefs_dummy.maintain_cache()

# Wrap the dummy store as the SafeTSFEFS "base" located in the current working
# directory, then print its state for reference.
stsfefs_base = SafeTSFEFS(os.getcwd(), "base", tsfefs=tsfefs_dummy)
stsfefs_base.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: base
pieces: ['sonafuij', 'fiztikez', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq', 'wnfhvytj', 'etovqeyn']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['', '', '', '', '', '', '', '', ''] , action_params: [None, None, None

# Del Test Plan
# 1. Delete time column.
# 2. Delete single column.
# 3. Delete 2 columns.
# 4. Delete all columns.
# 5. Delete by some indices.
# 6. Delete by some bools.
# 7. Delete by indices such that one df becomes empty. Will its file be removed?
# 8. Delete all rows held in the cache. What will the cache become?
# 9. Delete everything by indices. What will remain?



In [5]:
# One clone of the base per test case, named "del1" .. "del9", created in the
# current working directory.
(
    stsfefs_del1, stsfefs_del2, stsfefs_del3,
    stsfefs_del4, stsfefs_del5, stsfefs_del6,
    stsfefs_del7, stsfefs_del8, stsfefs_del9,
) = [stsfefs_base.clone(os.getcwd(), "del%d" % case_no) for case_no in range(1, 10)]


# 1. Delete time column.


In [6]:
# Test 1: deleting the sequence column is expected to be rejected with an exception.
try:
    del stsfefs_del1[stsfefs_del1.tsfefs.seq_col]
except Exception:
    # Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate.
    print("No, you can't delete the seq col")
else:
    # Without this branch an unexpected successful delete would pass silently.
    raise AssertionError("deleting the seq col unexpectedly succeeded")


No, you can't delete the seq col


# 2. Delete single column.


In [7]:
# Test 2: delete a single non-sequence column, then verify the exported frame
# instead of relying on visual inspection of the printout.
del stsfefs_del2["num"]
df_del2 = stsfefs_del2.tsfefs.export_dataframe()
print(df_del2)
stsfefs_del2.print_info()

assert "num" not in df_del2.columns, "column 'num' should have been deleted"
assert len(df_del2) == len(df), "deleting a column must not drop rows"


                     time  day
0     2020-01-01 00:00:00    1
1     2020-01-01 00:00:01    1
2     2020-01-01 00:00:02    1
3     2020-01-01 00:00:03    1
4     2020-01-01 00:00:04    1
...                   ...  ...
86396 2020-01-01 23:59:56    1
86397 2020-01-01 23:59:57    1
86398 2020-01-01 23:59:58    1
86399 2020-01-01 23:59:59    1
86400 2020-01-02 00:00:00    2

[86401 rows x 2 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del2
pieces: ['sonafuij', 'fiztikez', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq', 'wnfhvytj', 'etovqeyn']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19'

# 3. Delete 2 columns.


In [8]:
# Test 3: delete both non-sequence columns at once; only the sequence column
# should remain and no rows should be lost.
del stsfefs_del3[["day","num"]]
df_del3 = stsfefs_del3.tsfefs.export_dataframe()
print(df_del3)
stsfefs_del3.print_info()

assert list(df_del3.columns) == [stsfefs_del3.tsfefs.seq_col], "only the seq col should remain"
assert len(df_del3) == len(df), "deleting columns must not drop rows"


                     time
0     2020-01-01 00:00:00
1     2020-01-01 00:00:01
2     2020-01-01 00:00:02
3     2020-01-01 00:00:03
4     2020-01-01 00:00:04
...                   ...
86396 2020-01-01 23:59:56
86397 2020-01-01 23:59:57
86398 2020-01-01 23:59:58
86399 2020-01-01 23:59:59
86400 2020-01-02 00:00:00

[86401 rows x 1 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del3
pieces: ['sonafuij', 'fiztikez', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq', 'wnfhvytj', 'etovqeyn']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 

# 4. Delete all columns.


In [9]:
# Test 4: the full column list contains the sequence column, so the delete is
# expected to be rejected with an exception.
try:
    # Only the delete is guarded, so a failure in the reporting calls below
    # cannot be mistaken for the expected rejection.
    del stsfefs_del4[stsfefs_del4.tsfefs.colnames]
except Exception:
    # Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate.
    print("No, time col can't be in the columns to be deleted")
else:
    # The delete went through: show the resulting state, then flag the failure.
    print(stsfefs_del4.tsfefs.export_dataframe())
    stsfefs_del4.print_info()
    raise AssertionError("deleting all columns (including the seq col) unexpectedly succeeded")


No, time col can't be in the columns to be deleted


# 5. Delete by some indices.


<b>Let's delete the rows at even indices.</b>

In [10]:
# Test 5: delete every row at an even position (0, 2, 4, ...) via a slice.
del stsfefs_del5[::2]
df_del5 = stsfefs_del5.tsfefs.export_dataframe()
print(df_del5)
stsfefs_del5.print_info()

# Exactly the odd positions of the original frame must survive.
assert len(df_del5) == len(df) // 2, "half of the rows (odd positions) should remain"
assert df_del5["num"].tolist() == df["num"].iloc[1::2].tolist(), "remaining rows should be the odd positions"


                     time  day   num
0     2020-01-01 00:00:01    1  2023
1     2020-01-01 00:00:03    1  2025
2     2020-01-01 00:00:05    1  2027
3     2020-01-01 00:00:07    1  2029
4     2020-01-01 00:00:09    1  2031
...                   ...  ...   ...
43195 2020-01-01 23:59:51    1  2155
43196 2020-01-01 23:59:53    1  2157
43197 2020-01-01 23:59:55    1  2159
43198 2020-01-01 23:59:57    1  2161
43199 2020-01-01 23:59:59    1  2163

[43200 rows x 3 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del5
pieces: ['sonafuij', 'fiztikez', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq', 'wnfhvytj', 'etovqeyn']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:01 , to: 2020-01-01 23:59:59
frs: ['2020-01-01 00:00:01', '2020-01-01 02:46:41', '2020-01-01 05:33:21', '2020-01-01 08:20:01', '2020-01-01 11:06:41', '2020-01-01 13:53:21', '2020-01-01 16:40:01', '2020-01-01 19:26:41', '

# 6. Delete by some bools.


<b>Delete the same rows as in test 5, but using a boolean mask.</b>

In [11]:
# Test 6: boolean mask that is True at the even positions (the rows to delete),
# mirroring the slice used in test 5. np.arange builds the index array directly
# instead of materialising a Python range first.
B = np.arange(len(stsfefs_del6)) % 2 == 0

del stsfefs_del6[B]
df_del6 = stsfefs_del6.tsfefs.export_dataframe()
print(df_del6)
stsfefs_del6.print_info()

# Exactly the odd positions of the original frame must survive.
assert len(df_del6) == len(df) // 2, "half of the rows (odd positions) should remain"
assert df_del6["num"].tolist() == df["num"].iloc[1::2].tolist(), "remaining rows should be the odd positions"


                     time  day   num
0     2020-01-01 00:00:01    1  2023
1     2020-01-01 00:00:03    1  2025
2     2020-01-01 00:00:05    1  2027
3     2020-01-01 00:00:07    1  2029
4     2020-01-01 00:00:09    1  2031
...                   ...  ...   ...
43195 2020-01-01 23:59:51    1  2155
43196 2020-01-01 23:59:53    1  2157
43197 2020-01-01 23:59:55    1  2159
43198 2020-01-01 23:59:57    1  2161
43199 2020-01-01 23:59:59    1  2163

[43200 rows x 3 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del6
pieces: ['sonafuij', 'fiztikez', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq', 'wnfhvytj', 'etovqeyn']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:01 , to: 2020-01-01 23:59:59
frs: ['2020-01-01 00:00:01', '2020-01-01 02:46:41', '2020-01-01 05:33:21', '2020-01-01 08:20:01', '2020-01-01 11:06:41', '2020-01-01 13:53:21', '2020-01-01 16:40:01', '2020-01-01 19:26:41', '

# 7. Delete by indices such that one df becomes empty. Will its file be removed?


<b>Let's delete every row of the df at index 1.</b>

In [12]:
# Test 7 precondition: remember the piece at index 1 and the location of its
# file, and make sure the file is on disk before any rows are deleted.
f = stsfefs_del7.tsfefs.pieces[1]
print("File_supposed_to_be_deleted:", f)

fullpath = "{}/{}.{}".format(stsfefs_del7.tsfefs.path, stsfefs_del7.tsfefs.name, TSFEFS.extension)
fullname = "{}/{}".format(fullpath, f)
if not os.path.isfile(fullname):
    print("File doesn't exist.")
    assert False
else:
    print("File currently exists.")


File_supposed_to_be_deleted: fiztikez
File currently exists.


In [13]:
# Test 7: rows 10000-19999 are exactly the piece at index 1 (each full piece of
# the base holds 10000 rows), so deleting them empties that piece.
indices = list(range(10000,20000))
del stsfefs_del7[indices]
df_del7 = stsfefs_del7.tsfefs.export_dataframe()
print(df_del7)
stsfefs_del7.print_info()

assert len(df_del7) == len(df) - len(indices), "only the requested rows should be gone"
# `f` is the name of the piece at index 1, captured before the delete.
assert f not in stsfefs_del7.tsfefs.pieces, "the emptied piece should no longer be listed"


                     time  day   num
0     2020-01-01 00:00:00    1  2022
1     2020-01-01 00:00:01    1  2023
2     2020-01-01 00:00:02    1  2024
3     2020-01-01 00:00:03    1  2025
4     2020-01-01 00:00:04    1  2026
...                   ...  ...   ...
76396 2020-01-01 23:59:56    1  2160
76397 2020-01-01 23:59:57    1  2161
76398 2020-01-01 23:59:58    1  2162
76399 2020-01-01 23:59:59    1  2163
76400 2020-01-02 00:00:00    2  2023

[76401 rows x 3 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del7
pieces: ['sonafuij', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq', 'wnfhvytj', 'etovqeyn']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02

In [14]:
# Test 7 postcondition: the file of the emptied piece must be gone from disk.
if not os.path.isfile(fullname):
    print("File removed.")
else:
    print("File still exists.")
    assert False


File removed.


# 8. Delete all those in cache, what will the cache become?


<b>The dfs currently in the cache are the 0th, 7th, and 8th.</b>

In [15]:
# Test 8: row positions of the pieces at index 0 (0-9999), 7 (70000-79999) and
# 8 (80000-86400); the base has 10000 rows per full piece and 6401 in the last.
indices = list(range(10000)) + list(range(70000, 80000)) + list(range(80000, 86401))

# Delete all of those rows at once, then print the remaining data and store state.
del stsfefs_del8[indices]
print(stsfefs_del8.tsfefs.export_dataframe())
stsfefs_del8.print_info()


                     time  day   num
0     2020-01-01 02:46:40    1  2110
1     2020-01-01 02:46:41    1  2111
2     2020-01-01 02:46:42    1  2112
3     2020-01-01 02:46:43    1  2113
4     2020-01-01 02:46:44    1  2114
...                   ...  ...   ...
59995 2020-01-01 19:26:35    1  2102
59996 2020-01-01 19:26:36    1  2103
59997 2020-01-01 19:26:37    1  2104
59998 2020-01-01 19:26:38    1  2105
59999 2020-01-01 19:26:39    1  2106

[60000 rows x 3 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del8
pieces: ['fiztikez', 'mpbkkift', 'rrfbqxih', 'upbyrxyk', 'ikguhtyg', 'ncwebixq']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 02:46:40 , to: 2020-01-01 19:26:39
frs: ['2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00']
tos: ['2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-

# 9. Delete everything by indices. What will remain?


In [16]:
# Test 9: a full slice selects every row; print what the store looks like afterwards.
del stsfefs_del9[:]
print(stsfefs_del9.tsfefs.export_dataframe())
stsfefs_del9.print_info()


Empty DataFrame
Columns: []
Index: []
path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del9
pieces: []
types: []
fr: None , to: None
frs: []
tos: []
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 0
row_cnts: []
actions: [] , action_params: []
cache: [] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}
dfs: []
datetime_format: %Y-%m-%d %H:%M:%S
seq_col dtype used in read_csv: <class 'str'>
seq_col date type (in tuple): (<class 'datetime.datetime'>, <class 'pandas._libs.tslibs.timestamps.Timestamp'>)


<b>Can such an empty tsfefs still function properly, e.g. when a df is added to it?</b>

In [17]:
# Add the original sample frame back into the emptied store via +=, then print
# the store state to see whether it was rebuilt.
stsfefs_del9 += df
stsfefs_del9.print_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/repos/TSFEFS/2. test SafeTSFEFS , name: del9
pieces: ['fkoxodbs', 'cyxliotm', 'pvlkxguw', 'kaotiryd', 'ueeafmaz', 'akkifvay', 'hvaptwgw', 'yrwkzssw', 'yrlkmepg']
types: ['csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv', 'csv']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00', '2020-01-01 02:46:40', '2020-01-01 05:33:20', '2020-01-01 08:20:00', '2020-01-01 11:06:40', '2020-01-01 13:53:20', '2020-01-01 16:40:00', '2020-01-01 19:26:40', '2020-01-01 22:13:20']
tos: ['2020-01-01 02:46:39', '2020-01-01 05:33:19', '2020-01-01 08:19:59', '2020-01-01 11:06:39', '2020-01-01 13:53:19', '2020-01-01 16:39:59', '2020-01-01 19:26:39', '2020-01-01 22:13:19', '2020-01-02 00:00:00']
seq_col: time
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 6401]
actions: ['', '', '', '', '', '', '', '', ''] , action_params: [None, None, None

In [18]:
# Clean-up: call remove() on every test clone (del9 down to del1), then on the
# base store and finally on the dummy store it was built from.
for _store in (
    stsfefs_del9, stsfefs_del8, stsfefs_del7,
    stsfefs_del6, stsfefs_del5, stsfefs_del4,
    stsfefs_del3, stsfefs_del2, stsfefs_del1,
    stsfefs_base,
    tsfefs_dummy,
):
    _store.remove()
