In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt, timedelta as td

import json
import random
import string

from copy import deepcopy as dc
import os

from SafeTSFEFS import *


# Get Test Strategy: 
## - create a base stsfefs with >= 3 columns,
## - clone to each test case.

In [2]:
# Settings describing the test store.  Earlier revisions of this cell also
# listed "time_col" and "colnames"; those keys are not set here.
# In the cells shown in this notebook only "datetime_format" is read back.
dict_meta = dict(
    piece_name_len=8,
    datetime_format="%Y-%m-%d %H:%M:%S",
    max_row_per_piece=20000,
    cache_config=dict(rows_in_cache=None, len_of_cache=3),
)


In [3]:
# Build the base test frame: one row per second over the closed interval
# [fr, to], plus two integer columns derived from each timestamp.
fr, to = "2020-01-01 00:00:00", "2020-01-02 00:00:00"
datetime_format = dict_meta["datetime_format"]
fr = dt.strptime(fr, datetime_format)
to = dt.strptime(to, datetime_format)
ts_gap = to - fr
# total_seconds() already combines the days and seconds parts of the delta.
seconds = int(ts_gap.total_seconds())

# range(seconds + 1) keeps the end point `to` in the list.
tss = [fr + td(seconds=i) for i in range(seconds + 1)]
print(min(tss), max(tss))

df = pd.DataFrame({"time": tss})
# Read the calendar fields through the vectorised .dt accessor instead of
# formatting and re-parsing every timestamp with strftime/int.
time_parts = df["time"].dt
# The accessor may return a narrower integer dtype depending on the pandas
# version; cast so the columns stay int64 as before.
df["day"] = time_parts.day.astype("int64")
# "num" is the plain sum year + month + day + hour + minute + second.
df["num"] = (
    time_parts.year + time_parts.month + time_parts.day
    + time_parts.hour + time_parts.minute + time_parts.second
).astype("int64")


2020-01-01 00:00:00 2020-01-02 00:00:00


In [4]:
# Create the base store named "base" in the current working directory from the
# generated frame; the test cases further down each work on a clone of it.
# NOTE(review): dict_meta's "max_row_per_piece" and "cache_config" are not
# passed to this call, and the info printed for this run lists a single piece
# holding all 86401 rows -- confirm whether smaller pieces were intended.
stsfefs_base = SafeTSFEFS(os.getcwd(), "base", time_col="time", datetime_format=datetime_format, df=df)
stsfefs_base.print_tsfefs_info()


path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: base
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# Get Test Plan
# 1. Get column ["day"], check type, check cache.
# 2. Get columns ["day","num"], check type, check cache.
# 3. Get some rows by indices, check cache.
# 4. Filter some timestamps, get those rows by booleans, check cache.
# 5. Get some rows, followed by getting columns ["day","num"], check cache.
# 6. Getting columns ["day","num"], followed by getting some rows, check cache. Compare with 5.


In [5]:
# One clone of the base store per "get" test case, named get1 .. get6 and
# created next to the base store (same path).
(
    stsfefs_get1,
    stsfefs_get2,
    stsfefs_get3,
    stsfefs_get4,
    stsfefs_get5,
    stsfefs_get6,
) = (
    stsfefs_base.clone(stsfefs_base.tsfefs.path, "get{}".format(case_no))
    for case_no in range(1, 7)
)


# 1. Get column ["day"], check type, check cache.


In [6]:
# Case 1: read one column by its label and show both the values and the
# type of the returned object.
day = stsfefs_get1["day"]
print(day, type(day), sep="\n")
# Print the store metadata so the cache entry after the read can be checked.
stsfefs_get1.print_tsfefs_info()


0        1
1        1
2        1
3        1
4        1
        ..
86396    1
86397    1
86398    1
86399    1
86400    2
Name: day, Length: 86401, dtype: int64
<class 'pandas.core.series.Series'>
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: get1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 2. Get columns ["day","num"], check type, check cache.

In [7]:
# Case 2: read two columns with a list of labels and show both the values
# and the type of the returned object.
wanted_cols = ["day", "num"]
df_ = stsfefs_get2[wanted_cols]
print(df_, type(df_), sep="\n")
# Print the store metadata so the cache entry after the read can be checked.
stsfefs_get2.print_tsfefs_info()


       day   num
0        1  2022
1        1  2023
2        1  2024
3        1  2025
4        1  2026
...    ...   ...
86396    1  2160
86397    1  2161
86398    1  2162
86399    1  2163
86400    2  2023

[86401 rows x 2 columns]
<class 'pandas.core.frame.DataFrame'>
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: get2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 3. Get some rows by indices, check cache.


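<b>Since the cache from base is [7, 8, 0], let's try to touch the 5th, 6th, and 7th dfs.</b> (Note: in the run recorded here the base store has a single piece and its cache is [0], so these piece numbers presumably come from an earlier run in which the data was split into several pieces.)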
   

In [8]:
# Case 3: read a contiguous block of rows by position (50001 up to, but not
# including, 79999) and show the values and the returned type.
row_window = slice(50001, 79999)
df_ = stsfefs_get3[row_window]
print(df_, type(df_), sep="\n")
# Print the store metadata so the cache entry after the read can be checked.
stsfefs_get3.print_tsfefs_info()


                     time  day   num
50001 2020-01-01 13:53:21    1  2109
50002 2020-01-01 13:53:22    1  2110
50003 2020-01-01 13:53:23    1  2111
50004 2020-01-01 13:53:24    1  2112
50005 2020-01-01 13:53:25    1  2113
...                   ...  ...   ...
79994 2020-01-01 22:13:14    1  2071
79995 2020-01-01 22:13:15    1  2072
79996 2020-01-01 22:13:16    1  2073
79997 2020-01-01 22:13:17    1  2074
79998 2020-01-01 22:13:18    1  2075

[29998 rows x 3 columns]
<class 'pandas.core.frame.DataFrame'>
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: get3
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 4. Filter some timestamps, get those rows by booleans, check cache.


<b>Target the times of the 3rd, 4th, and 6th dfs.</b><br>
<b>3rd: ['2020-01-01 08:20:00', '2020-01-01 11:06:39']</b><br>
<b>4th: ['2020-01-01 11:06:40', '2020-01-01 13:53:19']</b><br>
<b>6th: ['2020-01-01 16:40:00', '2020-01-01 19:26:39']</b><br>

In [9]:
# Build three narrow time windows with exclusive bounds and OR them into one
# mask.  Each window is expected to match 3 rows, 9 in total.
_fmt = dict_meta["datetime_format"]

ts1, ts2 = (dt.strptime(s, _fmt) for s in ("2020-01-01 10:00:01", "2020-01-01 10:00:05"))
B1 = (stsfefs_get4 > ts1) & (stsfefs_get4 < ts2)

ts1, ts2 = (dt.strptime(s, _fmt) for s in ("2020-01-01 12:00:01", "2020-01-01 12:00:05"))
B2 = (stsfefs_get4 > ts1) & (stsfefs_get4 < ts2)

ts1, ts2 = (dt.strptime(s, _fmt) for s in ("2020-01-01 17:00:01", "2020-01-01 17:00:05"))
B3 = (stsfefs_get4 > ts1) & (stsfefs_get4 < ts2)

B = B1 | B2 | B3
# Number of selected rows.
print(sum(B))


9


In [10]:
# Case 4: select rows with the mask B built in the previous cell and show the
# values and the returned type.
row_mask = B
df_ = stsfefs_get4[row_mask]
print(df_, type(df_), sep="\n")
# Print the store metadata so the cache entry after the read can be checked.
stsfefs_get4.print_tsfefs_info()


                     time  day   num
36002 2020-01-01 10:00:02    1  2034
36003 2020-01-01 10:00:03    1  2035
36004 2020-01-01 10:00:04    1  2036
43202 2020-01-01 12:00:02    1  2036
43203 2020-01-01 12:00:03    1  2037
43204 2020-01-01 12:00:04    1  2038
61202 2020-01-01 17:00:02    1  2041
61203 2020-01-01 17:00:03    1  2042
61204 2020-01-01 17:00:04    1  2043
<class 'pandas.core.frame.DataFrame'>
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: get4
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 5. Get some rows, followed by getting columns ["day","num"], check cache.


<b> Use the same B as case 4.

In [11]:
# Case 5: rows first (mask B on the store), then columns on the result.
rows_first = stsfefs_get5[B]
df_ = rows_first[["day", "num"]]
print(df_, type(df_), sep="\n")
# Print the store metadata so the cache entry can be compared with case 6.
stsfefs_get5.print_tsfefs_info()


       day   num
36002    1  2034
36003    1  2035
36004    1  2036
43202    1  2036
43203    1  2037
43204    1  2038
61202    1  2041
61203    1  2042
61204    1  2043
<class 'pandas.core.frame.DataFrame'>
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: get5
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 6. Getting columns ["day","num"], followed by getting some rows, check cache. Compare with 5.


<b> Same as case 5, switching the accessing order.

In [12]:
# Case 6: columns first (label list on the store), then mask B on the result.
cols_first = stsfefs_get6[["day", "num"]]
df_ = cols_first[B]
print(df_, type(df_), sep="\n")
# Print the store metadata so the cache entry can be compared with case 5.
stsfefs_get6.print_tsfefs_info()


       day   num
36002    1  2034
36003    1  2035
36004    1  2036
43202    1  2036
43203    1  2037
43204    1  2038
61202    1  2041
61203    1  2042
61204    1  2043
<class 'pandas.core.frame.DataFrame'>
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: get6
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


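<b>You can see the caches are completely different for cases 5 & 6.</b> (Note: in the run recorded here both stores consist of a single piece and both print cache: [0]; a difference would presumably only show up when the data is split into several pieces.)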

In [13]:
# Clean up: call remove() on the six "get" clones, last created first.
for _store in (
    stsfefs_get6,
    stsfefs_get5,
    stsfefs_get4,
    stsfefs_get3,
    stsfefs_get2,
    stsfefs_get1,
):
    _store.remove()


# Set Test Strategy: 
## - use the same base,
## - clone to each test case.

# Set Test Plan
# 1. Set new single columns by different types of values, check cache.
# 2. Modify single columns by different types of values, check cache.
# 3. Set new columns ["day_root","num_root"], check cache.
# 4. Modify columns ["day","num"], check cache.
# 5. Set some rows by indices, check cache.
# 6. Filter some timestamps, set some rows by those booleans.


In [14]:
# One clone of the base store per "set" test case, named set1 .. set6 and
# created next to the base store (same path).
(
    stsfefs_set1,
    stsfefs_set2,
    stsfefs_set3,
    stsfefs_set4,
    stsfefs_set5,
    stsfefs_set6,
) = (
    stsfefs_base.clone(stsfefs_base.tsfefs.path, "set{}".format(case_no))
    for case_no in range(1, 7)
)


# 1. Set new single columns by different types of values, check cache.


<b> Single value

In [15]:
# New column from a scalar; the read-back below shows the value on every row.
stsfefs_set1["new_col1"] = 100
# Read the column back from the store rather than trusting the assignment.
S = stsfefs_set1["new_col1"]
print(S)
# The printed metadata should now list "new_col1" among colnames.
stsfefs_set1.print_tsfefs_info()


0        100
1        100
2        100
3        100
4        100
        ... 
86396    100
86397    100
86398    100
86399    100
86400    100
Name: new_col1, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> pd.Series

In [16]:
# New column from a pandas Series: the square root of the existing "day" column.
S = stsfefs_set1["day"]
S = S**0.5
stsfefs_set1["new_col2"] = S
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set1["new_col2"]
print(S)
stsfefs_set1.print_tsfefs_info()


0        1.000000
1        1.000000
2        1.000000
3        1.000000
4        1.000000
           ...   
86396    1.000000
86397    1.000000
86398    1.000000
86399    1.000000
86400    1.414214
Name: new_col2, Length: 86401, dtype: float64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> range

In [17]:
# New column from a Python range with one value per row of the store.
stsfefs_set1["new_col3"] = range(len(stsfefs_set1))
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set1["new_col3"]
print(S)
stsfefs_set1.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: new_col3, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2', 'new_col3'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> list

In [18]:
# New column from a plain Python list with one value per row of the store.
stsfefs_set1["new_col4"] = list(range(len(stsfefs_set1)))
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set1["new_col4"]
print(S)
stsfefs_set1.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: new_col4, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2', 'new_col3', 'new_col4'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> np.array

In [19]:
# New column from a 1-D NumPy array with one value per row of the store.
# np.arange builds the integer ramp directly instead of converting a Python
# range element by element.
stsfefs_set1["new_col5"] = np.arange(len(stsfefs_set1))
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set1["new_col5"]
print(S)
stsfefs_set1.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: new_col5, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2', 'new_col3', 'new_col4', 'new_col5'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> np.array, shape = (n,1) <-- DON'T !!!!

In [20]:
# Counter-example: an (n, 1)-shaped array assigned under a single string label.
# In the recorded output every cell of the new column ends up holding a
# one-element list and the column dtype is object.
# np.arange builds the integer ramp directly instead of converting a Python range.
arr = np.arange(len(stsfefs_set1)).reshape(-1, 1)
print(arr.shape)
stsfefs_set1["new_col6"] = arr
S = stsfefs_set1["new_col6"]
print(S)
stsfefs_set1.print_tsfefs_info()


(86401, 1)
0            [0]
1            [1]
2            [2]
3            [3]
4            [4]
          ...   
86396    [86396]
86397    [86397]
86398    [86398]
86399    [86399]
86400    [86400]
Name: new_col6, Length: 86401, dtype: object
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2', 'new_col3', 'new_col4', 'new_col5', 'new_col6'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> Correct example of np.array, shape = (n,1)

In [21]:
# Working variant for an (n, 1)-shaped array: address the column with a list
# of labels instead of a single string label.
# NOTE(review): "new_col6" already exists at this point (the preceding cell
# created it), so this overwrites a column rather than creating one.
# np.arange builds the integer ramp directly instead of converting a Python range.
arr = np.arange(len(stsfefs_set1)).reshape(-1, 1)
print(arr.shape)
stsfefs_set1[["new_col6"]] = arr
S = stsfefs_set1["new_col6"]
print(S)
stsfefs_set1.print_tsfefs_info()


(86401, 1)
0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: new_col6, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2', 'new_col3', 'new_col4', 'new_col5', 'new_col6'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> DataFrame

In [22]:
# New column from a one-column DataFrame whose own column is named "dummy";
# the value is stored under the label used on the left-hand side ("new_col7").
df_ = pd.DataFrame({"dummy":range(len(stsfefs_set1))})
stsfefs_set1["new_col7"] = df_
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set1["new_col7"]
print(S)
stsfefs_set1.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: new_col7, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set1
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num', 'new_col1', 'new_col2', 'new_col3', 'new_col4', 'new_col5', 'new_col6', 'new_col7'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 2. Modify single columns by different types of values, check cache.


<b> Single value

In [23]:
# Overwrite the existing "day" column with a scalar.
stsfefs_set2["day"] = 100
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set2["day"]
print(S)
# colnames should be unchanged, since no column was added.
stsfefs_set2.print_tsfefs_info()


0        100
1        100
2        100
3        100
4        100
        ... 
86396    100
86397    100
86398    100
86399    100
86400    100
Name: day, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> pd.Series

In [24]:
# Overwrite "day" with a pandas Series: the square root of its current values.
# "day" was set to 100 in the preceding cell, hence 10.0 in the recorded output.
S = stsfefs_set2["day"]
S = S**0.5
stsfefs_set2["day"] = S
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


0        10.0
1        10.0
2        10.0
3        10.0
4        10.0
         ... 
86396    10.0
86397    10.0
86398    10.0
86399    10.0
86400    10.0
Name: day, Length: 86401, dtype: float64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> range

In [25]:
# Overwrite "day" with a Python range holding one value per row of the store.
stsfefs_set2["day"] = range(len(stsfefs_set2))
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: day, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> list

In [26]:
# Overwrite "day" with a plain Python list holding one value per row.
stsfefs_set2["day"] = list(range(len(stsfefs_set2)))
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: day, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> np.array

In [27]:
# Overwrite "day" with a 1-D NumPy array holding one value per row.
# np.arange builds the integer ramp directly instead of converting a Python
# range element by element.
stsfefs_set2["day"] = np.arange(len(stsfefs_set2))
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: day, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> np.array, shape = (n,1) <-- DON'T !!!!

In [28]:
# Counter-example: an (n, 1)-shaped array assigned to "day" under a single
# string label.  In the recorded output every cell ends up holding a
# one-element list and the column dtype is object.
# np.arange builds the integer ramp directly instead of converting a Python range.
arr = np.arange(len(stsfefs_set2)).reshape(-1, 1)
print(arr.shape)
stsfefs_set2["day"] = arr
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


(86401, 1)
0            [0]
1            [1]
2            [2]
3            [3]
4            [4]
          ...   
86396    [86396]
86397    [86397]
86398    [86398]
86399    [86399]
86400    [86400]
Name: day, Length: 86401, dtype: object
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> Correct example of np.array, shape = (n,1)

In [29]:
# Working variant for an (n, 1)-shaped array: address "day" with a list of
# labels instead of a single string label.
# np.arange builds the integer ramp directly instead of converting a Python range.
arr = np.arange(len(stsfefs_set2)).reshape(-1, 1)
print(arr.shape)
stsfefs_set2[["day"]] = arr
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


(86401, 1)
0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: day, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> DataFrame

In [30]:
# Overwrite "day" with a one-column DataFrame whose own column is named
# "dummy"; the values are stored under the label on the left-hand side.
df_ = pd.DataFrame({"dummy":range(len(stsfefs_set2))})
stsfefs_set2["day"] = df_
# Read the column back from the store so the printed values are the stored ones.
S = stsfefs_set2["day"]
print(S)
stsfefs_set2.print_tsfefs_info()


0            0
1            1
2            2
3            3
4            4
         ...  
86396    86396
86397    86397
86398    86398
86399    86399
86400    86400
Name: day, Length: 86401, dtype: int64
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set2
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 3. Set new columns ["day_root","num_root"], check cache. <-- Cannot!


In [31]:
# Case 3: try to create two new columns with one list-keyed assignment.
# In the recorded run the store rejects this; the failure is reported with a
# short explanation instead of aborting the notebook.
try:
    df_ = stsfefs_set3[["day", "num"]]
    # Vectorised power instead of a per-element Python lambda via .apply.
    df_["day"] = df_["day"] ** 0.5
    df_["num"] = df_["num"] ** 0.5
    stsfefs_set3[["day_root", "num_root"]] = df_
    df_ = stsfefs_set3[["day_root", "num_root"]]
    print(df_)
    stsfefs_set3.print_tsfefs_info()
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit are not swallowed by this demonstration.
    print()
    print("Cannot set multiple new columns at the same time.")
    print("New single columns have to be set separately.")


Some requested column names don't exits.
You may consider the "update" action.

Cannot set multiple new columns at the same time.
New single columns have to be set separately.


# 4. Modify columns ["day","num"], check cache.


<b> DataFrame

In [32]:
# Case 4 (DataFrame value): overwrite the existing "day" and "num" columns
# with their square roots, then read both columns back.
df_ = stsfefs_set4[["day", "num"]]
# Vectorised power instead of a per-element Python lambda via .apply.
df_["day"] = df_["day"] ** 0.5
df_["num"] = df_["num"] ** 0.5
stsfefs_set4[["day", "num"]] = df_
# Rebind df_ to what the store returns; the next cell reuses this frame.
df_ = stsfefs_set4[["day", "num"]]
print(df_)
stsfefs_set4.print_tsfefs_info()


            day        num
0      1.000000  44.966654
1      1.000000  44.977772
2      1.000000  44.988888
3      1.000000  45.000000
4      1.000000  45.011110
...         ...        ...
86396  1.000000  46.475800
86397  1.000000  46.486557
86398  1.000000  46.497312
86399  1.000000  46.508064
86400  1.414214  44.977772

[86401 rows x 2 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set4
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


<b> np.array

In [33]:
# Case 4 (ndarray value): overwrite both columns with the rows of the frame
# read back in the preceding cell (df_), in reverse row order.
arr = np.array(df_.iloc[::-1])
stsfefs_set4[["day","num"]] = arr
# Read both columns back; the recorded output shows the reversed values.
df_ = stsfefs_set4[["day","num"]]
print(df_)
stsfefs_set4.print_tsfefs_info()


            day        num
0      1.414214  44.977772
1      1.000000  46.508064
2      1.000000  46.497312
3      1.000000  46.486557
4      1.000000  46.475800
...         ...        ...
86396  1.000000  45.011110
86397  1.000000  45.000000
86398  1.000000  44.988888
86399  1.000000  44.977772
86400  1.000000  44.966654

[86401 rows x 2 columns]
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set4
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 5. Set some rows by indices, check cache.


<b> Let's set the 3rd and 8th dfs.

In [34]:
# Pick two runs of four row positions and build replacement rows for them.
indices = [*range(30010, 30014), *range(80010, 80014)]

_time_col = stsfefs_set5.tsfefs.time_col
# Timestamps of the chosen positions, taken from the store's time column.
tss = np.array(stsfefs_set5[_time_col])[indices]
df_ = pd.DataFrame(
    {
        _time_col: tss,
        # "a": last two digits of the squared position.
        "a": [(pos ** 2) % 100 for pos in indices],
        # "b": first two decimals of the square root of the position.
        "b": [int(100 * ((pos ** 0.5) - int(pos ** 0.5))) for pos in indices],
    }
)


In [35]:
# Case 5: overwrite the rows at the chosen positions with the frame built in
# the preceding cell.
# NOTE(review): that frame's value columns are named "a" and "b", yet the
# recorded read-back shows them under "day" and "num" -- presumably the values
# are matched by position rather than by column name; confirm.
stsfefs_set5[indices] = df_
# df_ is rebound to the read-back rows; the boolean-mask set cell further down
# assigns this same df_.
df_ = stsfefs_set5[indices]
print(df_)
stsfefs_set5.print_tsfefs_info()


                     time  day  num
30010 2020-01-01 08:20:10    0   23
30011 2020-01-01 08:20:11   21   23
30012 2020-01-01 08:20:12   44   23
30013 2020-01-01 08:20:13   69   24
80010 2020-01-01 22:13:30    0   86
80011 2020-01-01 22:13:31   21   86
80012 2020-01-01 22:13:32   44   86
80013 2020-01-01 22:13:33   69   86
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set5
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


# 6. Filter some timestamps, set some rows by those booleans.


<b> Select some timestamps from the 3rd and 8th dfs.

In [36]:
# Build two time windows of four seconds each, with inclusive bounds, and OR
# them into one mask.
_fmt = dict_meta["datetime_format"]

ts1, ts2 = (dt.strptime(s, _fmt) for s in ("2020-01-01 08:20:10", "2020-01-01 08:20:13"))
B1 = (stsfefs_set6 >= ts1) & (stsfefs_set6 <= ts2)

ts1, ts2 = (dt.strptime(s, _fmt) for s in ("2020-01-01 22:13:30", "2020-01-01 22:13:33"))
B2 = (stsfefs_set6 >= ts1) & (stsfefs_set6 <= ts2)

B = B1 | B2


In [37]:
# Case 6: overwrite the rows selected by mask B.
# df_ here is the frame left over from the set-by-indices case above (its
# read-back rows), so this cell depends on that cell having run first.
stsfefs_set6[B] = df_
# Read the same rows back through the mask to show what was stored.
df_ = stsfefs_set6[B]
print(df_)
stsfefs_set6.print_tsfefs_info()


                     time  day  num
30010 2020-01-01 08:20:10    0   23
30011 2020-01-01 08:20:11   21   23
30012 2020-01-01 08:20:12   44   23
30013 2020-01-01 08:20:13   69   24
80010 2020-01-01 22:13:30    0   86
80011 2020-01-01 22:13:31   21   86
80012 2020-01-01 22:13:32   44   86
80013 2020-01-01 22:13:33   69   86
path: /Users/yeehinleung/Documents/GreekIsGood/Products/TSFEFS/test SafeTSFEFS , name: set6
pieces: ['zlwjcvjg']
fr: 2020-01-01 00:00:00 , to: 2020-01-02 00:00:00
frs: ['2020-01-01 00:00:00']
tos: ['2020-01-02 00:00:00']
types: ['csv']
time_col: time , datetime_format: %Y-%m-%d %H:%M:%S
piece_name_len: 8
colnames: ['time', 'day', 'num'] , row_cnt: 86401
row_cnts: [86401]
actions: [''] , action_params: [None]
cache: [0] , cache_config: {'rows_in_cache': None, 'len_of_cache': 3}


In [38]:
# Clean up: call remove() on the six "set" clones, last created first.
for _store in (
    stsfefs_set6,
    stsfefs_set5,
    stsfefs_set4,
    stsfefs_set3,
    stsfefs_set2,
    stsfefs_set1,
):
    _store.remove()


In [39]:
# Finally call remove() on the base store that all clones were made from.
stsfefs_base.remove()
