# File Cache (AOP based on decorators)
- Caches the DataFrame result of a function, even when the function returns multiple DataFrames, which can greatly reduce the time spent on feature engineering
- It also supports logging a function's time cost and parameters
- It automatically creates a folder named **cache** in your working directory, where the cached results are stored


You can also find a demo notebook [here](https://github.com/Flyfoxs/file_cache/blob/master/demo.ipynb), which is easier to follow.


## Installation
pip install file_cache

pip install --user file_cache (install without root privileges)

[pypi Link](https://pypi.org/project/file-cache/)

## Sample case

In [1]:
from  file_cache.cache import file_cache
import numpy  as np
import pandas as pd

@file_cache()
def test_cache_normal(name):
    import time
    import numpy  as np
    time.sleep(3)
    return pd.DataFrame(data= np.arange(0,10).reshape(2,5))

normal_df = test_cache_normal('Felix')
normal_df.head()

2018-12-27 11:12:36,493 util_log.py[61] DEBUG Start the program at:LALI2-M-G0MD, 127.0.0.1, with:Load module
2018-12-27 11:12:36,500 util_log.py[41] INFO test_cache_normal begin with(1 paras) :['Felix'], []
2018-12-27 11:12:36,703 cache.py[29] DEBUG try to read cache from file:./cache/test_cache_normal=Felix=.h5, (h5, key:['/df_0'])
2018-12-27 11:12:36,727 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.00 to    0.00 Mb (72.9% reduction)
2018-12-27 11:12:36,733 util_log.py[49] INFO test_cache_normal cost    0.23 sec:(1 paras)(['Felix'], []), return:DataFrame, end 


Unnamed: 0,0,1,2,3,4
0,0,1,2,3,4
1,5,6,7,8,9


## Return multiple DataFrames as a tuple
Caching is also supported when a function returns multiple DataFrames as a tuple

In [2]:
import time
from functools import lru_cache

@lru_cache()
@file_cache()
def test_cache_tuple(name):
    time.sleep(3)
    df0 = pd.DataFrame(data= np.arange(5,15).reshape(2,5))
    df1 = pd.DataFrame(data= np.arange(20,30).reshape(2,5))
    return df0, df1

df0, df1 = test_cache_tuple('Felix2')
print(df0 , '\n')
print(df1)

2018-12-27 11:12:36,766 util_log.py[41] INFO test_cache_tuple begin with(1 paras) :['Felix2'], []
2018-12-27 11:12:36,783 cache.py[29] DEBUG try to read cache from file:./cache/test_cache_tuple=Felix2=.h5, (h5, key:['/df_0', '/df_1'])
2018-12-27 11:12:36,804 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.00 to    0.00 Mb (72.9% reduction)
2018-12-27 11:12:36,810 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.00 to    0.00 Mb (72.9% reduction)
2018-12-27 11:12:36,814 util_log.py[49] INFO test_cache_tuple cost    0.05 sec:(1 paras)(['Felix2'], []), return:tuple, end 


    0   1   2   3   4
0   5   6   7   8   9
1  10  11  12  13  14 

    0   1   2   3   4
0  20  21  22  23  24
1  25  26  27  28  29


## Skip the cache when the input parameters cannot be cached
If an input is a DataFrame or cannot be hashed, the cache is ignored and the function is run directly

In [3]:
@file_cache()
def test_cache_ignore(name):
    df0 = pd.DataFrame(data= np.arange(5,15).reshape(2,5))
    return df0

df = pd.DataFrame(data= np.arange(5,15).reshape(2,5))
ignore = test_cache_ignore(df)


2018-12-27 11:12:36,839 util_log.py[41] INFO test_cache_ignore begin with(1 paras) :['DataFrame'], []
2018-12-27 11:12:36,845 cache.py[113] DEBUG There is DataFrame in the args
2018-12-27 11:12:36,862 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.00 to    0.00 Mb (43.8% reduction)
2018-12-27 11:12:36,867 util_log.py[49] INFO test_cache_ignore cost    0.03 sec:(1 paras)(['DataFrame'], []), return:DataFrame, end 


## Log the function's execution time and parameters

In [4]:
from file_cache.utils.util_log import *
@timed()
def log_time(arg):
    return f'{arg} msg'

print(log_time("hello"))

2018-12-27 11:12:36,879 util_log.py[41] INFO log_time begin with(1 paras) :['hello'], []
2018-12-27 11:12:36,883 util_log.py[49] INFO log_time cost    0.00 sec:(1 paras)(['hello'], []), return:hello msg, end 


hello msg


## Supports Series as well as DataFrame

In [1]:
from  file_cache.cache import file_cache
@file_cache()
def get_train_data():
    from sklearn import datasets
    import pandas as pd
    import numpy as np
    data = datasets.load_boston()
    df = pd.DataFrame( data.data , columns=data.feature_names)
    df['target'] = data.target
    df.head()
    return df, df['target']

df, series = get_train_data()
print(type(df), type(series))

df, series = get_train_data()
print(type(df), type(series))


2018-12-27 11:21:42,935 util_log.py[61] DEBUG Start the program at:LALI2-M-G0MD, 127.0.0.1, with:Load module
2018-12-27 11:21:42,941 util_log.py[41] INFO get_train_data begin with(0 paras) :[], []
2018-12-27 11:21:43,067 cache.py[29] DEBUG Read cache from file:./cache/get_train_data==.h5,key:['/df_0', '/df_1']
2018-12-27 11:21:43,098 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.06 to    0.02 Mb (70.0% reduction)
2018-12-27 11:21:43,101 util_log.py[49] INFO get_train_data cost    0.16 sec:(0 paras)([], []), return:tuple, end 
2018-12-27 11:21:43,103 util_log.py[41] INFO get_train_data begin with(0 paras) :[], []
2018-12-27 11:21:43,115 cache.py[29] DEBUG Read cache from file:./cache/get_train_data==.h5,key:['/df_0', '/df_1']
2018-12-27 11:21:43,142 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.06 to    0.02 Mb (70.0% reduction)
2018-12-27 11:21:43,145 util_log.py[49] INFO get_train_data cost    0.04 sec:(0 paras)([], []), return:tuple, end 


<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>


## Reduce memory usage of pandas objects by 70+%

In [10]:
from file_cache.utils.reduce_mem import reduce_mem
from file_cache.utils.util_log import logger
@reduce_mem()
def get_train_data():
    from sklearn import datasets
    import pandas as pd
    import numpy as np
    data = datasets.load_boston()
    df = pd.DataFrame( data.data , columns=data.feature_names)
    df.head()
    print(f'Original type:\n{df.dtypes}')
    return df 

df = get_train_data()

print(f'New type:\n{df.dtypes}')


2018-12-27 11:25:32,797 reduce_mem.py[63] DEBUG Mem. usage decreased from    0.05 to    0.01 Mb (74.9% reduction)


Original type:
CRIM       float64
ZN         float64
INDUS      float64
CHAS       float64
NOX        float64
RM         float64
AGE        float64
DIS        float64
RAD        float64
TAX        float64
PTRATIO    float64
B          float64
LSTAT      float64
dtype: object
New type:
CRIM       float16
ZN         float16
INDUS      float16
CHAS       float16
NOX        float16
RM         float16
AGE        float16
DIS        float16
RAD        float16
TAX        float16
PTRATIO    float16
B          float16
LSTAT      float16
dtype: object
