In [2]:
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
# Echo every expression statement in a cell, not only the final one.
InteractiveShell.ast_node_interactivity = "all"
# Remove the row limit so displayed DataFrames are never truncated.
pd.options.display.max_rows = None


# FSM Synthetic Data

In [3]:
# file request at each state is generated uniformly at random
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/syntheticfsm/syntheticfsm_col-1_iplc_multifsm_u1_c1_t1000000_d1_f100_04_17_22_04_04.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# Show the ten most-visited retained states.
df1[:10]

metadata: {'users': 1, 'caches': 1, 'number of files': 100, 'cache size': 10, 'time': 1000000, 'dataset': 'syntheticfsm', 'col': -1, 'offset': -1, 'algo': 'iplc', 'hitrate_leadcache': 0.300896, 'hitrate': 0.554833}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.642


Unnamed: 0_level_0,Unnamed: 1_level_0,visits,hits,hitrate
fsm,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,epsilon,217270,65334,0.300704
0,epsilon:28,9655,5268,0.545624
0,epsilon:26,8325,5371,0.645165
0,epsilon:2,7053,5163,0.732029
0,epsilon:79,6448,3855,0.59786
0,epsilon:20,6294,3776,0.599936
0,epsilon:62,6177,3781,0.612109
0,epsilon:64,5530,3717,0.672152
0,epsilon:59,5429,2876,0.529748
0,epsilon:81,5404,3443,0.637121


In [4]:
# file request at each state is generated uniformly at random
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/syntheticfsm/syntheticfsm_col-1_markov_offline_multifsm_u1_c1_t1000000_d1_f100_04_17_21_24_20.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# Show the ten most-visited retained states.
df1[:10]

metadata: {'users': 1, 'caches': 1, 'number of files': 100, 'cache size': 10, 'time': 1000000, 'dataset': 'syntheticfsm', 'col': -1, 'offset': -1, 'algo': 'markov_offline', 'hitrate_leadcache': 0.300896, 'hitrate': 0.7858657858657858}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.786


Unnamed: 0_level_0,Unnamed: 1_level_0,visits,hits,hitrate
fsm,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,"(28,)",44205,24458,0.553286
0,"(26,)",38111,24773,0.650022
0,"(2,)",32067,23764,0.741073
0,"(79,)",30058,18350,0.610486
0,"(62,)",28722,17790,0.619386
0,"(20,)",28373,17597,0.620202
0,"(64,)",25343,17110,0.675137
0,"(81,)",25131,16213,0.645139
0,"(59,)",24939,13863,0.555876
0,"(36,)",24187,15693,0.64882


In [5]:
# file request at each state is generated using the probability vector [0.5,0.25,0.125,0.0625,0.0625]
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/syntheticfsm/syntheticfsm_col-1_iplc_multifsm_u1_c1_t500000_d1_f50_04_18_01_30_58.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# Show the ten most-visited retained states.
df1[:10]

metadata: {'users': 1, 'caches': 1, 'number of files': 50, 'cache size': 5, 'time': 500000, 'dataset': 'syntheticfsm', 'col': -1, 'offset': -1, 'algo': 'iplc', 'hitrate_leadcache': 0.242522, 'hitrate': 0.615958}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.571


Unnamed: 0_level_0,Unnamed: 1_level_0,visits,hits,hitrate
fsm,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,epsilon,87186,20601,0.236288
0,epsilon:24,4754,2664,0.56037
0,epsilon:10,4199,2953,0.703263
0,epsilon:29,4174,2071,0.496167
0,epsilon:18,3888,2246,0.577675
0,epsilon:49,3645,2147,0.589026
0,epsilon:25,3035,1532,0.504778
0,epsilon:0,2943,1676,0.569487
0,epsilon:23,2816,1922,0.682528
0,epsilon:17,2532,1727,0.68207


In [6]:
# file request at each state is generated using the probability vector [0.5,0.25,0.125,0.0625,0.0625]
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/syntheticfsm/syntheticfsm_col-1_markov_offline_multifsm_u1_c1_t500000_d1_f50_04_18_01_15_54.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# Show the ten most-visited retained states.
df1[:10]

metadata: {'users': 1, 'caches': 1, 'number of files': 50, 'cache size': 5, 'time': 500000, 'dataset': 'syntheticfsm', 'col': -1, 'offset': -1, 'algo': 'markov_offline', 'hitrate_leadcache': 0.242522, 'hitrate': 0.683763367526735}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.684


Unnamed: 0_level_0,Unnamed: 1_level_0,visits,hits,hitrate
fsm,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,"(24,)",25657,14749,0.574853
0,"(29,)",25011,12249,0.489745
0,"(10,)",24952,17659,0.707719
0,"(18,)",23568,14490,0.614817
0,"(49,)",22171,13313,0.600469
0,"(25,)",19932,11123,0.558047
0,"(23,)",17535,12435,0.709153
0,"(26,)",15939,12887,0.80852
0,"(0,)",15427,9113,0.590718
0,"(17,)",14829,10139,0.683728


## CMU

In [10]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/cmu/cmu_offset3k_iplc_multifsm_u1_c1_t135711_d1_f300_11_11_22_51_20.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 135711, 'dataset': 'cmu', 'algo': 'iplc', 'hitrate_leadcache': 0.9171400991813486, 'hitrate': 0.8408014088762149, 'offset': 3000}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.895


In [12]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/cmu/cmu_col-1_iplc_multifsm_u1_c1_t123294_d1_f300_11_12_12_23_35.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 123294, 'dataset': 'cmu', 'offset': 7500, 'algo': 'iplc', 'hitrate_leadcache': 0.9072947588690448, 'hitrate': 0.7999983778610475}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.883


In [30]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/cmu/cmu_old_iplc_multifsm_u10_c4_t2320_d6_f300_11_10_20_30_07.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=0
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 10, 'caches': 4, 'number of files': 300, 'cache size': 30, 'time': 2320, 'dataset': 'cmu', 'algo': 'iplc', 'hitrate_leadcache': 0.5651293103448276, 'hitrate': 0.5628017241379311, 'offset': 0}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.59


In [22]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/cmu/cmu_col-1_iplc_multifsm_u10_c4_t10000_d6_f300_11_12_15_47_20.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=3000
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 10, 'caches': 4, 'number of files': 300, 'cache size': 30, 'time': 10000, 'dataset': 'cmu', 'offset': 7500, 'algo': 'iplc', 'hitrate_leadcache': 0.73282, 'hitrate': 0.76799}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.77


In [31]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/cmu/cmu_old_iplc_multifsm_u15_c7_t1546_d8_f300_11_10_21_09_33.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=0
    # metadata['hitrate_leadcache']=0.6432082794307892
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 15, 'caches': 7, 'number of files': 300, 'cache size': 30, 'time': 1546, 'dataset': 'cmu', 'algo': 'iplc', 'hitrate_leadcache': 0.6432082794307892, 'hitrate': 0.5989650711513583, 'offset': 0}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.619


In [34]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/cmu/cmu_col-1_iplc_multifsm_u15_c7_t5000_d8_f300_11_12_11_32_12.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['offset']=0
    # metadata['hitrate_leadcache']=0.77488
    # _=metadata.pop('col')
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 15, 'caches': 7, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'cmu', 'offset': 3000, 'algo': 'iplc', 'hitrate_leadcache': 0.77488, 'hitrate': 0.7681066666666667}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.775


## Synthetic

### Repeated requests 1 to 50 

In [13]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/synthetic/synthetic_col3_iplc_multifsm_u1_c1_t50000_d1_f50_11_03_11_11_47.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df

metadata: {'users': 1, 'caches': 1, 'number of files': 50, 'cache size': 5, 'time': 50000, 'dataset': 'synthetic', 'col': 3, 'algo': 'iplc_multiple_fsm', 'hitrate_leadcache': 0.09896, 'hitrate_iplc': 0.9195}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.0796


## Ratings 1M

### 
<font size="3">

| dataset | users | cache |
| ------- | ----- | ----- |
| 0       | 1     | 1     |

In [4]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col0_iplc_multifsm_u1_c1_t82070_d1_f300_10_31_19_35_42.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 82070, 'dataset': 'ratings', 'algo': 'iplc_multiple_fsm', 'hitrate_leadcache': 0.4429998781527964, 'hitrate_iplc': 0.3946143536005849}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.479


In [5]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col0_markov_online_multifsm_u1_c1_t82070_d1_f300_11_04_23_08_19.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['algo']='markov_online'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times. This cell uses 50,
# whereas the other single-user ratings cells use 200.
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 82070, 'dataset': 'ratings', 'col': 0, 'algo': 'markov_online', 'hitrate_leadcache': 0, 'hitrate': 0.5629043853342919}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.571


In [5]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col0_markovoffline_multifsm_u1_c1_t82070_d1_f300_11_02_15_30_33.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 82070, 'dataset': 'ratings', 'col': 0, 'algo': 'markov_offline', 'hitrate_leadcache': 0, 'hitrate_iplc': 0.7003911438215117}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.695


###
<font size="3">

| dataset | users | cache |
| ------- | ----- | ----- |
| 1       | 1     | 1     |

In [4]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col1_iplc_multifsm_u1_c1_t50000_d1_f300_10_31_20_03_40.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 50000, 'dataset': 'ratings', 'col': 1, 'algo': 'iplc_multiple_fsm', 'hitrate_leadcache': 0.47458, 'hitrate_iplc': 0.3819}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.477


In [7]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col1_markov_online_multifsm_u1_c1_t50000_d1_f300_11_03_02_29_43.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['algo']='markov_online'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 50000, 'dataset': 'ratings', 'col': 1, 'algo': 'markov_online', 'hitrate_leadcache': 0, 'hitrate_iplc': 0.5479309586191724}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.619


In [24]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col1_markov_offline_multifsm_u1_c1_t50000_d1_f300_11_04_23_24_00.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode).
    # NOTE(review): it sets 'markov_online' although this is a markov_offline
    # file; looks like a copy-paste leftover, confirm before re-enabling.
    # metadata['algo']='markov_online'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 50000, 'dataset': 'ratings', 'col': 1, 'algo': 'markov_offline', 'hitrate_leadcache': 0, 'hitrate': 0.7354747094941899}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.724


###
<font size="3">

| dataset | users | cache |
| ------- | ----- | ----- |
| 2       | 1     | 1     |

In [5]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col2_iplc_multifsm_u1_c1_t50000_d1_f300_10_31_20_28_04.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 50000, 'dataset': 'ratings', 'col': 2, 'algo': 'iplc_multiple_fsm', 'hitrate_leadcache': 0.49248, 'hitrate_iplc': 0.3894}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.502


In [28]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col2_markov_online_multifsm_u1_c1_t50000_d1_f300_11_04_23_48_58.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 50000, 'dataset': 'ratings', 'col': 2, 'algo': 'markov_online', 'hitrate_leadcache': 0, 'hitrate': 0.5787715754315086}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.628


In [29]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col2_markov_offline_multifsm_u1_c1_t50000_d1_f300_11_05_00_05_11.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times.
cutoff = 200
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 1, 'caches': 1, 'number of files': 300, 'cache size': 30, 'time': 50000, 'dataset': 'ratings', 'col': 2, 'algo': 'markov_offline', 'hitrate_leadcache': 0, 'hitrate': 0.7434548690973819}

Only states with at least 200 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.73


###
<font size="3">

| dataset | users | cache |
| ------- | ----- | ----- |
| 3       | 10    | 4     |

In [6]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col3_iplc_multifsm_u10_c4_t5000_d6_f300_11_01_14_51_15.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 10, 'caches': 4, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'ratings', 'col': 3, 'algo': 'iplc_multiple_fsm', 'hitrate_leadcache': 0.47992, 'hitrate_iplc': 0.4806}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.508


In [25]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col3_markov_online_multifsm_u10_c4_t5000_d6_f300_11_03_10_43_55.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['algo']='markov_online'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 10, 'caches': 4, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'ratings', 'col': 3, 'algo': 'markov_online', 'hitrate_leadcache': 0, 'hitrate_iplc': 0.4859771954390878}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.593


In [26]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col3_markovoffline_multifsm_u10_c4_t5000_d6_f300_11_02_19_32_56.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['algo']='markov_offline'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')

metadata: {'users': 10, 'caches': 4, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'ratings', 'col': 3, 'algo': 'markov_offline', 'hitrate_leadcache': 0, 'hitrate_iplc': 0.865513102620524}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.875


###
<font size="3">

| dataset | users | cache |
| ------- | ----- | ----- |
| 4       | 15    | 7     |

In [7]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col4_iplc_multifsm_u15_c7_t5000_d8_f300_11_01_17_44_49.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 15, 'caches': 7, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'ratings', 'col': 4, 'algo': 'iplc_multiple_fsm', 'hitrate_leadcache': 0.60236, 'hitrate_iplc': 0.5931733333333333}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.616


In [27]:
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col4_markov_online_multifsm_u15_c7_t5000_d8_f300_11_03_04_02_09.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode):
    # metadata['algo']='markov_online'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 15, 'caches': 7, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'ratings', 'col': 4, 'algo': 'markov_online', 'hitrate_leadcache': 0, 'hitrate_iplc': 0.6400746816029873}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.72


In [30]:
# NOTE(review): this file name says u10_c4 / d6, but this section's table
# lists 15 users and 7 caches and the sibling col4 cells load u15_c7 / d8
# files. Confirm that this is the intended results file.
# Open read-only: this cell only inspects stored results, and the default
# mode ('a') would create an empty file if the path did not exist.
with pd.HDFStore('data/ratings/ratings_col4_markov_offline_multifsm_u10_c4_t5000_d6_f300_11_05_00_36_06.h5', mode='r') as storage:
    df = storage['df']
    metadata = storage.get_storer('df').attrs.metadata
    # Disabled metadata patch, kept for reference (re-enabling needs a writable mode).
    # NOTE(review): it sets 'markov_online' although this is a markov_offline
    # file; looks like a copy-paste leftover, confirm before re-enabling.
    # metadata['algo']='markov_online'
    # storage.get_storer('df').attrs.metadata=metadata

# Strip the network graph entry before printing the metadata. Bound to `_`
# so the popped value is not echoed; the default tolerates files without it.
_ = metadata.pop('network_graph', None)

# Discard states visited fewer than `cutoff` times (lower threshold than
# the 200 used in the single-user cells).
cutoff = 50
df1 = df.drop(df[df['visits'] < cutoff].index)
# Pooled hit rate over the retained states: total hits / total visits.
hitrate_filtered = df1['hits'].sum() / df1['visits'].sum()
# Order by fsm ascending, then by visits descending within each fsm.
df1 = df1.sort_values(by=['fsm', 'visits'], ascending=[True, False])

print(f'metadata: {metadata}\n')
print(f'Only states with at least {cutoff} visits are considered.')
print(f'Sorted in desc orderd of visits. \nAverage hitrate: {hitrate_filtered:0.3}')
# df1

metadata: {'users': 10, 'caches': 4, 'number of files': 300, 'cache size': 30, 'time': 5000, 'dataset': 'ratings', 'col': 4, 'algo': 'markov_offline', 'hitrate_leadcache': 0, 'hitrate': 0.8766153230646129}

Only states with at least 50 visits are considered.
Sorted in desc orderd of visits. 
Average hitrate: 0.889
