In [1]:
%%capture
import sys
import time
from datetime import datetime as dt
import pytz
# pytz timezone object for 'Asia/Kolkata' (Indian Standard Time).
# NOTE(review): appears unused in the rest of this notebook — confirm before removing.
IST=pytz.timezone('Asia/Kolkata')

In [2]:
import numpy as np
import pandas as pd
import caching_policy as cp
import importlib
# Reload caching_policy so that edits made to the module are picked up
# when this cell is re-run, without restarting the kernel.
importlib.reload(cp)
# NOTE(review): the wildcard imports below hide where names come from;
# generate_data_fsm (used later) is presumably provided by data_generation — confirm.
from data_loader import *
from network_generator import *
from matplotlib import pyplot as plt
from data_generation import *
import h5py
import seaborn as sns
import ast

In [3]:
# Root directory for the HDF5 result files and the images/ output folder.
# Paths are built by plain string concatenation (folder_path + name),
# so the trailing slash is required.
folder_path='/home/studio-lab-user/iplc/data/'

### Generate synthetic data and run the caching policies

In [4]:
# Experiment configuration for the synthetic finite-state-machine (FSM) workload.
accuracy=dict()          # final hit rate of each policy, keyed by policy name / Markov order
num_files,cache_size=3,2
num_states=50            # number of states of FSM used for generating synthetic data
total_time=10_000_000    # horizon passed to the generator and the policies (alternative: len(data))
prob='exp'               # 'exp' builds an explicit distribution below; anything else passes p=None
# prob='unif'

# HDF5 file that stores the experimental data; its name encodes every parameter above.
# Mode 'w' creates the file, truncating any existing file of the same name.
store_name='synthetic_fsm_num_files{}_cache_size{}_S{}_T{}_{}.h5'.format(
    num_files,cache_size,num_states,total_time,prob)
store=h5py.File(folder_path+store_name,'w')

In [None]:
# Build the probability vector handed to the FSM data generator.
if prob!='exp':
    # Any setting other than 'exp' passes no explicit distribution to the generator.
    p=None
else:
    # Weights 1/2, 1/4, ..., 1/2**cache_size, normalised so they sum to one.
    p=np.array([1./(2**(idx+1)) for idx in range(cache_size)])
    p=p/p.sum()
data=generate_data_fsm(num_files,cache_size,num_states,total_time,p)
# Insert a new axis at position 1 (np.newaxis is None).
data=data[:,None]

In [6]:
# Persist the generated data together with the parameters that produced it.
dataset=store.create_dataset('data',data=data)
for attr_name,attr_value in (
    ('num_files',num_files),
    ('cache_size',cache_size),
    ('total_time',total_time),
    ('num_states',num_states),
):
    # Each parameter becomes an HDF5 attribute on the 'data' dataset.
    dataset.attrs[attr_name]=attr_value

In [None]:
# Run the vanilla Hedge algorithm on a single cache.
hedge_result=cp.hedge_fsm_single_cache(data,total_time,num_files,cache_size)
cumulative_req_h,hits_h,rec_hitrate_h,rec_states_h=hedge_result
# The last entry of the recorded hit rate is used as the policy's accuracy.
accuracy['hedge']=rec_hitrate_h[-1]
print(accuracy['hedge'])
# Keep the recorded hit rates and states in the HDF5 store under 'hedge/'.
for key,record in (('hedge/rec_hitrate',rec_hitrate_h),('hedge/rec_states',rec_states_h)):
    store[key]=record

In [None]:
# Run the Markov predictors for every order k = 1, ..., 14.
markov_orders=range(1,15)
for k in markov_orders:
    markov_result=cp.markov_online_single_cache(data,total_time,num_files,cache_size,k)
    cumulative_req_m,hits_m,rec_hitrate_m,rec_states_m=markov_result
    # The last recorded hit rate is stored under the integer order k.
    accuracy[k]=rec_hitrate_m[-1]
    print(f'order {k} markov: ',accuracy[k])

In [None]:
# Run the universal caching algorithm, implemented using the Gumbel trick
# (ignore the function name).
cumulative_req_f,hits_f,rec_hitrate_f=cp.ftpl(data,total_time,num_files,cache_size)
accuracy['ftpl']=hits_f/cumulative_req_f
print(accuracy['ftpl'])
# The accuracy table is stored as a string attribute that the plotting cell
# parses with ast.literal_eval. Convert every value to built-in Python numbers
# first: NumPy >= 2 prints scalars as e.g. "np.float64(0.5)", which
# literal_eval cannot parse. The in-memory `accuracy` dict is left untouched.
store['data'].attrs['accuracy']=str(
    {key:np.asarray(value).tolist() for key,value in accuracy.items()})

In [None]:
# Close the HDF5 file that was opened for writing in the configuration cell.
store.close()

# Plot

In [None]:
def lb(Q,k,C,N,T):
    r'''
    Lower bound on the cache hit rate, from eq 9 (\label{cache-miss-hedge}) in the paper.

    Eq 9 gives an upper bound on cache misses; for plotting it is converted
    into a lower bound on cache hits by subtracting it from 1.

    Parameters (meanings as passed by the plotting cell that calls this function):
        Q: number of FSM states
        k: order of the Markov predictor
        C: cache size
        N: number of files
        T: total time

    Returns:
        1 minus the miss bound. The result can be negative; the plotting
        cell clips it at 0.
    '''
    # The docstring is a raw string so that "\l" in "\label" is not treated
    # as an (invalid) escape sequence.
    log_term=np.log(N*np.e/C)
    scale=(N**k)*(C/T)
    leading=min(1-C/N,np.sqrt(np.log(Q)/(2*(k+1))))
    return 1-(leading + np.sqrt(2*scale*log_term*leading) + scale*log_term)
# def lb_asym(Q,k,C,N,T):
#     return 1-min(1-C/N,np.sqrt(np.log(Q)/(2*(k+1))))
# for i in range(1,15): 
#     print(i,lb(50,i,2,4,1e7))#,lb_asym(50,i,2,4,1e7))

In [None]:
# Plot the stored hit rates against the theoretical lower bound and save the figure.
import matplotlib.patches as mpatches 
# Parameters identifying which stored experiment to plot.
# NOTE(review): the generation cell writes a file for num_files=3, while this
# cell reads the num_files=4 file — that file must already exist on disk.
num_files=4
cache_size=2
num_states=50 # for generating synthetic data
total_time=int(1e7)
# prob='unif'
prob='exp'
store_path=folder_path+f'synthetic_fsm_num_files{num_files}_cache_size{cache_size}_S{num_states}_T{total_time}_{prob}.h5'
# Only the accuracy attribute is needed, so the file is closed again before plotting.
with h5py.File(store_path,'r') as store:
    accuracy=ast.literal_eval(store['data'].attrs['accuracy'])
sns.set(style="whitegrid")
# 'hedge' holds the vanilla Hedge hit rate and 'ftpl' the universal caching
# hit rate (see the cells that fill the accuracy dict).
acc_hedge=accuracy.pop('hedge')
acc_ftpl=accuracy.pop('ftpl')
# The remaining keys are the Markov orders. Each entry is replaced by
# [lower bound, hit rate - lower bound] so the stacked bar reaches the hit rate.
# list(...) takes a snapshot of the keys because the dict is modified in the loop.
for order in list(accuracy.keys()):
    hitrate=accuracy.pop(order)
    bound=max(0,lb(num_states,order,cache_size,num_files,total_time))
    print(order,bound,hitrate)
    accuracy[str(order)]=[bound,(hitrate-bound)]
ax=pd.DataFrame.from_dict(accuracy,orient='index',columns=['Lower Bound','Markov']).plot(kind='bar',stacked=True)
# Horizontal reference lines. The labels were previously attached to the wrong
# values: acc_ftpl is the universal caching result, acc_hedge the Hedge result.
ax.axhline(acc_ftpl,ls='--',label='Universal Caching')
ax.axhline(acc_hedge,ls='--',label='Hedge',c='red')
handles, labels = ax.get_legend_handles_labels()
# Legend is placed outside the axes, to the right of the plot.
ax.legend(loc='upper left', bbox_to_anchor=(1,1),handles=handles)
# ax.set_xlabel('Order of Markov Prefetcher with Hedge ($k$)')
# ax.set_ylabel('Hitrate')
# Annotate the figure with the experiment parameters, just above the axes.
ax.text(0,1.05,f'num_files: {num_files}\ncache_size:{cache_size}\nnum_states:{num_states}\nprob:{prob}\ntime:{total_time}',transform=ax.transAxes)
ax.get_figure().savefig(folder_path+f'images/synthetic_fsm_num_files{num_files}_cache_size{cache_size}_S{num_states}_T{total_time}_{prob}_lb.png',bbox_inches="tight")