In [34]:
%load_ext autoreload
%autoreload 2

import os

if 'cachai' not in os.listdir('.'):
    os.chdir('../')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
from enum import Enum, auto


class ObservationType(Enum):
    HIT = auto()
    MISS = auto()
    STALE = auto()
    VALID_TTL = auto()

    def __str__(self):
        return self.name

In [36]:
import numpy as np
import pandas as pd
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, f1_score


class TTLSimulator:

    def __init__(self, iterations=1_000):
        self._iterations = iterations
        self._target_params = [
            (50, 5),
            (200, 10),
            (400, 30),
        ]
        # means = np.linspace(10, 500, 10).astype(int)
        # std = np.arange(1, len(means) + 1)**2
        # self._target_params = np.array([means, std]).T

    def update_target_params(self, progress):
        target_params = []
        for param in self._target_params:
            mean = float(
                round(param[0]*np.sin(progress*2*np.pi/2)/(param[0]/2) + param[0], 2)
            )
            std = param[1]
            target_params.append((mean, std))
        self._target_params = target_params

    def feedback(self, y_true, y_pred):
        observation_time = int(min(y_true, y_pred)[0])
        hits = max(0, observation_time-1)
        observation_type = None
        if y_pred[0] < y_true[0]:
            observation_type = ObservationType.MISS
        elif y_pred[0] > y_true[0]:
            observation_type = ObservationType.STALE
        else:
            observation_type = ObservationType.VALID_TTL
        return observation_time, observation_type, hits

    def generate(self):
        # get target
        target_param_index = np.random.randint(0, len(self._target_params))
        target_params = self._target_params[target_param_index]
        y = np.random.normal(target_params[0], target_params[1], 1)

        # generate features from target
        num_features = 1
        correlation = 0.8
        cov_matrix = np.eye(num_features) * (1 - correlation) + np.ones((num_features, num_features)) * correlation
        features = np.random.multivariate_normal(np.ones(num_features) * y, cov_matrix)
        X = features.reshape(1, -1)
        # X = np.full((1, num_features), fill_value=target)
        return X, y

In [37]:
# OTHERS:
# Byte Hit Rate (BHR): The ratio of the number of bytes served from the cache to the total number of bytes requested.
# - BHR = (Total Bytes - Bytes Served) / Total Bytes
# Request Rate (RR): The rate at which requests are being made to the cache.
# - RR = TR / Time
# Latency Metrics:
# - Average Latency: The average time taken to serve a request from the cache.
# - 95th Percentile Latency: The time within which 95% of the requests are served.
# Cache Utilization (CU): The percentage of cache storage being used.
# - CU = (Used Cache Size / Total Cache Size) * 100
# Cache Hit Latency: The time taken to serve a request that results in a cache hit. Use: To ensure that cache hits are served efficiently.
# Miss Rate Decay: Definition: The rate at which the miss rate decreases over time as the cache gets populated. Use: To understand how quickly the cache becomes effective.

from sklearn.calibration import check_consistent_length


def mean_squared_bias_error(y_true, y_pred, *, weights=None):
    check_consistent_length(y_true, y_pred, weights)
    differences = y_pred - y_true
    squared_differences = differences ** 2
    sign_differences = np.sign(differences)
    return np.average(squared_differences * sign_differences, weights=weights)


def mean_absolute_bias_error(y_true, y_pred, *, weights=None):
    check_consistent_length(y_true, y_pred, weights)
    differences = y_pred - y_true
    sign_differences = np.sign(differences)
    return np.average(np.abs(differences) * sign_differences, weights=weights)


def evaluate(df):
    """
    - True Positives (TP) as valid cache hits (True Hits).
    - False Positives (FP) as stale cache hits (False Hits).
    - False Negatives (FN) as cache misses (Misses).
    - True Negatives (TN) as valid cache misses (True Misses).
    - Total requests (TR) as the total number of requests.

    - Hit Rate = (TP + FP) / TR — How many of the requests were served from cache
    - Miss Rate = (FN + TN) / TR — How many of the requests were not served from cache
    - Precision = TP / (TP + FP) — How accurate were the cache hits
    - Accuracy = (TP + TN) / TR — How many of the requests were served correctly
    - Recall = TP / (TP + FN) — How many of the cache hits were found
    - FP Rate = FP / (FP + TP) — How many of the cache hits were stale
    - F1 Score = 2 * (Precision * Recall) / (Precision + Recall)
    """

    metrics = []
    y_true = df['y_true']
    y_pred = df['y_pred']
    hits = df['hits']
    hits_total = hits.sum()
    hits_mean = hits.mean()
    total_requests = len(df)
    true_positive = len(df[df['observation_type'] == ObservationType.HIT.name])
    true_negative = len(df[df['observation_type'] == ObservationType.VALID_TTL.name])
    false_positive = len(df[df['observation_type'] == ObservationType.STALE.name])
    false_negative = len(df[df['observation_type'] == ObservationType.MISS.name])

    rmse = root_mean_squared_error(y_true, y_pred)
    msbe = mean_squared_bias_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mabe = mean_absolute_bias_error(y_true, y_pred)
    cache_hits = true_positive + false_positive
    not_cache_hits = true_negative + false_negative
    hit_rate = cache_hits / total_requests
    miss_rate = not_cache_hits / total_requests
    accuracy = (true_positive + true_negative) / total_requests
    precision = (true_positive / cache_hits) if cache_hits else 0
    recall = (true_positive / (true_positive + false_negative)) if (true_positive + false_negative) else 0
    false_positive_rate = (false_positive / cache_hits) if cache_hits else 0
    f1_score = (2 * (precision * recall) / (precision + recall)) if (precision + recall) else 0

    metrics.append([
        rmse, mae, mabe, msbe,
        hit_rate, miss_rate, precision, accuracy, recall,
        false_positive_rate, f1_score, hits_total, hits_mean, len(df)
    ])
    return pd.DataFrame(metrics, columns=[
        'rmse', 'mae', 'mabe', 'msbe',
        'hit_rate', 'miss_rate', 'precision', 'accuracy', 'recall',
        'false_positive_rate', 'f1_score', 'hits_total', 'hits_mean', 'len'
    ])


def evaluate_groupby(df, groupby=['experiment_name', 'model_name']):
    return df.groupby(groupby).apply(evaluate, include_groups=False)

In [38]:
class Experiment():

    DF_COLUMNS = [
        'experiment_name', 'model_name', 'iteration', 'observation_type', 'observation_time',
        'y_true', 'y_pred', 'hits', 'mae'
    ]

    def __init__(self, simulator, iterations=1_000):
        self._simulator = simulator
        self._iterations = iterations

    def run(self, experiments):
        df = []
        for experiment in experiments:
            experiment_name = experiment['name']
            model = experiment['model']
            for i in range(self._iterations):
                X, y_true = self._simulator.generate()
                y_pred = model.predict(X)
                observation_time, observation_type, hits = self._simulator.feedback(y_true, y_pred)
                mae = mean_absolute_error(y_true, y_pred)
                model.observe(observation_time, observation_type, hits, y_pred)
                df.append([
                    experiment_name, model.NAME, i, observation_type, observation_time,
                    y_pred[0], y_true[0], hits, mae
                ])
        return pd.DataFrame(df, columns=Experiment.DF_COLUMNS)

In [39]:
from abc import ABC, abstractmethod


class BaseModel(ABC):

    @property
    @abstractmethod
    def NAME(self) -> str:
        pass

    @abstractmethod
    def predict(self, X: np.array) -> np.array:
        pass

    @abstractmethod
    def observe(
        self,
        observation_time: int,
        observation_type: ObservationType,
        hits: int,
        prev_prediction: float
    ) -> None:
        pass

In [40]:
class Model(BaseModel):
    NAME = 'OPTIMAL'

    def __init__(self):
        pass

    def predict(self, X):
        output = X.mean()
        return np.array([output])

    def observe(self, observation_time, observation_type, hits, prev_prediction):
        a = 2+2

In [44]:
simulator = TTLSimulator()
experiment = Experiment(simulator=simulator)
df = experiment.run([
    {
        'name': 'TEST 1',
        'model': Model(),
    },
    {
        'name': 'TEST 2',
        'model': Model(),
    }
])
df.head(10)

Unnamed: 0,experiment_name,model_name,iteration,observation_type,observation_time,y_true,y_pred,hits,mae
0,TEST 1,OPTIMAL,0,MISS,46,46.692924,47.012727,45,0.319802
1,TEST 1,OPTIMAL,1,MISS,185,185.935803,187.907594,184,1.971792
2,TEST 1,OPTIMAL,2,STALE,396,397.220546,396.444897,395,0.775649
3,TEST 1,OPTIMAL,3,STALE,387,388.198278,387.692826,386,0.505452
4,TEST 1,OPTIMAL,4,MISS,43,43.755327,43.927887,42,0.17256
5,TEST 1,OPTIMAL,5,STALE,55,55.541745,55.334914,54,0.20683
6,TEST 1,OPTIMAL,6,STALE,47,48.089252,47.240472,46,0.84878
7,TEST 1,OPTIMAL,7,STALE,196,198.090218,196.661126,195,1.429092
8,TEST 1,OPTIMAL,8,STALE,391,391.878685,391.797775,390,0.08091
9,TEST 1,OPTIMAL,9,MISS,56,56.088645,56.931932,55,0.843286


In [42]:
evaluate(df)

Unnamed: 0,rmse,mae,mabe,msbe,hit_rate,miss_rate,precision,accuracy,recall,false_positive_rate,f1_score,hits_total,hits_mean,len
0,1.031163,0.831953,-0.003793,-0.020272,0.0,0.0,0,0.0,0,0,0,431377,215.6885,2000


In [43]:
evaluate_groupby(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rmse,mae,mabe,msbe,hit_rate,miss_rate,precision,accuracy,recall,false_positive_rate,f1_score,hits_total,hits_mean,len
experiment_name,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
TEST 1,OPTIMAL,0,1.02049,0.828314,0.035535,0.044866,0.0,0.0,0,0.0,0,0,0,210465,210.465,1000
TEST 2,OPTIMAL,0,1.041727,0.835591,-0.043121,-0.085409,0.0,0.0,0,0.0,0,0,0,220912,220.912,1000
