In [55]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd

if 'cachai' not in os.listdir('.'):
    os.chdir('../')
    
import cachai.utils.constants as C
from cachai.utils.models import BaseModel
from cachai.utils.metrics import evaluate, evaluate_groupby

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
from sklearn.metrics import mean_absolute_error


class TTLSimulator:

    def __init__(self, iterations=1_000):
        self._iterations = iterations
        self._target_params = [
            (50, 5),
            (200, 10),
            (400, 30),
        ]
        # means = np.linspace(10, 500, 10).astype(int)
        # std = np.arange(1, len(means) + 1)**2
        # self._target_params = np.array([means, std]).T

    def update_target_params(self, progress):
        target_params = []
        for param in self._target_params:
            mean = float(
                round(param[0]*np.sin(progress*2*np.pi/2)/(param[0]/2) + param[0], 2)
            )
            std = param[1]
            target_params.append((mean, std))
        self._target_params = target_params

    def feedback(self, y_true, y_pred):
        observation_time = int(min(y_true, y_pred)[0])
        hits = max(0, observation_time-1)
        observation_type = None
        if y_pred[0] < y_true[0]:
            observation_type = C.MISS
        elif y_pred[0] > y_true[0]:
            observation_type = C.STALE
        else:
            observation_type = C.VALID_TTL
        return observation_time, observation_type, hits

    def generate(self):
        # get target
        target_param_index = np.random.randint(0, len(self._target_params))
        target_params = self._target_params[target_param_index]
        y = np.random.normal(target_params[0], target_params[1], 1)

        # generate features from target
        num_features = 1
        correlation = 0.8
        cov_matrix = np.eye(num_features) * (1 - correlation) + np.ones((num_features, num_features)) * correlation
        features = np.random.multivariate_normal(np.ones(num_features) * y, cov_matrix)
        X = features.reshape(1, -1)
        # X = np.full((1, num_features), fill_value=target)
        return X, y

In [57]:
class Experiment():

    DF_COLUMNS = [
        C.EXPERIMENT_NAME, C.MODEL_NAME, C.ITERATION, C.OBSERVATION_TYPE,
        C.OBSERVATION_TIME, C.Y_PRED, C.Y_TRUE, C.HITS, C.MAE
    ]

    def __init__(self, simulator, iterations=1_000):
        self._simulator = simulator
        self._iterations = iterations

    def run(self, experiments):
        df = []
        for experiment in experiments:
            experiment_name = experiment[C.EXPERIMENT_NAME]
            model = experiment[C.MODEL]
            for i in range(self._iterations):
                X, y_true = self._simulator.generate()
                y_pred = model.predict(X)
                observation_time, observation_type, hits = self._simulator.feedback(y_true, y_pred)
                mae = mean_absolute_error(y_true, y_pred)
                model.observe(observation_time, observation_type, hits, y_pred)
                df.append([
                    experiment_name, model.NAME, i, observation_type, observation_time,
                    y_pred[0], y_true[0], hits, mae
                ])
        return pd.DataFrame(df, columns=Experiment.DF_COLUMNS)

In [59]:
class Model(BaseModel):
    NAME = 'SANITY_TEST'

    def __init__(self):
        pass

    def predict(self, X):
        output = X.mean()
        return np.array([output])

    def observe(self, observation_time, observation_type, hits, y_prev):
        a = 2+2

In [60]:
simulator = TTLSimulator()
experiment = Experiment(simulator=simulator)
df = experiment.run([
    {
        C.EXPERIMENT_NAME: 'Sanity test 1',
        C.MODEL: Model(),
    },
    {
        C.EXPERIMENT_NAME: 'Sanity test 1',
        C.MODEL: Model(),
    }
])
df.head(10)

Unnamed: 0,experiment_name,model_name,iteration,observation_type,observation_time,y_pred,y_true,hits,mae
0,Sanity test 1,SANITY_TEST,0,miss,394,394.659449,395.816064,393,1.156615
1,Sanity test 1,SANITY_TEST,1,stale,48,48.688773,48.047935,47,0.640838
2,Sanity test 1,SANITY_TEST,2,miss,198,198.342348,198.608569,197,0.26622
3,Sanity test 1,SANITY_TEST,3,stale,403,404.675372,403.747064,402,0.928308
4,Sanity test 1,SANITY_TEST,4,miss,390,390.904586,391.262003,389,0.357418
5,Sanity test 1,SANITY_TEST,5,miss,445,445.073745,445.521585,444,0.44784
6,Sanity test 1,SANITY_TEST,6,stale,211,213.49574,211.308608,210,2.187132
7,Sanity test 1,SANITY_TEST,7,miss,182,182.706123,183.102472,181,0.396349
8,Sanity test 1,SANITY_TEST,8,stale,198,199.158993,198.043434,197,1.115559
9,Sanity test 1,SANITY_TEST,9,miss,181,181.994648,182.517969,180,0.523321


In [61]:
evaluate(df)

Unnamed: 0,rmse,mae,mabe,msbe,hit_rate,miss_rate,precision,accuracy,recall,false_positive_rate,f1_score,hits_total,hits_mean,len
0,1.009521,0.801094,0.801094,1.019132,0.514,0.486,0.0,0.0,0.0,1.0,0,424833,212.4165,2000


In [62]:
evaluate_groupby(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rmse,mae,mabe,msbe,hit_rate,miss_rate,precision,accuracy,recall,false_positive_rate,f1_score,hits_total,hits_mean,len
experiment_name,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Sanity test 1,SANITY_TEST,0,1.009521,0.801094,0.801094,1.019132,0.514,0.486,0.0,0.0,0.0,1.0,0,424833,212.4165,2000
