In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
from sklearn.utils import resample

def get_mu_sigma(x):
    """Split serialized two-number entries into two float32 arrays.

    Each element of ``x`` is expected to be a string holding two
    comma-separated numbers wrapped in exactly one leading and one trailing
    delimiter character (presumably parentheses or brackets, e.g.
    ``"(1.5, 2.5)"`` -- the delimiters themselves are not checked).

    The whitespace around the comma is not significant: ``"(1.5,2.5)"`` and
    ``"(1.5,   2.5)"`` parse the same as ``"(1.5, 2.5)"``.

    Parameters
    ----------
    x : iterable of str
        Serialized entries, one per observation.

    Returns
    -------
    mus : numpy.ndarray
        First number of every entry, parsed as ``np.float32``.
    sigmas : numpy.ndarray
        Second number of every entry, parsed as ``np.float32``.

    Raises
    ------
    ValueError
        If an entry does not contain exactly two fields, or a field cannot
        be parsed as a number.
    """
    mus = []
    sigmas = []
    for entry in x:
        # Drop the first and last character (the wrapping delimiters), then
        # split on a bare comma and strip each field so the amount of
        # whitespace after the comma does not matter.
        fields = entry[1:-1].split(',')
        amu, asigma = [np.float32(field.strip()) for field in fields]
        mus.append(amu)
        sigmas.append(asigma)
    return np.array(mus), np.array(sigmas)

def _bootstrap_metric(x, n_iters=1000):
    """Estimate the mean of ``x`` and its spread from repeated resampling.

    For each of ``n_iters`` rounds, ``x`` is passed through
    ``sklearn.utils.resample`` and the mean of the resampled data is recorded.
    Round ``k`` (counting from 1) uses ``random_state=k``, so the same seeds
    are used on every call.

    Parameters
    ----------
    x : array-like
        Data to resample; the resampled object must expose ``.mean()``.
    n_iters : int, default 1000
        Number of resampling rounds.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-round means, both taken along axis 0.
    """
    round_means = np.array(
        [resample(x, random_state=seed).mean() for seed in range(1, n_iters + 1)]
    )
    center = round_means.mean(axis=0)
    spread = round_means.std(axis=0)
    return center, spread

In [3]:
# Root directory; every relative path below is appended to it when loading.
dirpath = "../results/results/metrics/"

# One list of metrics CSVs per workload group. Inside each list the model
# order is always knn, catboost_density, nf. `batches` holds the four lists
# themselves (same objects), in the order they are named on the left.
cache_files, ssdrnd_files, ssdseq_files, hddseq_files = batches = [
    [
        "%s/metrics_%s_pred_test_%s.csv" % (subdir, model, workload)
        for model in ("knn", "catboost_density", "nf")
    ]
    for subdir, workload in (
        ("cache", "cache"),
        ("pools", "ssd_random"),
        ("pools", "ssd_sequential"),
        ("pools", "hdd_sequential"),
    )
]

# Column names of the metrics that are read from every CSV.
mnames = [
    "MMD (RBF)",
    "FD",
    "MEAPE_IOPS",
    "MEAPE_LAT",
    "SEAPE_IOPS",
    "SEAPE_LAT",
]

In [4]:
# Summarise every metrics CSV: for each workload group in `batches` and each
# file in the group, print the file name followed by one
# "name = mean +- std" line per metric column listed in `mnames`.
for abatch in batches:
    print("\n")  # blank lines to separate the groups in the output
    for afile in abatch:
        print(afile)
        df = pd.read_csv(dirpath+afile)
        for aname in mnames:
            x = df[aname].values
            # Each cell is parsed into a pair of numbers; only the first
            # component (`mus`) is used below, `sigmas` is not read again here.
            mus, sigmas = get_mu_sigma(x)
            # Trim the upper tail: only values at or below the 95th percentile
            # of `mus` are passed to the bootstrap.
            q = np.quantile(mus, 0.95)
            mu, std = _bootstrap_metric(mus[mus<=q])
            # Untrimmed alternative, kept for reference:
            #mu, std = _bootstrap_metric(mus)
            print("%10s = %.3f +- %.3f" % (aname, mu, std))



cache/metrics_knn_pred_test_cache.csv
 MMD (RBF) = 1.391 +- 0.019
        FD = 2456.988 +- 362.232
MEAPE_IOPS = 25.478 +- 1.875
 MEAPE_LAT = 17.877 +- 1.086
SEAPE_IOPS = 203.199 +- 20.384
 SEAPE_LAT = 35.106 +- 1.732
cache/metrics_catboost_density_pred_test_cache.csv
 MMD (RBF) = 1.112 +- 0.022
        FD = 130.695 +- 19.248
MEAPE_IOPS = 6.375 +- 0.365
 MEAPE_LAT = 4.867 +- 0.267
SEAPE_IOPS = 51.679 +- 1.504
 SEAPE_LAT = 33.501 +- 0.956
cache/metrics_nf_pred_test_cache.csv
 MMD (RBF) = 0.510 +- 0.022
        FD = 111.758 +- 15.403
MEAPE_IOPS = 4.266 +- 0.409
 MEAPE_LAT = 2.963 +- 0.219
SEAPE_IOPS = 361.511 +- 22.758
 SEAPE_LAT = 278.634 +- 23.286


pools/metrics_knn_pred_test_ssd_random.csv
 MMD (RBF) = 1.199 +- 0.024
        FD = 597.576 +- 80.724
MEAPE_IOPS = 38.290 +- 3.468
 MEAPE_LAT = 19.152 +- 0.895
SEAPE_IOPS = 78.733 +- 5.285
 SEAPE_LAT = 41.927 +- 1.858
pools/metrics_catboost_density_pred_test_ssd_random.csv
 MMD (RBF) = 0.896 +- 0.024
        FD = 48.341 +- 7.110
MEAPE_IOPS