In [None]:
import csv
import os
import pickle
import random as rand
import re
import statistics as stats

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import scipy
import scipy.integrate
import scipy.signal as signal
import yaml
from numpy import sqrt, sin, cos, pi, diff
from prettytable import PrettyTable
# default plotly renderer used by every fig.show() in this notebook
pio.renderers.default = "notebook"
#pio.renderers.default = "svg"
# folder the prepared pickle files are read from (also created as an output dir below)
pickle_folder = "pickle/"

In [None]:
# Folders this notebook writes to (or reads pickles from); created up front.
OUTPUT_DIRS = ["output/pqi/data", "output/pqi/img/", "output/pqi/html/", pickle_folder]
for output_dir in OUTPUT_DIRS:
    # exist_ok=True keeps the cell idempotent and avoids the
    # check-then-create race of `if not os.path.exists(...)`.
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# SAMPLE FUNCTIONS
def const_f(OFF):
    """Build a sample function that returns OFF for every input."""
    def constant(x):
        # the argument is ignored on purpose: the signal never varies
        return OFF
    return constant

def sin_f(T, A=1, OFF=0, ABS=False):
    """Build a sine sample function x -> A*sin(x*T)+OFF.

    T is the factor applied to x inside the sine, A the amplitude and OFF the
    vertical offset. With ABS=True the absolute value of the whole expression
    is returned instead.
    """
    def wave(x):
        return A*sin(x*T)+OFF

    if not ABS:
        return wave
    return lambda x: abs(wave(x))

def pulse_f(T, D, A=1, OFF=0):
    """Build a periodic pulse sample function.

    The returned function yields A+OFF while the position inside the current
    period (x % T) is below D, and OFF otherwise.

    The phase is computed directly from x. The previous implementation built
    np.arange(NUM_SAMPLES) on every call and indexed it with x, which tied the
    function to a global defined in a later cell and raised IndexError for
    x >= NUM_SAMPLES. For 0 <= x < NUM_SAMPLES the results are identical.
    """
    return lambda x: A+OFF if x % T < D else OFF
    
# backward difference of f over np.arange(start, stop, step)
# (not np.gradient, which uses central differences in the interior)
def gradient(f, start, stop, step):
    """Return the sample-to-sample change of f on np.arange(start, stop, step).

    The result has one entry per sample. The first sample has no predecessor,
    so its delta is zero; the previous implementation compared it against a
    hard-coded 0, which produced a spike of f(start) for any function that
    does not start at zero. f is evaluated once per sample. An empty range
    yields an empty list.
    """
    samples = [f(i) for i in np.arange(start, stop, step)]
    # pair every sample with its predecessor; the first one is paired with itself
    return [cur - prev for prev, cur in zip(samples[:1] + samples, samples)]

In [None]:
# PARAMETERS
# number of samples evaluated for every synthetic function
NUM_SAMPLES = 100
# sample positions 0 .. NUM_SAMPLES-1, used as the x axis of the plots
measures = np.arange(NUM_SAMPLES)

## Cloud Performance Quality
The indicator summarizes:
- the violation of a threshold
- the amplitude of the variability (width and size)
- the speed of the variability

The ideal cloud would have zero variation

In [None]:
# The ideal cloud: a constant score.
funcs = [const_f(10)]

# evaluate each function once; the samples feed both the plot and the stats
sampled = [[func(v) for v in measures] for func in funcs]

fig = go.Figure()
for i, ys in enumerate(sampled):
    label = "f" + str(i)
    fig.add_trace(go.Scatter(x=measures, y=ys, name=label))
    # flat line at the mean of the samples
    fig.add_trace(go.Scatter(x=measures, y=[stats.mean(ys)]*NUM_SAMPLES, name="avg " + label))
fig.update_layout(
    title="ideal cloud performance",
    title_x=0.5,
    xaxis_title="time",
    yaxis_title="score"
)
fig.show()

for i, ys in enumerate(sampled):
    print("f%d  -> avg: %.2f, std: %.2f" % (i, stats.mean(ys), stats.stdev(ys)))

### Variation Entity

In [None]:
# two sines of amplitude 5; the second uses twice the frequency factor
funcs = [
    sin_f(1/NUM_SAMPLES * 2*np.pi, A=5),
    sin_f(1/NUM_SAMPLES * 4*np.pi, A=5),
]

In [None]:
# one tolerance per entry of funcs: the band drawn around the mean is mean +/- threshold * max(samples)
THRESHOLDS = [0.1, 0.1]

In [None]:
fig = go.Figure()
for i, func in enumerate(funcs):
    fy = [func(v) for v in measures]
    threshold = THRESHOLDS[i]
    mean_y = stats.mean(fy)
    max_y = max(fy)
    fig.add_trace(go.Scatter(x=measures, y=fy, name="f" + str(i)))
    # filled rectangle marking the tolerated band around the mean
    fig.add_shape(
        type="rect",
        x0=0,
        y0=mean_y+threshold*max_y,
        x1=NUM_SAMPLES,
        y1=mean_y-threshold*max_y,
        fillcolor=px.colors.qualitative.Plotly[i],
        opacity=0.5,
        layer="below",
    )
fig.update_layout(
    title="variation entity",
    title_x=0.5,
    xaxis_title="time",
    yaxis_title="score"
)
fig.show()

for i, func in enumerate(funcs):
    fy = [func(v) for v in measures]
    threshold = THRESHOLDS[i]
    mean_y = stats.mean(fy)
    max_y = max(fy)
    lower_bound = mean_y-threshold*max_y
    # integral of the whole signal
    I_all = np.trapz(fy, x=measures)
    # integral of only the samples that fall below the tolerated band
    I_neg = np.trapz([y if y < lower_bound else 0 for y in fy], x=measures)
    print("f%d  -> avg: %.2f, std: %.2f, int: %.2f, int_neg: %.2f" % (
        i,
        mean_y,
        stats.stdev(fy),
        I_all,
        I_neg))

#### The integral measures the entity of the variation: integral ~ variation

### Variability
Same degradation entity (integral = duration*amplitude) but different variability: short and deep degradation pulses VS long and shallow degradations

In [None]:
# pulses with the same area per 100 samples but different width/depth
funcs = [
    pulse_f(50, D=25, A=1),
    pulse_f(20, D=5, A=2),
    pulse_f(10, D=1, A=5),
]

In [None]:
# evaluate each pulse once; the samples feed both the plot and the stats
sampled = [[func(v) for v in measures] for func in funcs]

fig = go.Figure()
for i, ys in enumerate(sampled):
    fig.add_trace(go.Scatter(x=measures, y=ys, name="f" + str(i)))
fig.update_layout(
    title="slow vs fast pulses",
    title_x=0.5,
    xaxis_title="time",
    yaxis_title="score"
)
fig.show()

for i, ys in enumerate(sampled):
    I = np.trapz(ys, x=measures)
    print("f%d  -> avg: %.2f, std: %.2f, int: %.2f" % (
        i,
        stats.mean(ys),
        stats.stdev(ys),
        I))

#### STD measures the variability: std ~ variability

### Variation Speed

Same degradation amplitude but different speeds: slower variation VS faster variation

In [None]:
# sines with increasing frequency factor; f3 additionally doubles the amplitude
funcs = [
    sin_f(1/NUM_SAMPLES * 2*np.pi),
    sin_f(2/NUM_SAMPLES * 2*np.pi),
    sin_f(4/NUM_SAMPLES * 2*np.pi),
    sin_f(4/NUM_SAMPLES * 2*np.pi, A=2),
    sin_f(8/NUM_SAMPLES * 2*np.pi),
]

In [None]:
fig = go.Figure()
for i, func in enumerate(funcs):
    fig.add_trace(go.Scatter(x=measures, y=[func(v) for v in measures], name="f" + str(i)))
fig.update_layout(
    title="variation speed",
    title_x=0.5,
    xaxis_title="time",
    yaxis_title="score"
)
fig.show()

for i, func in enumerate(funcs):
    fy = [func(v) for v in measures]
    # quad returns (integral, absolute error estimate); run the adaptive
    # quadrature once and unpack instead of integrating twice per function
    integral, integral_err = scipy.integrate.quad(func, 0, NUM_SAMPLES)
    print("f%d  -> avg: %.2f, std: %.2f, int: %.2f, int_e: %.2f" % (
        i,
        stats.mean(fy),
        stats.stdev(fy),
        integral,
        integral_err))

In [None]:
# sample-to-sample change of every function, computed once and reused below
grads = [gradient(func, 0, NUM_SAMPLES, 1) for func in funcs]

fig = go.Figure()
for i, grad in enumerate(grads):
    fig.add_trace(go.Scatter(x=measures, y=grad, name="f" + str(i)))
fig.update_layout(
    title="gradient variation",
    title_x=0.5,
    xaxis_title="time",
    yaxis_title="score"
)
fig.show()

for i, (func, grad) in enumerate(zip(funcs, grads)):
    # finite-difference slope from numpy's diff, for comparison with gradient()
    slope = diff([func(v) for v in measures])/diff(measures)
    print("f%d  -> int: %.2f, avg-grad: %.2f, std-grad: %.2f, diff-avg: %.2f, diff-std: %.2f" % (
        i,
        np.trapz(grad),
        stats.mean(grad),
        stats.stdev(grad),
        stats.mean(slope),
        stats.stdev(slope)))

#### STD of the gradient measures the speed: STD of gradient ~ speed

## Cloud Quality
The required quality depends on the application:
- some applications require slow variations while others can tolerate faster variations
- some applications require small variations while others can tolerate bigger variations
- a slower variation allows a more accurate prediction
- only negative variations (slowdowns) can be considered (positive variations are speedups and will not negatively affect the application performance)


## Indicator
The quality indicator could be a float obtained from a formula with the following parameters:
- threshold
- weight for variation entity
- weight for variation behavior
- weight for variation speed

The parameters are tuned based on the application, e.g.:
- cloud application: quality_indicator(data_sample, th=20%, ve=0.5, vb=0.25, vs=0.25)

## Helpers

In [None]:
# average of the data, broadcast to the shape of the data
def avg_arr(data):
    """Return a float array shaped like data with every entry set to the mean of data."""
    mean_value = np.average(data)
    return np.full_like(data, mean_value, dtype=float)

# deviation of every sample from the average
def diff_arr(data, absolute=False):
    """Return data - mean(data) element-wise; its absolute value when absolute=True."""
    deviation = data - avg_arr(data)
    return np.absolute(deviation) if absolute else deviation

# relative deviation of every sample from the average
def div_arr(data, absolute=False):
    """Return data / mean(data) - 1 element-wise; its absolute value when absolute=True."""
    relative = np.divide(data, avg_arr(data)) - np.ones(np.size(data))
    return np.absolute(relative) if absolute else relative

# numerical gradient of the data
def gradient_arr(data):
    """Return np.gradient(data)."""
    slope = np.gradient(data)
    return slope

def violations(data, threshold):
    """Return the fraction of entries of data whose absolute value exceeds threshold."""
    exceeding = sum(1 for v in data if abs(v) > threshold)
    return exceeding/len(data)

# quality indicator: weighted sum of the magnitudes of the three metrics
def qi_s(integ, std, grad_std, w_integ, w_std, w_grad_std):
    """Return w_integ*|integ| + w_std*|std| + w_grad_std*|grad_std|."""
    integ_term = w_integ*abs(integ)
    std_term = w_std*abs(std)
    grad_term = w_grad_std*abs(grad_std)
    return integ_term + std_term + grad_term

In [None]:
# relative deviations from the average used as violation / integral thresholds
thresholds = [0.05, 0.1, 0.2]

def init_table(table, threshold=None):
    """Add the leading column with the metric names to table.

    table must offer add_column(name, values). threshold is the value shown
    in the "viol." row label; it defaults to thresholds[0], which is the
    threshold calculate() passes to violations().

    The label used to be built from a name `threshold` that this function
    never defined: it only resolved when an earlier cell had leaked a global
    of that name, and it then showed that unrelated value.
    """
    if threshold is None:
        threshold = thresholds[0]
    table.add_column("", ["avg", "score/$", "std (%)",
                      "viol. (th="+str(threshold)+") (%)",
                      "integral (%)", "integral + - (%)", "integral - (%)", "integral th - (%)",
                      "gradient-mean", "gradient-std (%)",
                      "div-min (%)", "div-max (%)",
                      "qi_1 (general)", "qi_2 (deg.)", "qi_3 (var.)", "qi_4 (speed)"])
    
def check_htr(x, test, threshold=0):
    """Tell whether the variation x counts as a degradation for this test.

    test["htr"] selects the direction:
    - "HIB": a degradation is a drop, i.e. x below -threshold
    - "LIB": a degradation is a rise, i.e. x above +threshold
    Any other value yields False.

    threshold is the size of the tolerated band around zero. The HIB branch
    used to compare against +threshold, which with a positive threshold also
    accepted every small positive variation instead of only drops beyond the
    band. With the default threshold=0 the result is unchanged.
    """
    direction = test["htr"]
    if direction == "HIB":
        # HIB: take only values below the tolerated band
        return x < -threshold
    if direction == "LIB":
        # LIB: take only values above the tolerated band
        return x > threshold
    return False
    
def instance_cost(cfg, instance):
    """Return the "price" of the first entry in cfg["vms"] whose "name" equals instance.

    Returns None when no entry matches.
    """
    matching_prices = (vm["price"] for vm in cfg["vms"].values() if vm["name"] == instance)
    return next(matching_prices, None)

def get_valid_filename(s):
    """Turn s into a string usable as a file name.

    Surrounding whitespace is stripped, spaces become underscores and every
    character that is not a word character, dash or dot is dropped.
    """
    underscored = str(s).strip().replace(' ', '_')
    return re.sub(r'(?u)[^-\w.]', '', underscored)
        
def get_filename(provider, bench_name, extracted_name, key):
    """Build "<provider>_<sanitized bench_extracted name>_<first 5 chars of key>"."""
    safe_name = get_valid_filename(bench_name + "_" + extracted_name)
    key_prefix = str(key)[0:5]
    return "_".join([provider, safe_name, key_prefix])
        
def get_trace_name(name, provider=None):
    """Return the label used for an instance in the PQI bar chart.

    For provider "aws" the label is the part of name before the first " -"
    followed by "-" and the last character of name; an aws name without " -"
    is returned unchanged (the old slicing cut off its last character because
    find() returned -1). Every other provider gets name unchanged; previously
    anything but "aws"/"azure" returned None.

    provider is optional for existing one-argument callers: when omitted, the
    module-level `provider` variable is used if it exists. The function used
    to read that global unconditionally and raised NameError when called
    before any cell had defined it.
    """
    if provider is None:
        provider = globals().get("provider")
    if provider == "aws":
        cut = name.find(" -")
        if cut == -1:
            return name
        return name[:cut]+"-"+name[-1:]
    return name

def calculate(table, fig, provider, instance, instance_cost, test_key, test, times, values, output_writer=None):
    """Compute the quality metrics of one measurement series and publish them.

    Adds one column of metrics to table, one bar (qi_1) to fig and, when
    output_writer is given, one row to it.

    Parameters:
        table: object with add_column(name, values); receives the metrics.
        fig: plotly figure that receives the PQI bar.
        provider: provider name, written to the output row.
        instance: instance name; the text before its first space (or the
            whole name when it has no space) is the column header.
        instance_cost: price used for the score/$ metric. Inside this
            function the name shadows the module-level instance_cost().
        test_key: benchmark key, written to the output row.
        test: dict; "htr" is always read ("HIB" or anything else, treated as
            LIB for score/$); "bench_name", "extracted_name" and
            "extracted_unit" are read only when output_writer is given.
        times: only its length is used; integrals run over the sample
            positions 0 .. len(times)-1.
        values: the measurements.
        output_writer: optional object with writerow(list).
    """
    # avg
    avg = stats.mean(values)
    # min, max
    min_v = min(values)
    max_v = max(values)
    # std, std%
    std = stats.stdev(values)
    std_perc = std/avg
    # score/$
    score_dollar = avg/instance_cost if test["htr"] == "HIB" else (1/avg)/instance_cost
    # difference array (values-avg)
    diff_a = diff_arr(values)
    diff_a_abs = diff_arr(values, absolute=True)
    # division array (values/avg - 1)
    div_a = div_arr(values)
    div_a_abs = div_arr(values, absolute=True)
    # violations
    viol = violations(div_a, thresholds[0])
    # gradient array
    grad_a = gradient_arr(values)
    grad_avg = stats.mean(grad_a)
    grad_std_perc = stats.stdev(grad_a)/avg

    # x axis shared by every integral: one unit per sample
    n_samples = len(times)
    positions = np.arange(n_samples)

    def degraded(series, threshold=0):
        # keep only the entries check_htr accepts, zero out the rest
        return [v if check_htr(v, test, threshold) else 0 for v in series]

    # integral
    # all: sum of positive and negative variations
    # abs: sum of abs(variations)
    # neg: sum of negative variations (if HIB) or positive variations (if LIB)
    integral_all = np.trapz(diff_a, x=positions)
    integral_abs = np.trapz(diff_a_abs, x=positions)
    integral_neg = np.trapz(degraded(diff_a), x=positions)

    # div_all: sum of variations (percentage)
    # div_abs: sum of abs(variations) (percentage)
    # div_neg_th: sum of negative variations (if HIB) or positive variations (if LIB) with threshold (percentage)
    integral_div_all = np.trapz(div_a, x=positions)/n_samples
    integral_div_abs = np.trapz(div_a_abs, x=positions)/n_samples
    integral_div_neg = np.trapz(degraded(div_a), x=positions)/n_samples
    integral_div_neg_th = [
        np.trapz(degraded(div_a, threshold), x=positions)/n_samples
        for threshold in thresholds
    ]

    # quality indicators: same three metrics, different weightings
    qi_1 = qi_s(integral_div_neg, std_perc, grad_std_perc, 1/3, 1/3, 1/3)
    qi_2 = qi_s(integral_div_neg, std_perc, grad_std_perc, 0.80, 0.10, 0.10)
    qi_3 = qi_s(integral_div_neg, std_perc, grad_std_perc, 0.10, 0.80, 0.10)
    qi_4 = qi_s(integral_div_neg, std_perc, grad_std_perc, 0.10, 0.10, 0.80)

    # split() instead of instance.index(" "): names without a space
    # (e.g. "f0") used to raise ValueError here
    table.add_column(instance.split(" ", 1)[0],
                     [avg,
                      score_dollar,
                      std_perc*100,
                      viol*100,
                      integral_div_all*100, integral_div_abs*100, integral_div_neg*100, integral_div_neg_th[1]*100,
                      grad_avg, grad_std_perc*100,
                      min(div_a)*100, max(div_a)*100,
                      qi_1*100, qi_2*100, qi_3*100, qi_4*100])

    trace_name = get_trace_name(instance)
    fig.add_trace(go.Bar(
            name=trace_name + " PQI",
            x=[trace_name], y=[qi_1]))

    if output_writer:
        output_writer.writerow([provider,
                                instance,
                                test_key,
                                test["bench_name"],
                                test["extracted_name"],
                                test["extracted_unit"],
                                avg,
                                min_v,
                                max_v,
                                std,
                                score_dollar,
                                std_perc,
                                integral_div_neg,
                                integral_div_neg_th[0],
                                integral_div_neg_th[1],
                                integral_div_neg_th[2],
                                grad_std_perc])

## Test data

In [None]:
# synthetic series used to exercise calculate(): two sines around 10 and two pulse trains
funcs = [
    sin_f(1/NUM_SAMPLES * 2*np.pi, OFF=10),
    #sin_f(2/NUM_SAMPLES * 2*np.pi, OFF=10),
    sin_f(4/NUM_SAMPLES * 2*np.pi, OFF=10),
    pulse_f(50, D=25, A=2, OFF=9),
    pulse_f(10, D=1, A=-3, OFF=9),
]

In [None]:
fig = go.Figure()
table = PrettyTable()
init_table(table)

# every synthetic function is sampled on the same positions
times = np.arange(NUM_SAMPLES)
for i, func in enumerate(funcs):
    values = [func(v) for v in times]
    # cost 1 and direction HIB for all synthetic series
    calculate(table, fig, "", "f"+str(i), 1, "t1", {"htr": "HIB"}, times, values)

table.float_format = ".2"
print(table)
fig.update_yaxes(automargin=True)
# calculate() adds one PQI bar per function, so the axes are function / PQI
# (the labels used to say "time" / "result")
fig.update_layout(
    autosize=True,
    title="PQI of the synthetic test functions",
    title_x=0.5,
    xaxis_title="function",
    yaxis_title="PQI",
)
fig.show()

## Cloud Data

In [None]:
# load prepared data
# NOTE(review): pickle.load can execute code embedded in the file; only load
# pickles produced by this project.
with open(pickle_folder + "tests_uni.p", "rb") as pickle_file:
    tests_uni = pickle.load(pickle_file)

PQI_CSV_HEADER = ["provider","instance","bench_key","bench_name","extracted_name","unit","avg","min","max","std","score_$","std_perc","integral_neg","integral_neg_th_5","integral_neg_th_10","integral_neg_th_20","grad_std_perc"]

# the loop variable must stay a module-level name `provider`
for provider in ["aws", "azure"]:
    # context managers close the input files (they used to be left open)
    with open(pickle_folder + provider + ".p", "rb") as pickle_file:
        data = pickle.load(pickle_file)
    config_file = "config_benchmark_viewer_" + provider + ".yml"
    with open(config_file, 'r') as ymlfile:
        # NOTE(review): FullLoader can build more than plain data; consider
        # yaml.safe_load if the config holds only scalars, lists and dicts.
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    # newline='' is what the csv module expects for files handed to
    # csv.writer; without it rows get an extra blank line on Windows
    with open("output/pqi/data/" + provider + "_pqi.csv", mode='w', newline='') as output_file:
        output_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        output_writer.writerow(PQI_CSV_HEADER)

        for key, grp in data:
            test_info = tests_uni[key]
            print(test_info)
            print("\n")
            fig = go.Figure()

            table = PrettyTable()
            init_table(table)

            # one table column and one bar per instance id
            for instance_id, instance_grp in grp.groupby("id"):
                calculate(table, fig, provider, instance_id, instance_cost(cfg, instance_id), key, test_info,
                          instance_grp["time"], instance_grp["value"], output_writer)

            table.float_format = ".2"
            print(table)
            fig.update_yaxes(automargin=True)
            fig.update_layout(
                autosize=True,
                title=provider + " / " + test_info["bench_name"] + " - " + test_info["extracted_name"] + " (" + test_info["extracted_unit"] + ")",
                title_x=0.5,
                xaxis_title="instance",
                yaxis_title="PQI",
                showlegend=False
            )
            # html and png exports share the same base name
            out_name = get_filename(provider, test_info["bench_name"], test_info["extracted_name"], key)
            fig.write_html("output/pqi/html/" + out_name + ".html", include_plotlyjs="cdn")
            fig.write_image("output/pqi/img/" + out_name + ".png", scale=2)
            fig.show()
            print("\n\n")