In [None]:
import os
import math
from typing import Optional, Dict

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy
import numpy as np

from pandas import DataFrame

# Show every column and row when a DataFrame is displayed.
# The fully qualified "display." keys are required: the bare 'max_columns' /
# 'max_rows' patterns also match the "styler.render." options in newer pandas,
# which makes set_option raise an OptionError.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

class Metric:
    """Describes a collected metric: its display name, abbreviation and optional unit."""

    name: str
    abbreviation: str
    unit: Optional[str]

    def __init__(self, name: str, abbreviation: str, unit: Optional[str]) -> None:
        """Store the name, abbreviation and unit (``None`` for unit-less metrics)."""
        self.name, self.abbreviation, self.unit = name, abbreviation, unit

    def __str__(self) -> str:
        """Render as ``name (abbreviation)``, followed by ``[unit]`` when a unit is set."""
        label = f"{self.name} ({self.abbreviation})"

        # A missing (or empty) unit leaves just the bare label
        if not self.unit:
            return label
        return f"{label} [{self.unit}]"

# Directory that receives every exported figure
OUT_DIR = "output"

# exist_ok=True makes this safe to re-run and avoids the check-then-create race;
# it still raises if the path exists but is not a directory, so a bad path
# fails here instead of at the first savefig call.
os.makedirs(OUT_DIR, exist_ok=True)

# Maps the application identifier used in result file names to its display name
APPLICATIONS = {
    "postgresql-ha": "PostgreSQL",
    "redis": "Redis",
    # "redis-cluster": "Redis Cluster",  (entry currently disabled)
}

# Maps the experiment code used in result file names to its display name.
# The insertion order of this dict defines the experiment ordering below.
EXPERIMENTS = {
    "ct": "Control Test",
    "pd": "Perturbation Disabled",
    "vb": "Vary Build",
    "vp": "Vary Patch",
    "vm": "Vary Minor",
}

# Position of every experiment code within EXPERIMENTS
EXPERIMENTS_ORDER = dict(zip(EXPERIMENTS, range(len(EXPERIMENTS))))

# Same positions, keyed by the experiment's display name instead of its code
EXPERIMENTS_NAME_ORDER = {
    EXPERIMENTS[code]: position
    for code, position in EXPERIMENTS_ORDER.items()
}
# Maps each metric column of the result data to its display metadata.
# Metric arguments are (name, abbreviation, unit).
METRICS: Dict[str, Metric] = {
    "time_to_initialize": Metric("Time to Complete Initialization", "TCI", "s"),
    "time_to_first_request": Metric("Time to First Request", "TFR", "s"),
    "time_to_all_requests": Metric("Time to All Containers Handle Requests", "TACHR", "s"),
    # A restart count carries no unit
    "restarts": Metric("Amount of Container Restarts", "ACR", None),
}

In [None]:
# Load every result file and combine them into a single DataFrame

try:
    # Resolve the data location relative to this file when it is available
    base_path = os.path.dirname(__file__)
except NameError:
    # __file__ is not defined in interactive mode,
    # so fall back to the current directory.
    base_path = "."


# Read each data file into its own DataFrame, keyed by
# (application display name, experiment code). A missing file is recorded as
# None; pd.concat drops None entries when combining.
dfs = {}
for experiment in EXPERIMENTS:
    for application, application_name in APPLICATIONS.items():
        path = os.path.join(base_path, "..", "results", f"{experiment}_{application}.jsonl")
        if os.path.isfile(path):
            dfs[(application_name, experiment)] = pd.read_json(path, lines=True)
        else:
            dfs[(application_name, experiment)] = None

# Stack all DataFrames; the dict keys become the two outer index levels,
# which are then turned into regular "Application" / "experiment" columns.
df = pd.concat(dfs, names=["Application", "experiment"]).reset_index(level=[0, 1])

# Add a column with the human readable name of the experiment
df["Experiment Type"] = df["experiment"].apply(EXPERIMENTS.__getitem__)


In [None]:
# Define methods to make and store plots

def save_plot(name: str):
    """Save the current matplotlib figure into OUT_DIR as pdf, png, jpg and svg.

    Args:
        name: File name without extension; one file per format is written.
    """
    for extension in ("pdf", "png", "jpg", "svg"):
        target = os.path.join(OUT_DIR, f"{name}.{extension}")
        plt.savefig(target, transparent=True, bbox_inches='tight')

def show_plot(data: DataFrame, x: str, y: str, xlabel: str, ylabel: str, hue: Optional[str] = None) -> None:
    """Draw a box plot of ``data`` with the individual observations overlaid as a swarm.

    Args:
        data: Observations to plot.
        x: Column of ``data`` shown on the x-axis.
        y: Column of ``data`` shown on the y-axis.
        xlabel: Label of the x-axis.
        ylabel: Label of the y-axis.
        hue: Optional column of ``data`` used to split and colour the groups.
    """
    sns.set(style="ticks")

    _, ax = plt.subplots(figsize=(12, 3))

    # Draw on the axes created above instead of relying on the implicit current axes
    sns.boxplot(data=data,
                x=x,
                y=y,
                hue=hue,
                ax=ax,
                )

    sns.swarmplot(data=data,
                  x=x,
                  y=y,
                  size=3,
                  hue=hue,
                  # Boxes are separated per hue level; dodge the swarms as well so
                  # each group of points lines up with its own box.
                  dodge=hue is not None,
                  alpha=0.5,
                  ax=ax,
                  )

    if hue is not None:
        # Both layers register legend entries for every hue level;
        # keep only the first handle per label so each level is listed once.
        handles, labels = ax.get_legend_handles_labels()
        first_handle_by_label = {}
        for handle, label in zip(handles, labels):
            first_handle_by_label.setdefault(label, handle)
        if first_handle_by_label:
            ax.legend(list(first_handle_by_label.values()),
                      list(first_handle_by_label.keys()),
                      title=hue)

    ax.xaxis.grid(True)
    # Start the x-axis at -1; the upper limit is left untouched
    ax.set_xlim(left=-1)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

# All Raw Data

In [None]:
# Display the combined DataFrame holding the loaded results of every
# application and experiment
df

# Amount of Experiment Iterations

Shows the number of times each experiment has been run.

*Note: Redis does not have any results for "Vary Minor", as no suitable minor version is available to test with.*

In [None]:
# Count the experiment iterations per experiment type and application
pivot = df.pivot_table(
    values="restarts",
    index="Experiment Type",
    columns="Application",
    aggfunc=np.size,
    fill_value="N/A",
    margins=True,
    margins_name="Total",
)

# Order the rows by the position of the experiment in EXPERIMENTS (perturbation intensity);
# labels without a known position, such as the "Total" margin, sort last.
sorted_index = sorted(
    pivot.index,
    key=lambda label: EXPERIMENTS_NAME_ORDER.get(label, math.inf),
)
pivot.reindex(sorted_index)

## Average Time to Complete Initialization (TCI) and Amount of Container Restarts (ACR)

In [None]:
# Mean initialization time and restart count per experiment type and application,
# keeping the groups in order of first appearance (sort=False), rounded to 2 decimals
(
    df.groupby(["Experiment Type", "Application"], sort=False)[
        ["time_to_initialize", "restarts"]
    ]
    .mean()
    .round(2)
)

# Control Test vs. Perturbation Disabled

In [None]:
# One figure per metric, restricted to the "ct" and "pd" experiments
for metric in METRICS:
    show_plot(
        data=df[df["experiment"].isin(["ct", "pd"])],
        x=metric,
        y="Application",
        xlabel=str(METRICS[metric]),
        ylabel="Applications",
        hue="Experiment Type",
    )
    save_plot(f"ct_vs_pd-{metric}")

## Statistical Significance

### By Metric & Application

Calculate the p-value for the hypothesis that the means of the metric are the same between the Control Test and Perturbation Disabled. Welch's t-test is used, so as not to assume that the variances are equal.

In [None]:
# p-value per metric and application for the "ct" vs. "pd" samples.
# equal_var=False selects Welch's t-test, which does not assume equal variances
# (equal_var=True would run Student's t-test instead).
# The "restarts" metric is excluded from the comparison.
pvalues = pd.DataFrame(
    {
        "p-Value": scipy.stats.ttest_ind(
            *(
                df[(df.experiment == experiment) & (df.Application == application)][metric]
                for experiment in ("ct", "pd")
            ),
            equal_var=False,
        ).pvalue,
        "Metric": str(METRICS[metric]),
        "Application": application,
    }
    for metric in METRICS.keys()
    if metric != "restarts"
    for application in ("PostgreSQL", "Redis")
)

# DataFrame.pivot takes keyword arguments only in pandas 2, so name them explicitly
pvalues.pivot(index="Metric", columns="Application")

### Combined
These p-values can be combined using Fisher's method, which yields a combined probability of:

In [None]:
# Combine the individual p-values with Fisher's method (the default of
# combine_pvalues, spelled out here); only the combined p-value is kept,
# the test statistic is discarded.
_, combined_pvalue = scipy.stats.combine_pvalues(pvalues["p-Value"], method="fisher")

combined_pvalue

# Impact of Perturbation on the Collected Metrics

In [None]:
# One figure per metric covering every experiment type, split by application
for metric in METRICS:
    show_plot(
        df,
        metric,
        "Experiment Type",
        xlabel=str(METRICS[metric]),
        ylabel="Experiment Type",
        hue="Application",
    )
    save_plot(f"box_and_scatter_plot-{metric}")