In [1]:
import os
import pandas as pd
from typing import List, Tuple
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import json

# Font settings passed to matplotlib.rc below; they apply globally to figures
# created after this cell runs.
font = {'size': 24}

matplotlib.rc('font', **font)
# Directory whose entries are listed to obtain the data file names.
DATASET_PATH = "../dataset"
# Presumably the CSV column names meant for read_dataset's index_field /
# data_field arguments; they are not passed anywhere in the visible cells
# (TODO confirm).
INDEX_FIELD = "timestamp"
DATA_FIELD = "num_request"
# Directory holding one "<workload_name>.json" file of candidate change points
# per workload.
CPD_CANDIDATE_ROOT = "../05_binseg_series/binseg_cpd_candidate"

In [2]:
def get_data_file_list(dataset_path: str) -> List[str]:
    """Return the names of the entries in ``dataset_path`` in sorted order.

    ``os.listdir`` yields entries in arbitrary, filesystem-dependent order.
    Sorting makes the processing order (and the key order of any JSON built
    from it) reproducible across runs and machines.

    Args:
        dataset_path: Directory to list.

    Returns:
        Sorted entry names (bare names, not full paths). Nothing is filtered:
        sub-directories and non-data files are included if present.

    Raises:
        OSError: If ``dataset_path`` does not exist or cannot be listed.
    """
    return sorted(os.listdir(dataset_path))

In [3]:
def read_dataset(
    csv_path: str,
    index_field: str,
    data_field: str,
) -> Tuple[np.ndarray, np.ndarray]:
    """Load two columns of a CSV file as NumPy arrays.

    Args:
        csv_path: Path of the CSV file to read.
        index_field: Name of the column returned as the first array.
        data_field: Name of the column returned as the second array.

    Returns:
        ``(index_values, data_values)``, one array per requested column.

    Raises:
        KeyError: If either column is missing from the file.
    """
    frame = pd.read_csv(csv_path)
    index_values = frame[index_field].to_numpy()
    data_values = frame[data_field].to_numpy()
    return index_values, data_values

In [4]:
def read_candidate_cpds(path: str) -> List[int]:
    """Load the candidate change points stored as JSON at ``path``.

    Args:
        path: Path of the JSON file to read.

    Returns:
        Whatever the JSON document decodes to; annotated as a list of ints.
    """
    with open(path, "r") as cpd_file:
        return json.load(cpd_file)

In [5]:
# Workload names excluded from the evaluation loop.
# NOTE(review): these dates all appear to fall on weekends - confirm the
# rationale for skipping them.
workload_to_skip_list = [
    "workload_1998-06-13",
    "workload_1998-06-14",
    "workload_1998-06-20",
    "workload_1998-06-21",
    "workload_1998-06-27",
    "workload_1998-06-28",
    "workload_1998-07-04",
]

In [6]:
def get_flapping_time(n_pod_list: List, candidate_cpds: List):
    """Count pod-number changes that happen away from the change points.

    The series is cut into segments at each candidate change point, with the
    end of the series closing the last segment. Inside a segment, consecutive
    samples ``i`` and ``i + 1`` are compared for
    ``i`` in ``[seg_start + 2, seg_end - 3)``, so changes within two samples
    of a segment boundary are not counted.

    Args:
        n_pod_list: Pod count per time step.
        candidate_cpds: Segment boundaries (indices into ``n_pod_list``),
            expected in ascending order. The list is NOT modified.

    Returns:
        Number of positions where consecutive pod counts differ.
    """
    margin = 2  # samples ignored on each side of a segment boundary
    # Build a fresh boundary list instead of appending to the argument, so the
    # caller's list still holds only the real change points afterwards.
    boundaries = list(candidate_cpds) + [len(n_pod_list)]

    n_flapping = 0
    seg_start = 0
    for seg_end in boundaries:
        for i in range(seg_start + margin, seg_end - margin - 1):
            if n_pod_list[i] != n_pod_list[i + 1]:
                n_flapping += 1
        seg_start = seg_end
    return n_flapping

In [7]:
def get_max_deviation(r_cpu_list: List, upper_constraint: float, lower_constraint: float):
    """Return the largest excursions of the samples outside the allowed band.

    Both values are clipped at zero: a side that is never violated reports
    ``0.0``. (Taking ``abs(min(r - lower))`` instead would report a positive
    "deviation" whenever every sample sits above the lower bound.)

    Args:
        r_cpu_list: Samples to check (must be non-empty).
        upper_constraint: Upper bound of the allowed band.
        lower_constraint: Lower bound of the allowed band.

    Returns:
        ``(max_upper_deviation, max_lower_deviation)`` as plain floats:
        how far the largest sample exceeds ``upper_constraint`` and how far
        the smallest sample falls below ``lower_constraint``.

    Raises:
        ValueError: If ``r_cpu_list`` is empty.
    """
    r_cpu = np.asarray(r_cpu_list)
    max_upper_deviation = np.maximum(0, np.max(r_cpu - upper_constraint))
    max_lower_deviation = np.maximum(0, np.max(lower_constraint - r_cpu))
    # Plain floats keep the results JSON-serialisable whatever the input dtype.
    return float(max_upper_deviation), float(max_lower_deviation)

In [8]:
def plot_flapping(workload: List, candidate_cpds: List, n_pod_list: List, title: str):
    """Plot a workload, its candidate change points and the pod count.

    The workload is drawn on the left y-axis, the pod count on a twin right
    y-axis, and every candidate change point as a dashed vertical line.

    Args:
        workload: Workload series, plotted against its index.
        candidate_cpds: x positions of the change-point lines. May be empty,
            in which case no lines are drawn and the legend has no
            change-point entry.
        n_pod_list: Pod-count series, plotted against its index.
        title: Axes title; pass ``None`` to omit it.

    Returns:
        ``(fig, ax1)``: the figure and its primary (left) axes.
    """
    fig, ax1 = plt.subplots()
    color_workload = "#3F51B5"  # material indigo
    color_cp = "#4CAF50"  # material green
    color_n_pod = "#F44336"  # material red
    line_workload, = ax1.plot(workload, color=color_workload, label="num request scaled by 10x")
    ax1.set_xlabel('time (min)')
    ax1.set_ylabel('num request x (10 requests)')

    # Keep a handle on one change-point line so the legend gets a single
    # entry for all of them.
    cp_line = None
    for cp in candidate_cpds:
        cp_line = ax1.axvline(x=cp, color=color_cp, linestyle='--', linewidth=1)
    if cp_line is not None:
        cp_line.set_label("suggested change point")

    ax2 = ax1.twinx()
    line_n_pod, = ax2.plot(n_pod_list, color=color_n_pod, label="num pod")

    ax1.spines['top'].set_visible(False)
    ax2.spines['top'].set_visible(False)

    # Build one legend for artists that live on two different axes.
    lines = [line_workload, line_n_pod]
    if cp_line is not None:
        lines.append(cp_line)
    labels = [line.get_label() for line in lines]
    ax1.legend(lines, labels, loc='upper right')

    if title is not None:
        ax1.set_title(title)

    return fig, ax1

In [None]:
# For every result directory: compute the flapping count and the maximum r_cpu
# deviation of each workload, save one pod-number figure per workload, then
# write the three per-result summary JSON files.

# Allowed band for the r_cpu samples when measuring deviation.
R_CPU_UPPER_CONSTRAINT = 0.6
R_CPU_LOWER_CONSTRAINT = 0.4

data_file_list = get_data_file_list(DATASET_PATH)
results = ["reactive_result", "proactive_result", "ocpdas_result", "smooth_result"]
for result in results:
    flapping_time_dict = {}
    max_upper_deviation_dict = {}
    max_lower_deviation_dict = {}

    # Per-workload series produced for this result, keyed by workload name.
    with open(os.path.join(result, "workload.json"), "r") as f:
        workload_dict = json.load(f)
    with open(os.path.join(result, "n_pod.json"), "r") as f:
        n_pod_dict = json.load(f)
    with open(os.path.join(result, "r_cpu.json"), "r") as f:
        r_cpu_dict = json.load(f)

    img_dir = os.path.join(result, "npod_img")
    os.makedirs(img_dir, exist_ok=True)

    for file_name in data_file_list:
        workload_name = file_name.split(".")[0]
        if workload_name in workload_to_skip_list:
            continue
        print("read %s" % (file_name))
        candidate_cpds = read_candidate_cpds(os.path.join(CPD_CANDIDATE_ROOT, workload_name+".json"))
        workload = workload_dict[workload_name]
        n_pod_list = n_pod_dict[workload_name]
        r_cpu_list = r_cpu_dict[workload_name]

        # Pass a copy so the list plotted below holds exactly the change
        # points read from file, whatever the helper does with its argument.
        flapping_time_dict[workload_name] = get_flapping_time(n_pod_list, list(candidate_cpds))
        max_upper_deviation, max_lower_deviation = get_max_deviation(
            r_cpu_list, R_CPU_UPPER_CONSTRAINT, R_CPU_LOWER_CONSTRAINT
        )
        max_upper_deviation_dict[workload_name] = max_upper_deviation
        max_lower_deviation_dict[workload_name] = max_lower_deviation

        # plot
        fig, ax1 = plt.subplots()
        fig.set_size_inches(14, 7)
        xrange = np.arange(len(workload))
        color_workload = "#3F51B5"  # material indigo
        color_cp = "#4CAF50"  # material green
        color_n_pod = "#F44336"  # material red
        line_workload, = ax1.plot(xrange, workload, color=color_workload, label="request number scaled by 10x")
        ax1.set_xlabel('time (min)')
        ax1.set_ylabel('request number (x10 requests)')

        ax1.grid(True, linestyle="--")

        # Keep a handle on one change-point line so the legend gets a single
        # entry for all of them; stays None when there are no candidates.
        line_cp = None
        for cp in candidate_cpds:
            line_cp = ax1.axvline(x=cp, color=color_cp, linestyle='--', linewidth=1)

        ax2 = ax1.twinx()
        line_n_pod, = ax2.plot(xrange, n_pod_list, color=color_n_pod, label="pod number")

        ax1.spines['top'].set_visible(False)
        ax2.spines['top'].set_visible(False)
        ax2.set_ylabel("pod number")

        # Build one legend for artists that live on two different axes.
        lines = [line_workload, line_n_pod]
        if line_cp is not None:
            line_cp.set_label("suggested change point")
            lines.append(line_cp)
        labels = [line.get_label() for line in lines]
        ax1.legend(lines, labels, loc='upper right')

        ax1.set_title(workload_name.replace("_", " "))
        fig.savefig(os.path.join(img_dir, workload_name+".pdf"))
        # Close this specific figure so figures do not pile up across iterations.
        plt.close(fig)

    with open(os.path.join(result, "flapping_time.json"), "w") as f:
        json.dump(flapping_time_dict, f, indent=4)
    with open(os.path.join(result, "max_upper_deviation.json"), "w") as f:
        json.dump(max_upper_deviation_dict, f, indent=4)
    with open(os.path.join(result, "max_lower_deviation.json"), "w") as f:
        json.dump(max_lower_deviation_dict, f, indent=4)