In [3]:
import re
import os
import pathlib
import json

import plotnine
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib import gridspec
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
sns.set_theme(style="darkgrid")

In [4]:
experiment = "01_http_max_throughput"

# Root of the project
project_dir = pathlib.Path().resolve().parent

# Directory containing experiment results
results_dir = (pathlib.Path(project_dir) / "results" / experiment).resolve()
results_dir

PosixPath('/home/richard/projects/mesh-bench/results/01_http_max_throughput')

In [56]:

from typing import List, TypedDict

# Represents a single data point of system utilization
class ResourceResult(TypedDict):
    mesh: str
    qps: str
    pod: str
    container: str
    time: int
    cpu: float
    mem: float



def parse_resource_results(result_file: pathlib.Path) -> List[ResourceResult]:
    """ Reads a result file and parses the data.

    The returned data is a list of ResourceResults that represent a
    single metric over a time span of 15 minutes.
    """
    rows = []

    # Regex to extra# 0 -> Full match
    # 1 -> mem/cpu results
    # 2 -> Mesh
    # 3 -> Requested QPS
    name_re = re.compile("^(mem|cpu)_([a-z]+)_(\d+|MAX).*json$")

    # Extract metadata from the result
    matches = name_re.match(file)

    metric = matches[1]
    mesh = matches[2]
    qps = matches[3]
    print(f"Type: {metric}, Mesh: {mesh}, QPS: {qps}")

    with open(result_file) as f:
        data = json.load(f)

    # The resutl is lister per pod/container
    for container in data:

        # Metadata (dimensions)
        meta = container["metric"]

        # Actual values in list[unixtime, value]
        values = container["values"]

        for v in values:
            row: ResourceResult = {
                "mesh": mesh,
                "qps": qps,
                "pod": meta["pod"],
                "container": meta["container"],
                "time": v[0],
                metric: v[1],
            }
            rows.append(row)

    return rows


In [58]:
"""
Generate a list of rows containing a single metric (either cpu or mem)
These later have to be merged based on time
"""


results = []
for root, _, files in os.walk(results_dir, topdown=False):

    print(f"Parsing Experiment results:")
    print(f"Exp: {experiment}\nResults: {len(files)}")

    for file in files:
        # Skip non system resource files
        if not re.match("^(mem|cpu).*$", file):
            continue

        
        path = (pathlib.Path(root) / file).resolve()
        res = parse_resource_results(path)
        results.extend(res)

results

Parsing Experiment results:
Exp: 01_http_max_throughput
Results: 15
Type: cpu, Mesh: istio, QPS: MAX
Type: mem, Mesh: baseline, QPS: MAX
Type: mem, Mesh: istio, QPS: MAX
Type: cpu, Mesh: baseline, QPS: MAX
Type: mem, Mesh: linkerd, QPS: MAX
Type: mem, Mesh: traefik, QPS: MAX
Type: cpu, Mesh: cilium, QPS: MAX
Type: cpu, Mesh: traefik, QPS: MAX
Type: cpu, Mesh: linkerd, QPS: MAX
Type: mem, Mesh: cilium, QPS: MAX


[{'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'target-fortio-746f85d498-vv4vf',
  'container': 'fortio',
  'time': 1656275944,
  'cpu': '0.044302137147215044'},
 {'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'target-fortio-746f85d498-vv4vf',
  'container': 'fortio',
  'time': 1656275947,
  'cpu': '0.048305848894251116'},
 {'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'target-fortio-746f85d498-vv4vf',
  'container': 'fortio',
  'time': 1656275950,
  'cpu': '0.052309560641287195'},
 {'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'target-fortio-746f85d498-vv4vf',
  'container': 'fortio',
  'time': 1656275953,
  'cpu': '0.05631327238832328'},
 {'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'target-fortio-746f85d498-vv4vf',
  'container': 'fortio',
  'time': 1656275956,
  'cpu': '0.060316984135359346'},
 {'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'target-fortio-746f85d498-vv4vf',
  'container': 'fortio',
  'time': 1656275959,
  'cpu': '0.06432069588239543'},
 {'mesh': 'istio',
  'qps': 'MAX',
  'pod': 'tar

In [68]:
""" Create a pandas DataFrame
- Each row represents a single observation
- Each observation takes a form of type ResourceResult
- Rows are merged based on time/pod/container -> this halves rows as both CPU/mem metrics share unix timestamps
- Convert unix timestamps to pd.DateTime
"""

df = pd.DataFrame(data=results)
df



Unnamed: 0,mesh,qps,pod,container,time,cpu,mem
0,istio,MAX,target-fortio-746f85d498-vv4vf,fortio,1656275944,0.044302137147215044,
1,istio,MAX,target-fortio-746f85d498-vv4vf,fortio,1656275947,0.048305848894251116,
2,istio,MAX,target-fortio-746f85d498-vv4vf,fortio,1656275950,0.052309560641287195,
3,istio,MAX,target-fortio-746f85d498-vv4vf,fortio,1656275953,0.05631327238832328,
4,istio,MAX,target-fortio-746f85d498-vv4vf,fortio,1656275956,0.060316984135359346,
...,...,...,...,...,...,...,...
6597,cilium,MAX,cilium-node-init-wbjrr,node-init,1656330672,,9528.872509501325
6598,cilium,MAX,cilium-node-init-wbjrr,node-init,1656330675,,9528.872509501325
6599,cilium,MAX,cilium-node-init-wbjrr,node-init,1656330678,,9528.872509501325
6600,cilium,MAX,cilium-node-init-wbjrr,node-init,1656330681,,9528.872509501325


In [None]:

ids = ["mesh", "qps", "pod", "container", "time"]

df.groupby(ids).ffill()\
  .groupby(ids).last()\
  .reset_index()