# ⏱️ Latency and Error Analysis Notebook

This notebook analyzes **request latency** and **error latency** percentiles from Fortio JSON output files generated during HTTP load testing experiments.

## 🧪 Experiments Covered

1. **01 - HTTP Max Throughput**
    - Compares p50, p75, p90, and p99 latency and error latencies between Istio and Linkerd at max throughput.

2. **02 - HTTP Constant Throughput**
    - Runs at constant QPS values: 1, 1000, and 10000.
    - Compares latency distributions per mesh across different traffic loads.

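3. **03 - HTTP Payload Variation**
    - Uses fixed QPS (100) with varying payload sizes: 0, 1000, and 10000 bytes.
    - Explores how latency changes with request payload size.

4. **04 - gRPC Max Throughput**
    - Plots the same latency and error latency percentiles per mesh for the gRPC results, grouped by payload size.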

## 📁 Expected Input

- JSON files under `../results/<experiment>/latencies_<mesh>_<qps>_<payload>_<timestamp>.json`
- Structure:
    - Fortio's `DurationHistogram` and `ErrorsDurationHistogram` blocks
    - Extracted percentiles: **p50, p75, p90, p99**

## 📉 Output

- Line plots for latency and error latency percentiles (per experiment config).
- Output PNGs are saved in:
    - `../diagrams/<experiment>/global_latency_*.png`
    - `../diagrams/<experiment>/global_latency_error_*.png`

> 🧪 **Benchmark Goal**: Compare service mesh behavior under varying load and payload profiles.

In [7]:
import os
import glob
import json
import matplotlib.pyplot as plt

In [8]:
def extract_percentiles(data, error=False):
    """
    Extract percentiles (p50, p75, p90, p99) from a Fortio JSON blob and convert to milliseconds.

    Args:
        data: Parsed Fortio result JSON (a dict).
        error: When True, read the "ErrorsDurationHistogram" block; otherwise
            read "DurationHistogram".

    Returns:
        A dict mapping each percentile found (50, 75, 90, 99) to its value
        multiplied by 1000, or to None when the entry carries no "Value".
        Percentiles that are not listed are simply absent from the dict, and a
        missing or null histogram / percentile list yields an empty dict.
    """
    histogram_key = "ErrorsDurationHistogram" if error else "DurationHistogram"
    # `.get(key, default)` only covers a *missing* key. A JSON `null` (e.g. an
    # error histogram with no samples) comes back as None, so fall back with
    # `or` to keep the lookups below from raising.
    histogram = data.get(histogram_key) or {}
    perc_list = histogram.get("Percentiles") or []

    result = {}
    for entry in perc_list:
        p = entry.get("Percentile")
        if p in (50, 75, 90, 99):
            value = entry.get("Value")
            # Multiply by 1000 to convert seconds to milliseconds.
            result[p] = value * 1000 if value is not None else None
    return result

def extract_params_from_filename(file_path):
    """
    Split a result filename into its mesh, QPS and payload components.

    The filename (any leading directories are ignored) is expected to look like:
       latencies_<mesh>_<qps>_<payload>_<timestamp>.json

    Returns a (mesh, qps, payload) tuple where qps and payload are ints.
    """
    prefix_length = len("latencies_")
    filename = os.path.basename(file_path)
    # Cut off the fixed-length prefix, then strip the ".json" extension text.
    stem = filename[prefix_length:].replace(".json", "")
    fields = stem.split("_")
    # Field order after the prefix: mesh, qps, payload (the rest is unused).
    return fields[0], int(fields[1]), int(fields[2])

In [9]:
# Experiment 1 (HTTP max throughput): plot latency and error latency
# percentiles per mesh and save both figures as PNG files.

# Define directories for experiment 1.
experiment_dir = os.path.join("..", "results", "01_http_max_throughput")
diagram_dir = os.path.join("..", "diagrams", "01_http_max_throughput")
os.makedirs(diagram_dir, exist_ok=True)

# Locate all latency JSON files for experiment 1.
# glob gives no ordering guarantee, so sort the paths: results are keyed by
# mesh only, and when several files exist for one mesh the last one processed
# wins. Sorting makes that choice (and the legend order) reproducible.
latency_files = sorted(glob.glob(os.path.join(experiment_dir, "latencies_*.json")))

# Dictionaries to store results per mesh: { mesh: {50: ..., 75: ..., 90: ..., 99: ...} }
results_latency = {}  # For the normal latency percentiles.
results_error = {}    # For error latency percentiles.

for file in latency_files:
    # Read the JSON as UTF-8 explicitly instead of relying on the locale default.
    with open(file, 'r', encoding="utf-8") as f:
        data = json.load(f)
    mesh, qps, payload = extract_params_from_filename(file)
    # (For experiment 1, qps and payload should be 0.)
    results_latency[mesh] = extract_percentiles(data, error=False)
    results_error[mesh] = extract_percentiles(data, error=True)

# Define x-axis labels.
x_labels = ["p50", "p75", "p90", "p99"]

# Output files for the two figures.
latency_output_path = os.path.join(diagram_dir, "global_latency_0.png")
error_output_path = os.path.join(diagram_dir, "global_latency_error_0.png")

# Both figures share the same layout; only data, y-label, title and target differ.
plot_specs = [
    (results_latency, "Latency (ms)", "Experiment 1: Latency Percentiles", latency_output_path),
    (results_error, "Error Latency (ms)", "Experiment 1: Error Latency Percentiles", error_output_path),
]
for results, y_label, title, output_path in plot_specs:
    plt.figure(figsize=(15, 10))
    for mesh, percs in results.items():
        # Order values for 50, 75, 90, and 99 (None where a percentile is missing).
        y_values = [percs.get(50), percs.get(75), percs.get(90), percs.get(99)]
        plt.plot(x_labels, y_values, marker='o', label=mesh)
    plt.xlabel("Percentile")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_path)
    # Close the figure so it is not kept open after saving.
    plt.close()

In [10]:
# %% [code] Experiment 2: Latency and Error Percentiles per constant-throughput (QPS) setting
# One latency figure and one error latency figure are written for every QPS
# value found in the result filenames.

# Define directories for experiment 2.
experiment_dir = os.path.join("..", "results", "02_http_constant_throughput")
diagram_dir = os.path.join("..", "diagrams", "02_http_constant_throughput")
os.makedirs(diagram_dir, exist_ok=True)

# Locate all latency JSON files for experiment 2.
# glob gives no ordering guarantee, so sort the paths: when several files map
# to the same (qps, mesh) pair the last one processed wins, and sorting makes
# that choice (and the legend order) reproducible.
latency_files = sorted(glob.glob(os.path.join(experiment_dir, "latencies_*.json")))

# Group results by QPS.
results_latency = {}  # structure: { qps: {mesh: {50:..., 75:..., ... } } }
results_error = {}    # similar structure for error histogram.

for file in latency_files:
    # Read the JSON as UTF-8 explicitly instead of relying on the locale default.
    with open(file, 'r', encoding="utf-8") as f:
        data = json.load(f)
    mesh, qps, payload = extract_params_from_filename(file)
    # (For experiment 2, payload is expected to be 0.)
    results_latency.setdefault(qps, {})[mesh] = extract_percentiles(data, error=False)
    results_error.setdefault(qps, {})[mesh] = extract_percentiles(data, error=True)

x_labels = ["p50", "p75", "p90", "p99"]

# For each QPS value present in the results, generate latency and error plots.
for qps_val in sorted(results_latency.keys()):
    # Latency Plot
    plt.figure(figsize=(15, 10))
    for mesh, percs in results_latency[qps_val].items():
        # Values in p50, p75, p90, p99 order (None where a percentile is missing).
        y_values = [percs.get(50), percs.get(75), percs.get(90), percs.get(99)]
        plt.plot(x_labels, y_values, marker='o', label=mesh)
    plt.xlabel("Percentile")
    plt.ylabel("Latency (ms)")
    plt.title(f"Experiment 2: Latency Percentiles at QPS = {qps_val}")
    plt.legend()
    plt.tight_layout()
    latency_outfile = os.path.join(diagram_dir, f"global_latency_{qps_val}.png")
    plt.savefig(latency_outfile)
    plt.close()
    print("Saved Experiment 2 latency diagram for QPS", qps_val, "to", latency_outfile)

    # Error Latency Plot
    plt.figure(figsize=(15, 10))
    for mesh, percs in results_error[qps_val].items():
        y_values = [percs.get(50), percs.get(75), percs.get(90), percs.get(99)]
        plt.plot(x_labels, y_values, marker='o', label=mesh)
    plt.xlabel("Percentile")
    plt.ylabel("Error Latency (ms)")
    plt.title(f"Experiment 2: Error Latency Percentiles at QPS = {qps_val}")
    plt.legend()
    plt.tight_layout()
    error_outfile = os.path.join(diagram_dir, f"global_latency_error_{qps_val}.png")
    plt.savefig(error_outfile)
    plt.close()

Saved Experiment 2 latency diagram for QPS 1 to ../diagrams/02_http_constant_throughput/global_latency_1.png
Saved Experiment 2 latency diagram for QPS 1000 to ../diagrams/02_http_constant_throughput/global_latency_1000.png
Saved Experiment 2 latency diagram for QPS 10000 to ../diagrams/02_http_constant_throughput/global_latency_10000.png
Saved Experiment 2 latency diagram for QPS 100000 to ../diagrams/02_http_constant_throughput/global_latency_100000.png
Saved Experiment 2 latency diagram for QPS 1000000 to ../diagrams/02_http_constant_throughput/global_latency_1000000.png


In [None]:
# Experiment 3 (HTTP payload variation): for every payload size found in the
# result filenames, plot latency and error latency percentiles per mesh.

# Define directories for experiment 3.
experiment_dir = os.path.join("..", "results", "03_http_payload")
diagram_dir = os.path.join("..", "diagrams", "03_http_payload")
os.makedirs(diagram_dir, exist_ok=True)

# Locate all latency JSON files for experiment 3.
# glob gives no ordering guarantee, so sort the paths: when several files map
# to the same (payload, mesh) pair the last one processed wins, and sorting
# makes that choice (and the legend order) reproducible.
latency_files = sorted(glob.glob(os.path.join(experiment_dir, "latencies_*.json")))

# Group results by payload (qps is expected to be 100 in this experiment).
results_latency = {}  # structure: { payload: {mesh: {50:..., 75:..., ... } } }
results_error = {}    # similar grouping for error histograms.

for file in latency_files:
    # Read the JSON as UTF-8 explicitly instead of relying on the locale default.
    with open(file, 'r', encoding="utf-8") as f:
        data = json.load(f)
    mesh, qps, payload = extract_params_from_filename(file)
    results_latency.setdefault(payload, {})[mesh] = extract_percentiles(data, error=False)
    results_error.setdefault(payload, {})[mesh] = extract_percentiles(data, error=True)

x_labels = ["p50", "p75", "p90", "p99"]

# For each payload value, generate latency and error plots.
for payload_val in sorted(results_latency.keys()):
    latency_outfile = os.path.join(diagram_dir, f"global_latency_100_{payload_val}.png")
    error_outfile = os.path.join(diagram_dir, f"global_latency_error_100_{payload_val}.png")

    # Both figures share the same layout; only data, y-label, title and target differ.
    plot_specs = [
        (
            results_latency[payload_val],
            "Latency (ms)",
            f"Experiment 3: Latency Percentiles (Throughput 100, Payload = {payload_val})",
            latency_outfile,
        ),
        (
            results_error[payload_val],
            "Error Latency (ms)",
            f"Experiment 3: Error Latency Percentiles (Throughput 100, Payload = {payload_val})",
            error_outfile,
        ),
    ]
    for per_mesh, y_label, title, outfile in plot_specs:
        plt.figure(figsize=(15, 10))
        for mesh, percs in per_mesh.items():
            # Values in p50, p75, p90, p99 order (None where a percentile is missing).
            y_values = [percs.get(50), percs.get(75), percs.get(90), percs.get(99)]
            plt.plot(x_labels, y_values, marker='o', label=mesh)
        plt.xlabel("Percentile")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.tight_layout()
        plt.savefig(outfile)
        # Close the figure so figures do not accumulate across loop iterations.
        plt.close()

In [12]:
# Experiment 4 (gRPC max throughput): for every payload size found in the
# result filenames, plot latency and error latency percentiles per mesh.

# Define directories for experiment 4.
grpc_experiment_dir = os.path.join("..", "results", "04_grpc_max_throughput")
grpc_diagram_dir = os.path.join("..", "diagrams", "04_grpc_max_throughput")
os.makedirs(grpc_diagram_dir, exist_ok=True)

# Locate all latency JSON files for experiment 4.
# glob gives no ordering guarantee, so sort the paths: when several files map
# to the same (payload, mesh) pair the last one processed wins, and sorting
# makes that choice (and the legend order) reproducible.
grpc_latency_files = sorted(glob.glob(os.path.join(grpc_experiment_dir, "latencies_*.json")))

# Group results by payload (as parsed from the filename).
# The structure is: { payload: {mesh: {50: value, 75: value, 90: value, 99: value} } }
results_latency_grpc = {}
results_error_grpc = {}

for file in grpc_latency_files:
    # Read the JSON as UTF-8 explicitly instead of relying on the locale default.
    with open(file, 'r', encoding="utf-8") as f:
        data = json.load(f)
    # The filename carries the mesh name, qps, and payload.
    mesh, qps, payload = extract_params_from_filename(file)
    # extract_percentiles returns a dictionary mapping percentile to value.
    results_latency_grpc.setdefault(payload, {})[mesh] = extract_percentiles(data, error=False)
    results_error_grpc.setdefault(payload, {})[mesh] = extract_percentiles(data, error=True)

x_labels = ["p50", "p75", "p90", "p99"]

# For each payload value, generate latency and error plots for GRPC.
for payload_val in sorted(results_latency_grpc.keys()):
    latency_outfile = os.path.join(grpc_diagram_dir, f"global_latency_0_{payload_val}.png")
    error_outfile = os.path.join(grpc_diagram_dir, f"global_latency_error_0_{payload_val}.png")

    # Both figures share the same layout; only data, y-label, title and target differ.
    plot_specs = [
        (
            results_latency_grpc[payload_val],
            "Latency (ms)",
            f"GRPC: Latency Percentiles (Throughput 0, Payload = {payload_val})",
            latency_outfile,
        ),
        (
            results_error_grpc[payload_val],
            "Error Latency (ms)",
            f"GRPC: Error Latency Percentiles (Throughput 0, Payload = {payload_val})",
            error_outfile,
        ),
    ]
    for per_mesh, y_label, title, outfile in plot_specs:
        plt.figure(figsize=(15, 10))
        for mesh, percs in per_mesh.items():
            # Values in p50, p75, p90, p99 order; .get yields None for any
            # percentile missing from the extracted dictionary.
            y_values = [percs.get(50), percs.get(75), percs.get(90), percs.get(99)]
            plt.plot(x_labels, y_values, marker='o', label=mesh)
        plt.xlabel("Percentile")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.tight_layout()
        plt.savefig(outfile)
        # Close the figure so figures do not accumulate across loop iterations.
        plt.close()