# 📈 Metrics Analysis Notebook

This notebook analyzes **CPU** and **Memory** resource usage during performance experiments using Fortio and a service mesh (Istio or Linkerd). It reads resource usage data exported in CSV format from the Kubernetes metrics API and generates visualizations per container.

## 🧪 Experiments Covered

1. **01 - HTTP Max Throughput**
    - Measures max throughput with default Fortio settings.
    - Generates average CPU and memory usage per container.

2. **02 - HTTP Constant Throughput**
    - Uses constant QPS values: 1, 1000, and 10000.
    - Shows how resource usage changes with increasing load.

3. **03 - HTTP Payload Variation**
    - Fixed QPS (100) with payload sizes: 0, 1000, and 10000 bytes.
    - Compares container resource usage with different payload sizes.

## 📁 Expected Input

- CSV files under `../results/<experiment>/metrics_<mesh>_<qps>_<payload>_<timestamp>.csv`
- Structure:
    - `timestamp, namespace, pod, container, cpu(n), memory(Ki)`
    - Plus derived fields: `cpu` (as float), `memory` (as float)

## 📉 Output

- CPU and Memory bar charts per container for each experiment setup.
- Output PNGs are saved in:
    - `../diagrams/<experiment>/cpu_*.png`
    - `../diagrams/<experiment>/memory_*.png`

> 📌 **Note**: CPU values are shown in nanocores, and memory in Ki.

In [1]:
# %% [code] Import required libraries
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt

# Use a default matplotlib style
plt.style.use('default')

In [None]:
def convert_cpu(cpu_str):
    """
    Convert CPU usage from a string with a trailing 'n' (nanocores) 
    to a float representing millicores (m). (1 millicore = 1e6 nanocores)
    """
    try:
        nanocores = float(cpu_str.rstrip('n'))
        millicores = nanocores / 1e6 
        return millicores
    except Exception as e:
        print(f"Error converting CPU value '{cpu_str}': {e}")
        return None

def convert_memory(mem_str):
    """
    Convert memory usage from a string with a trailing 'Ki' to a float representing megabytes (MB).
    (1 MB = 1024 Ki)
    """
    try:
        ki = float(mem_str.rstrip('Ki'))
        mb = ki / 1024.0  # Convert Ki to MB (using 1024 Ki = 1 MB)
        return mb
    except Exception as e:
        print(f"Error converting memory value '{mem_str}': {e}")
        return None


In [None]:
def load_metrics_csv(file_path):
    """
    Reads a CSV file with resource metrics, parses the timestamp, and converts the raw CPU
    (nanocores remain unchanged) and memory values (in Ki).
    
    Returns:
        A pandas DataFrame with columns:
        - timestamp
        - namespace
        - pod
        - container
        - cpu(n) (original)
        - memory(Ki) (original)
        - cpu: numeric CPU in millicore
        - memory: numeric memory in MB
    """
    df = pd.read_csv(file_path)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['cpu'] = df['cpu(n)'].apply(convert_cpu)
    df['memory'] = df['memory(Ki)'].apply(convert_memory)
    return df

def extract_mesh_qps_payload(filename):
    """
    Extract mesh, qps, and payload from a filename assumed to be formatted as:
      metrics_<mesh>_<qps>_<payload>_<timestamp>.csv
    Returns a tuple: (mesh (str), qps (int), payload (int))
    """
    base = os.path.basename(filename)
    parts = base.replace("metrics_", "").replace(".csv", "").split("_")
    if len(parts) < 3:
        return None, None, None
    mesh, qps, payload = parts[0], parts[1], parts[2]
    return mesh, int(qps), int(payload)

def shorten_label(ns, container, max_len=18):
    ns_abbr = ns if len(ns) <= max_len else ns[:max_len - 2] + '..'
    container_abbr = container if len(container) <= max_len else container[:max_len - 2] + '..'
    return f"{ns_abbr}/{container_abbr}"

In [None]:
# %% [code] Process 01_http_max_throughput experiment data and generate aggregated diagrams

# Set path to the 01_http_max_throughput experiment directory
experiment_dir = os.path.join("..", "results", "01_http_max_throughput")
output_dir = os.path.join("..", "diagrams", "01_http_max_throughput")
csv_files = glob.glob(os.path.join(experiment_dir, 'metrics_*.csv'))
os.makedirs(output_dir, exist_ok=True)

if not csv_files:
    print("No CSV files found in:", experiment_dir)
else:
    # Load and combine all CSV files
    df_list = [load_metrics_csv(file) for file in csv_files]
    df_exp = pd.concat(df_list, ignore_index=True)

    # Drop rows with NaN values in CPU or memory
    df_exp = df_exp.dropna(subset=['cpu', 'memory'])

    # Aggregate statistics per namespace/container
    agg_stats = df_exp.groupby(['namespace', 'container'])[['cpu', 'memory']].agg(['mean', 'median', 'std'])
    print("Aggregated stats for 01_http_max_throughput:")
    print(agg_stats)

    # --- CPU Plot ---
    max_cpu = df_exp.groupby(['namespace', 'container'])['cpu'].max()
    labels_cpu = [shorten_label(ns, c) for ns, c in max_cpu.index]

    plt.figure(figsize=(15, 10))
    max_cpu.index = labels_cpu
    max_cpu.plot(kind='bar')
    plt.ylabel("Average CPU Usage (millicore)")
    plt.title("01_http_max_throughput: Average CPU Usage per Container")
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    output_path = os.path.join(output_dir, "cpu_0.png")
    plt.savefig(output_path)
    plt.close()
    print("Saved CPU plot to", output_path)

    # --- Memory Plot ---
    max_memory = df_exp.groupby(['namespace', 'container'])['memory'].max()
    labels_mem = [shorten_label(ns, c) for ns, c in max_memory.index]

    plt.figure(figsize=(15, 10))
    max_memory.index = labels_mem
    max_memory.plot(kind='bar', color='tab:orange')
    plt.ylabel("Average Memory Usage (MB)")
    plt.title("01_http_max_throughput: Average Memory Usage per Container")
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    output_path = os.path.join(output_dir, "memory_0.png")
    plt.savefig(output_path)
    plt.close()
    print("Saved Memory plot to", output_path)


Aggregated stats for 01_http_max_throughput:
                                                cpu                \
                                               mean        median   
namespace              container                                    
istio-system           discovery       3.895681e+06  2.777849e+06   
                       istio-proxy     2.002482e+08  1.328737e+06   
linkerd                destination     1.721479e+06  1.299102e+06   
                       identity        3.541669e+05  3.674660e+05   
                       linkerd-proxy   5.537078e+05  4.467910e+05   
                       policy          5.367995e+05  3.273210e+05   
                       proxy-injector  1.240424e+06  2.051620e+05   
                       sp-validator    1.914184e+05  1.313040e+05   
service-mesh-benchmark istio-proxy     1.513055e+09  1.396071e+09   
                       linkerd-proxy   5.402900e+08  4.915040e+08   

                                                         

In [None]:
# %% [code] Process 02_http_constant_throughput experiment data with QPS information and save diagrams

# Set paths for the results and output diagrams directories
results_dir = os.path.join("..", "results", "02_http_constant_throughput")
output_dir  = os.path.join("..", "diagrams", "02_http_constant_throughput")
os.makedirs(output_dir, exist_ok=True)

# Get all metrics CSV files in the experiment directory
csv_files = glob.glob(os.path.join(results_dir, 'metrics_*.csv'))

# Label shortening helper
def shorten_label(ns, container, max_len=18):
    ns_abbr = ns if len(ns) <= max_len else ns[:max_len - 2] + '..'
    container_abbr = container if len(container) <= max_len else container[:max_len - 2] + '..'
    return f"{ns_abbr}/{container_abbr}"

if not csv_files:
    print("No CSV files found in:", results_dir)
else:
    df_list = []
    for file in csv_files:
        df = load_metrics_csv(file)
        # Extract the qps value from the filename using extract_mesh_qps_payload
        _, qps, _ = extract_mesh_qps_payload(file)
        df['qps'] = qps
        df_list.append(df)
    df_exp = pd.concat(df_list, ignore_index=True)

    # Drop rows with missing values to avoid plotting issues
    df_exp = df_exp.dropna(subset=['cpu', 'memory', 'qps'])

    # Get the unique QPS values present in the data (sorted)
    unique_qps = sorted(df_exp['qps'].unique())

    # For each QPS value, create and save bar charts for CPU and Memory
    for qps_val in unique_qps:
        df_qps = df_exp[df_exp['qps'] == qps_val]
        for metric, ylabel in [('cpu', "CPU Usage (millicore)"), ('memory', "Memory Usage (MB)")]:
            max_metric = df_qps.groupby(['namespace', 'container'])[metric].max()
            labels = [shorten_label(ns, c) for ns, c in max_metric.index]

            plt.figure(figsize=(15, 10))
            max_metric.index = labels
            max_metric.plot(kind='bar')
            plt.xlabel("Container")
            plt.ylabel(ylabel)
            plt.title(f"02_http_constant_throughput: Average {ylabel} (qps = {qps_val})")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()

            output_path = os.path.join(output_dir, f"{metric}_{qps_val}.png")
            plt.savefig(output_path)
            plt.close()
            print("Saved plot to", output_path)


Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not convert string to float: '14M'
Error converting memory value '14Mi': could not conv

In [None]:
# %% [code] Process 03_http_payload experiment data and plot per container (using payload)

# Define directories
experiment_dir = os.path.join("..", "results", "03_http_payload")
output_dir  = os.path.join("..", "diagrams", "03_http_payload")
os.makedirs(output_dir, exist_ok=True)

# Get all metrics CSV files in the experiment directory
csv_files = glob.glob(os.path.join(experiment_dir, 'metrics_*.csv'))

if not csv_files:
    print("No CSV files found in:", experiment_dir)
else:
    # Load each CSV and extract additional parameters (mesh, qps, payload)
    df_list = []
    for file in csv_files:
        df = load_metrics_csv(file)
        mesh, qps, payload = extract_mesh_qps_payload(file)
        df['mesh'] = mesh
        df['qps'] = qps
        df['payload'] = payload
        df_list.append(df)
    df_exp = pd.concat(df_list, ignore_index=True)
    
    # Identify unique qps and payload values
    unique_qps = sorted(df_exp['qps'].dropna().unique())
    unique_payload = sorted(df_exp['payload'].dropna().unique())
    
    # For the purpose of these diagrams, we assume qps is fixed; if not, select an appropriate value.
    qps_val = unique_qps[0] if unique_qps else None
    
    # For each unique payload value, generate separate diagrams for each metric.
    for payload_val in unique_payload:
        # Filter the DataFrame for the current payload value.
        df_subset = df_exp[df_exp['payload'] == payload_val]
        for metric, ylabel in [('cpu', "CPU Usage (millicore)"), ('memory', "Memory Usage (MB)")]:
            max_metric = df_subset.groupby(['namespace', 'container'])[metric].max()
            labels = [shorten_label(ns, c) for ns, c in max_metric.index]

            plt.figure(figsize=(15, 10))
            max_metric.index = labels
            max_metric.plot(kind='bar')
            plt.xlabel("Container")
            plt.ylabel(ylabel)
            plt.title(f"03_http_payload: Average {ylabel} (qps = {qps_val}, payload = {payload_val})")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            
            output_path = os.path.join(output_dir, f"{metric}_{qps_val}_{payload_val}.png")
            plt.savefig(output_path)
            plt.close()
            print("Saved plot to", output_path)

Saved plot to ../diagrams/03_http_payload/cpu_100_10000.png
Saved plot to ../diagrams/03_http_payload/memory_100_10000.png
Saved plot to ../diagrams/03_http_payload/cpu_100_100000.png
Saved plot to ../diagrams/03_http_payload/memory_100_100000.png


In [None]:
# %% [code] Process 04_grpc_max_throughput experiment data and plot per container (using payload)

# Define directories
experiment_dir = os.path.join("..", "results", "04_grpc_max_throughput")
output_dir  = os.path.join("..", "diagrams", "04_grpc_max_throughput")
os.makedirs(output_dir, exist_ok=True)

# Get all metrics CSV files in the experiment directory
csv_files = glob.glob(os.path.join(experiment_dir, 'metrics_*.csv'))

if not csv_files:
    print("No CSV files found in:", experiment_dir)
else:
    # Load each CSV and extract additional parameters (mesh, qps, payload)
    df_list = []
    for file in csv_files:
        df = load_metrics_csv(file)
        mesh, qps, payload = extract_mesh_qps_payload(file)
        df['mesh'] = mesh
        df['qps'] = qps
        df['payload'] = payload
        df_list.append(df)
    df_exp = pd.concat(df_list, ignore_index=True)
    
    # Identify unique qps and payload values
    unique_qps = sorted(df_exp['qps'].dropna().unique())
    unique_payload = sorted(df_exp['payload'].dropna().unique())
    
    # For the purpose of these diagrams, we assume qps is fixed; if not, select an appropriate value.
    qps_val = unique_qps[0] if unique_qps else None
    
    # For each unique payload value, generate separate diagrams for each metric.
    for payload_val in unique_payload:
        # Filter the DataFrame for the current payload value.
        df_subset = df_exp[df_exp['payload'] == payload_val]
        for metric, ylabel in [('cpu', "CPU Usage (millicore)"), ('memory', "Memory Usage (MB)")]:
            max_metric = df_subset.groupby(['namespace', 'container'])[metric].max()
            labels = [shorten_label(ns, c) for ns, c in max_metric.index]

            plt.figure(figsize=(15, 10))
            max_metric.index = labels
            max_metric.plot(kind='bar')
            plt.xlabel("Container")
            plt.ylabel(ylabel)
            plt.title(f"04_grpc_max_throughput: Average {ylabel} (qps = {qps_val}, payload = {payload_val})")
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            
            output_path = os.path.join(output_dir, f"{metric}_{qps_val}_{payload_val}.png")
            plt.savefig(output_path)
            plt.close()
            print("Saved plot to", output_path)

Saved plot to ../diagrams/04_grpc_max_throughput/cpu_0_0.png
Saved plot to ../diagrams/04_grpc_max_throughput/memory_0_0.png
