In [9]:
# Tally how many pods of one namespace are placed on each node

from kubernetes import client, config

# Read the cluster connection settings from the kubeconfig file
config.load_kube_config()

# CoreV1Api provides the pod listing call used below
v1 = client.CoreV1Api()

# Only pods in this namespace are counted
namespace_to_filter = "social-network"

# One non-watch request returning every pod in the namespace
pods = v1.list_namespaced_pod(namespace=namespace_to_filter, watch=False)

# Maps node name -> number of pods whose spec points at that node
pod_count_per_node = {}

# Walk the pod list once, bumping the counter of the node each pod reports
for pod in pods.items:
    node = pod.spec.node_name
    pod_count_per_node[node] = pod_count_per_node.get(node, 0) + 1

# Emit one summary line per node, in first-seen order
for node, count in pod_count_per_node.items():
    print(f"Node: {node}, Number of Pods in '{namespace_to_filter}' namespace: {count}")


Node: k8s-worker-5, Number of Pods in 'social-network' namespace: 4
Node: k8s-worker-7, Number of Pods in 'social-network' namespace: 4
Node: k8s-worker-8, Number of Pods in 'social-network' namespace: 4
Node: k8s-worker-3, Number of Pods in 'social-network' namespace: 6
Node: k8s-worker-6, Number of Pods in 'social-network' namespace: 3
Node: k8s-worker-4, Number of Pods in 'social-network' namespace: 4
Node: k8s-worker-9, Number of Pods in 'social-network' namespace: 2


In [14]:
import subprocess
import os
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns

# Function to execute wrk command
def run_wrk(url, script_path, duration, rate, data_dir):
    """Run one wrk2 load test and store its stdout in a timestamped text file.

    Args:
        url: Target URL handed to wrk.
        script_path: Lua script passed to wrk through ``-s``.
        duration: Value for wrk's ``-d`` flag (e.g. ``"10m"``).
        rate: Value for wrk's ``-R`` flag.
        data_dir: Directory that receives the result file.

    Returns:
        Path of the written file, named ``<YYYYmmddHHMM>_<rate>_<duration>.txt``
        inside ``data_dir``.

    Raises:
        FileNotFoundError: If the wrk binary is not present at the hard-coded
            path (the command is executed directly, not through a shell).
    """
    current_time_str = datetime.now().strftime("%Y%m%d%H%M")
    # os.path.join avoids the doubled separator when data_dir already ends in "/"
    result_filename = os.path.join(data_dir, f"{current_time_str}_{rate}_{duration}.txt")

    # An argument list (no shell) keeps spaces or shell metacharacters in the
    # URL / script path from being split or interpreted.
    command = [
        "/home/ubuntu/DeathStarBench/wrk2/wrk",
        "-D", "exp",
        "-t2",
        "-c100",
        f"-d{duration}",
        "-L",
        "-s", str(script_path),
        str(url),
        f"-R{rate}",
    ]
    result = subprocess.run(command, capture_output=True, text=True)

    # Surface failures instead of silently saving an empty or partial result
    if result.returncode != 0:
        print(f"wrk exited with status {result.returncode} for rate {rate}: {result.stderr.strip()}")

    with open(result_filename, 'w') as file:
        file.write(result.stdout)

    print(f"Results for rate {rate} saved to {result_filename}")
    return result_filename

# Function to parse wrk output


def parse_wrk_output(filename):
    """Extract the value column of the percentile spectrum from a wrk output file.

    Lines are scanned in order. Collection starts after a line beginning with
    "  Detailed Percentile spectrum:" and the scan stops at the first line
    beginning with "#[Mean" (even if collection never started). While
    collecting, the first whitespace-separated field of every line that has at
    least two fields is converted to float; lines whose first field is not
    numeric (such as the column header) are skipped.

    Args:
        filename: Path of the text file holding wrk's stdout.

    Returns:
        List of floats in file order (assumed to be milliseconds).
    """
    spectrum_header = "  Detailed Percentile spectrum:"
    summary_marker = "#[Mean"

    values = []
    in_spectrum = False
    with open(filename, 'r') as handle:
        for raw_line in handle:
            if raw_line.startswith(spectrum_header):
                # The header itself carries no data; just switch collection on
                in_spectrum = True
            elif raw_line.startswith(summary_marker):
                # Summary statistics follow the spectrum; nothing more to read
                break
            elif in_spectrum:
                # Data rows look like "       2.037     0.000000            1         1.00"
                fields = raw_line.split()
                if len(fields) < 2:
                    continue
                try:
                    values.append(float(fields[0]))
                except ValueError:
                    # Non-numeric first column (e.g. the "Value" header row)
                    pass

    return values

# Function to plot CDF
def plot_cdf(data, filename):
    """Plot the empirical CDF of ``data`` as a step curve and save it.

    Args:
        data: Sequence of response times (axis label assumes milliseconds).
        filename: Destination path passed to ``savefig``.
    """
    sorted_data = np.sort(data)
    sample_count = len(sorted_data)
    # The ECDF at the i-th smallest sample (1-based) is i/n, so the smallest
    # sample sits at 1/n and the curve reaches 1 at the largest sample.
    cumulative = np.arange(1, sample_count + 1) / sample_count

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.step(sorted_data, cumulative, where='post')
        ax.set_xlabel('Response Time (ms)')
        ax.set_ylabel('CDF')
        ax.set_title('CDF of Response Times')
        ax.grid(True)
        fig.savefig(filename)
    finally:
        # Release the figure even when saving fails, so repeated calls do not
        # accumulate open figures.
        plt.close(fig)

# Function to plot Violin Plot
def plot_violin(latency_data, request_rates, data_dir):
    """Draw one violin per request rate and save the figure as a PNG.

    Violins appear in the order of ``request_rates``. A rate that is missing
    from ``latency_data`` or maps to an empty list is left out together with
    its tick label, so labels always match the violins drawn.

    Args:
        latency_data: Mapping of request rate -> list of response times.
        request_rates: Rates to plot, in display order.
        data_dir: Directory that receives ``violin_plot_<YYYYmmddHHMM>.png``.
    """
    # Build data and labels from the same pairs so they cannot drift apart
    plotted = [
        (rate, latency_data[rate])
        for rate in request_rates
        if latency_data.get(rate)
    ]
    if not plotted:
        print("No latency data to plot; skipping violin plot")
        return

    labels = [str(rate) for rate, _ in plotted]
    data_to_plot = [latencies for _, latencies in plotted]

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.violinplot(data=data_to_plot, ax=ax)
        ax.set_xticks(list(range(len(labels))))
        ax.set_xticklabels(labels)
        ax.set_xlabel('Request Rate')
        ax.set_ylabel('Response Time (ms)')
        ax.set_title('Response Time Distribution by Request Rate')
        ax.grid(True)

        current_time_str = datetime.now().strftime("%Y%m%d%H%M")
        # os.path.join avoids the doubled separator when data_dir ends in "/"
        filename = os.path.join(data_dir, f"violin_plot_{current_time_str}.png")
        fig.savefig(filename)
    finally:
        # Always release the figure, including when plotting or saving fails
        plt.close(fig)
    print(f"Saved violin plot to {filename}")


# Directory holding the stored wrk result files and the generated plots
data_dir = "/home/ubuntu/ms_scheduling/social_net/perf_testing/data/"
os.makedirs(data_dir, exist_ok=True)

# Benchmark parameters (used by run_wrk when a sweep is re-run)
url = "http://nginx-thrift.social-network.svc.cluster.local:8080/wrk2-api/home-timeline/read"
script_path = "/home/ubuntu/DeathStarBench/socialNetwork/wrk2/scripts/social-network/read-home-timeline.lua"
duration = "10m"
request_rates = [300, 500, 1000, 1500, 2000, 3000, 5000]
result_files = []
latency_data = {}

# Stored result files, listed in the same order as request_rates
filename = ['202403291239_300_10m.txt','202403291249_500_10m.txt', '202403291259_1000_10m.txt', '202403291309_1500_10m.txt', '202403291319_2000_10m.txt',
                '202403291329_3000_10m.txt', '202403291339_5000_10m.txt']

# The two lists are consumed pairwise; a length mismatch would attribute a
# file to the wrong rate, so fail before parsing anything.
if len(filename) != len(request_rates):
    raise ValueError(
        f"Expected one result file per request rate, got {len(filename)} files "
        f"for {len(request_rates)} rates"
    )

# The stored files are re-parsed here instead of re-running the benchmark; to
# regenerate one, call run_wrk(url, script_path, duration, rate, data_dir) and
# parse the path it returns.
for rate, stored_name in tqdm(zip(request_rates, filename),
                              total=len(request_rates),
                              desc="Running wrk for different rates"):
    latency_data[rate] = parse_wrk_output(os.path.join(data_dir, stored_name))



Running wrk for different rates: 100%|██████████| 7/7 [00:00<00:00, 2047.86it/s]


In [15]:

# Draw the violin plot only when latency_data holds at least one entry
if latency_data:
    plot_violin(latency_data, request_rates, data_dir)

Saved violin plot to /home/ubuntu/ms_scheduling/social_net/perf_testing/data//violin_plot_202403300627.png


Running wrk for different rates: 100%|██████████| 2/2 [00:00<00:00,  7.00it/s]
