In [284]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from time import sleep

In [285]:
# Nominal time step between consecutive samples. Row and timestep indices are
# multiplied by this value to obtain the time axis used in the plots.
TIME_RESOLUTION = 5 # seconds

In [286]:
# Load the raw resource-usage samples. The "node" column is dropped right away;
# later cells aggregate all rows that share a timestamp.
data = pd.read_csv("pipeline_k3s_resource_usage.csv").drop("node", axis="columns")

In [287]:
def write_image(fig, fname):
    """Display ``fig`` and export it to ``fname``.

    The export is deliberately done twice: a throwaway scatter plot is written
    to ``fname`` first, then, after a one-second pause, ``fig`` is written to
    the same path and overwrites it. Per the original note, the first export
    exists to get MathJax loaded before the real figure is rendered.

    Args:
        fig: Plotly figure to show and save.
        fname: Output path handed to ``write_image``.
    """
    fig.show()

    # Warm-up export of a dummy figure (overwritten below).
    px.scatter(x=[0, 1], y=[0, 1]).write_image(fname)
    sleep(1)

    # Actual export.
    fig.write_image(fname)

In [288]:
# Strip the unit suffix from every measurement column and make it numeric.
# Absolute values become integers; percentages become fractions (value / 100).
for column, suffix in (("cpu", "m"), ("memory", "Mi")):
    data[column] = data[column].str.replace(suffix, "").astype(int)

for column in ("cpu_per", "memory_per"):
    data[column] = data[column].str.replace("%", "").astype(float) / 100

data.head()

Unnamed: 0,timestamp,cpu,cpu_per,memory,memory_per
0,2025-04-23T14:42:22.992238,1294,0.16,5394,0.45
1,2025-04-23T14:42:22.992238,536,0.06,4231,0.53
2,2025-04-23T14:42:22.992238,106,0.01,1744,0.21
3,2025-04-23T14:42:22.992238,389,0.04,1860,0.23
4,2025-04-23T14:42:22.992238,75,0.0,1939,0.24


In [289]:
# Collapse all rows that share a timestamp into a single sample:
# absolute usage is summed, percentage usage is averaged.
per_timestamp = {
    "cpu": "sum",
    "cpu_per": "mean",
    "memory": "sum",
    "memory_per": "mean",
}

# The first 7 aggregated samples are discarded and the index restarts at 0.
# NOTE(review): the reason for skipping exactly 7 samples is not stated in the
# notebook - presumably a warm-up phase; confirm.
data = (
    data.groupby("timestamp")
    .agg(per_timestamp)
    .reset_index()
    .iloc[7:]
    .copy()
    .reset_index(drop=True)
)

# Nominal time axis: sample index times the sampling period. It is not derived
# from the recorded timestamps.
data["time"] = data.index * TIME_RESOLUTION

data.head()

Unnamed: 0,timestamp,cpu,cpu_per,memory,memory_per,time
0,2025-04-23T14:43:00.630697,1158,0.024,15150,0.332,0
1,2025-04-23T14:43:05.969906,1158,0.024,15150,0.332,5
2,2025-04-23T14:43:11.608995,2911,0.066,15171,0.334,10
3,2025-04-23T14:43:17.006884,2911,0.066,15171,0.334,15
4,2025-04-23T14:43:22.393128,2911,0.066,15171,0.334,20


In [290]:
# Load the event log; the plots below read its "timestep" and "pipelines" columns.
events = pd.read_csv("events.csv")
# Show both shapes side by side to compare the number of rows.
data.shape, events.shape

((155, 6), (155, 3))

In [291]:
# Prepend 7 synthetic events (timesteps 0-6, 2 pods, no pipeline) to the
# recorded ones, then convert every timestep to seconds.
# NOTE(review): presumably these rows stand for the period before the first
# recorded event - confirm.
leading_rows = [{"timestep": step, "pods": 2, "pipelines": 0} for step in range(7)]
event_dict = leading_rows + events.to_dict(orient="records")

events = pd.DataFrame(event_dict)
events["timestep"] = events["timestep"].mul(TIME_RESOLUTION)

In [292]:
# Restrict both frames to the same window so the plotted curves share one
# x-range: only rows strictly before the cutoff are kept.
time_cutoff = 655  # seconds
data = data.loc[data["time"] < time_cutoff]
events = events.loc[events["timestep"] < time_cutoff]

In [293]:
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add CPU usage line trace (primary y-axis)
fig.add_trace(
    go.Scatter(
        x=data["time"],
        y=data["cpu"],
        mode="lines",
        name="CPU",
        line=dict(color="blue"),
    ),
    secondary_y=False,
)

# Add pipeline plot curve (secondary y-axis)
fig.add_trace(
    go.Scatter(
        x=events["timestep"],
        y=events["pipelines"],
        mode="lines",
        name="Pipelines",
        line=dict(color="orange"),
    ),
    secondary_y=True,
)

# Update layout
fig.update_layout(
    xaxis_title="Time (s)",
    yaxis_title="CPU Usage (m)",
    height=400,
    width=1200,
    margin=dict(l=20, r=20, t=20, b=20),
    showlegend=True,
    yaxis=dict(showgrid=False),
)

# Set secondary y-axis title
fig.update_yaxes(title_text="Deployed pipelines", secondary_y=True)
fig.update_yaxes(title_text="CPU Usage (m)", secondary_y=False)
fig.update_yaxes(range=[data["cpu"].min()-50, data["cpu"].max()+250], secondary_y=False)

write_image(fig, "pipelines_cpu_usage.pdf")


In [294]:
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add CPU usage line trace (primary y-axis)
fig.add_trace(
    go.Scatter(
        x=data["time"],
        y=data["memory"],
        mode="lines",
        name="RAM",
        line=dict(color="blue"),
    ),
    secondary_y=False,
)

# Add pipeline plot curve (secondary y-axis)
fig.add_trace(
    go.Scatter(
        x=events["timestep"],
        y=events["pipelines"],
        mode="lines",
        name="Pipelines",
        line=dict(color="orange"),
    ),
    secondary_y=True,
)

# Update layout
fig.update_layout(
    xaxis_title="Time (s)",
    yaxis_title="Memory usage (MiB)",
    height=400,
    width=1200,
    margin=dict(l=20, r=20, t=20, b=20),
    showlegend=True,
    yaxis=dict(showgrid=False),
)

# Set secondary y-axis title
fig.update_yaxes(title_text="Deployed pipelines", secondary_y=True)
fig.update_yaxes(title_text="Memory usage (MiB)", secondary_y=False)
fig.update_yaxes(range=[data["memory"].min()-50, data["memory"].max()+250], secondary_y=False)

write_image(fig, "pipelines_memory_usage.pdf")