# Energy Evaluation

### Define Energy Intesities

In [13]:
TRACE_RETENTION_HOURS = 360 # 30 days

# energy intensity kWh/GB
ENERGY_INTENSITY_NETWORK_KWH_PER_GB = 0.001875
ENERGY_INTENSITY_STORAGE_WH_PER_TB_HOUR = 0.525 * TRACE_RETENTION_HOURS

## Load pre-processed data
This data can be pre-processed with the help of kepler-evaluation.ipynb (CPU+RAM), network-evaluation.ipynb and storage-evaluation.ipynb. If you want to do new experiments, you have to create these files yourself in the meantioned jupyter notebooks.

In [14]:
import pandas as pd
import tabulate as tabulate
network_data = pd.read_csv("cadvisor_network_bytes_received_all_absolute_bytes.csv", index_col=0)
network_data_transmitted = pd.read_csv("cadvisor_network_bytes_transmitted_all_absolute_bytes.csv", index_col=0)
storage_data = pd.read_csv("cadvisor_storage_usage_writes_all_absolute_bytes.csv", index_col=0)
kepler_data_joules = pd.read_csv("pods_kepler_joules_all_absolute_joules.csv", index_col=0)
kepler_dram = pd.read_csv("pods_kepler_dram_joules_all_joules.csv", index_col=0)


## Network
Further process the network data to transform total bytes received to kWh using the energy intensity

In [15]:
# get the last value for each label based on the time_normalized_rounded as this is the total amount of data received
network_data_processed = network_data.groupby("label").last().reset_index()


# remove the column time_normalized_rounded as it is not needed anymore
network_data_processed = network_data_processed.drop(columns=["time_normalized_rounded"])

# remove _system-under-evaluation from the column names
network_data_processed.columns = network_data_processed.columns.str.replace("_system-under-evaluation", "", regex=False)

network_data_processed = network_data_processed.set_index("label")

network_data_processed = network_data_processed.reindex([
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
    "Base Persistence",
    "Scrape 5s Persistence",
    "5% Persistence",
    "10% Persistence",
    "50% Persistence",
    "Persistence Istio",
])

# drop indexes the experiments that are not needed e.g. Baseline, 0%, 5%, 10%, 50%, 1s, 30s
network_data_processed = network_data_processed.drop(index=[
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
])

# rename the columns to be more readable
network_data_processed = network_data_processed.rename(index={
    "Base Persistence": "Baseline",
    "Scrape 5s Persistence": "Scrape 5s",
    "5% Persistence": "5%",
    "10% Persistence": "10%",
    "50% Persistence": "50%",
    "Persistence Istio": "Istio",
})



# this values are in bytes, convert them to GB
network_data_processed = network_data_processed / (1000 * 1000 * 1000)


print("Total network data received per Experiment (label) in GB:")
print(network_data_processed.round(2).to_markdown())


# calculate the energy consumption in kWh
network_data_processed = network_data_processed * ENERGY_INTENSITY_NETWORK_KWH_PER_GB


# print as table
print("Total network data received per Experiment (label) and energy consumption in kWh:")
print(network_data_processed.to_markdown())
print("Total energy consumption for storage in kWh per Experiment (label) and percentage difference from label 'Baseline':")

for label in network_data_processed.index:
    if label == "Baseline":
        print(f"{label}: {network_data_processed.loc[label].sum()} kWh (100%)")
    else:
        percentage_difference = (network_data_processed.loc[label].sum() / network_data_processed.loc["Baseline"].sum()) * 100
        print(f"{label}: {network_data_processed.loc[label].sum()} kWh ({percentage_difference:.2f}%)")

Total network data received per Experiment (label) in GB:
| label     |   accounting |   ad |   cart |   checkout |   currency |   elasticsearch |   email |   flagd |   fraud-detection |   frontend |   frontend-proxy |   grafana |   image-provider |   jaeger-collector |   jaeger-query |   kafka |   otel-collector |   payment |   product-catalog |   prometheus |   quote |   recommendation |   shipping |   valkey-cart |
|:----------|-------------:|-----:|-------:|-----------:|-----------:|----------------:|--------:|--------:|------------------:|-----------:|-----------------:|----------:|-----------------:|-------------------:|---------------:|--------:|-----------------:|----------:|------------------:|-------------:|--------:|-----------------:|-----------:|--------------:|
| Baseline  |            0 | 0.04 |   0.14 |          0 |          0 |            0.52 |       0 |    0.07 |                 0 |       2.1  |             2.7  |         0 |                0 |               0.05 |  

In [16]:
# get the last value for each label based on the time_normalized_rounded as this is the total amount of data received
network_data_transmitted_processed = network_data_transmitted.groupby("label").last().reset_index()


# remove the column time_normalized_rounded as it is not needed anymore
network_data_transmitted_processed = network_data_transmitted_processed.drop(columns=["time_normalized_rounded"])

# remove _system-under-evaluation from the column names
network_data_transmitted_processed.columns = network_data_transmitted_processed.columns.str.replace("_system-under-evaluation", "", regex=False)

network_data_transmitted_processed = network_data_transmitted_processed.set_index("label")

network_data_transmitted_processed = network_data_transmitted_processed.reindex([
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
    "Base Persistence",
    "Scrape 5s Persistence",
    "5% Persistence",
    "10% Persistence",
    "50% Persistence",
    "Persistence Istio"
])

# drop indexes the experiments that are not needed e.g. Baseline, 0%, 5%, 10%, 50%, 1s, 30s
network_data_transmitted_processed = network_data_transmitted_processed.drop(index=[
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
])

# rename the columns to be more readable
network_data_transmitted_processed = network_data_transmitted_processed.rename(index={
    "Base Persistence": "Baseline",
    "Scrape 5s Persistence": "Scrape 5s",
    "5% Persistence": "5%",
    "10% Persistence": "10%",
    "50% Persistence": "50%",
    "Persistence Istio": "Istio",
})


# this values are in bytes, convert them to GB
network_data_transmitted_processed = network_data_transmitted_processed / (1000 * 1000 * 1000)


print("Total network data transmitted per Experiment (label) in GB:")
print(network_data_transmitted_processed.round(2).to_markdown())


# calculate the energy consumption in kWh
network_data_transmitted_processed = network_data_transmitted_processed * ENERGY_INTENSITY_NETWORK_KWH_PER_GB


# print as table
print("Total network data transmitted per Experiment (label) and energy consumption in kWh:")
print(network_data_transmitted_processed.to_markdown())
print("Total energy consumption for storage in kWh per Experiment (label) and percentage difference from label 'Baseline':")

for label in network_data_transmitted_processed.index:
    if label == "Baseline":
        print(f"{label}: {network_data_transmitted_processed.loc[label].sum()} kWh (100%)")
    else:
        percentage_difference = (network_data_transmitted_processed.loc[label].sum() / network_data_transmitted_processed.loc["Baseline"].sum()) * 100
        print(f"{label}: {network_data_transmitted_processed.loc[label].sum()} kWh ({percentage_difference:.2f}%)")

Total network data transmitted per Experiment (label) in GB:
| label     |   accounting |   ad |   cart |   checkout |   currency |   elasticsearch |   email |   flagd |   fraud-detection |   frontend |   frontend-proxy |   grafana |   image-provider |   jaeger-collector |   jaeger-query |   kafka |   otel-collector |   payment |   product-catalog |   prometheus |   quote |   recommendation |   shipping |   valkey-cart |
|:----------|-------------:|-----:|-------:|-----------:|-----------:|----------------:|--------:|--------:|------------------:|-----------:|-----------------:|----------:|-----------------:|-------------------:|---------------:|--------:|-----------------:|----------:|------------------:|-------------:|--------:|-----------------:|-----------:|--------------:|
| Baseline  |         0    | 0.4  |   0.98 |       0    |       0    |            0.01 |    0    |    0.15 |              0    |       5.28 |             6.22 |      0    |             0    |               0.52 

## Storage
Further process the storage data to transform total bytes used to kWh using the energy intensity

In [17]:
storage_data_processed = storage_data.groupby("label").last().reset_index()

# remove the column time_normalized_rounded as it is not needed anymore
storage_data_processed = storage_data_processed.drop(columns=["time_normalized_rounded"])

# remove _system-under-evaluation from the column names
storage_data_processed.columns = storage_data_processed.columns.str.replace("_system-under-evaluation", "", regex=False)

storage_data_processed = storage_data_processed.set_index("label")

# sort the index for readability in this order: Baseline, 5%, 10%, 50%, Baseline Persistence, 5% Persistence, 10% Persistence, 50% Persistence
storage_data_processed = storage_data_processed.reindex([
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
    "Base Persistence",
    "Scrape 5s Persistence",
    "5% Persistence",
    "10% Persistence",
    "50% Persistence",
    "Persistence Istio",
])

# drop indexes the experiments that are not needed e.g. Baseline, 0%, 5%, 10%, 50%, 1s, 30s
storage_data_processed = storage_data_processed.drop(index=[
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
])

# rename the columns to be more readable
storage_data_processed = storage_data_processed.rename(index={
    "Base Persistence": "Baseline",
    "Scrape 5s Persistence": "Scrape 5s",
    "5% Persistence": "5%",
    "10% Persistence": "10%",
    "50% Persistence": "50%",
    "Persistence Istio": "Istio",
})

# rename opentelemetry-collector to otel-collector
storage_data_processed = storage_data_processed.rename(index={"opentelemetry-collector": "otel-collector"})


# this values are in bytes, convert them to TB
storage_data_processed = storage_data_processed / (1000 * 1000 * 1000 * 1000)



print("Storage data used per Experiment (label) in GB:")
print(storage_data_processed.multiply(1000).to_markdown())

# calculate the energy consumption in kWh
storage_data_processed = (storage_data_processed * ENERGY_INTENSITY_STORAGE_WH_PER_TB_HOUR) / 1000




# print as table
print("Total energy consumption for storage in kWh per Experiment (label):")
print(storage_data_processed.to_markdown())
print("Total energy consumption for storage in kWh per Experiment (label) and percentage difference from label 'Baseline':")

for label in storage_data_processed.index:
    if label == "Baseline":
        print(f"{label}: {storage_data_processed.loc[label].sum()} kWh (100%)")
    else:
        percentage_difference = (storage_data_processed.loc[label].sum() / storage_data_processed.loc["Baseline"].sum()) * 100
        print(f"{label}: {storage_data_processed.loc[label].sum()} kWh ({percentage_difference:.2f}%)")


Storage data used per Experiment (label) in GB:
| label     |   accounting |        ad |   elasticsearch |   email |   flagd |   fraud-detection |   frontend |   frontend-proxy |     grafana |   image-provider |   jaeger |     kafka |   opensearch |   opentelemetry-collector |   payment |   prometheus |   recommendation |   valkey-cart |
|:----------|-------------:|----------:|----------------:|--------:|--------:|------------------:|-----------:|-----------------:|------------:|-----------------:|---------:|----------:|-------------:|--------------------------:|----------:|-------------:|-----------------:|--------------:|
| Baseline  |            0 | 0.0219914 |         3.88613 |       0 |       0 |         0.0193577 |          0 |                0 | 0.000466944 |                0 |      nan | 0.092332  |      17.0718 |                       nan |         0 |    0.116314  |                0 |   0.00028672  |
| Scrape 5s |            0 | 0.0220037 |         3.776   |       0 |       0

Further process the CPU energy data. We use the average over the experiment as the kWh

In [18]:
""" kepler_data_processed = kepler_data.groupby("label").mean()

# remove the column time_normalized_rounded as it is not needed anymore
kepler_data_processed = kepler_data_processed.drop(columns=["time_normalized_rounded"])

# remove _system-under-evaluation from the column names
kepler_data_processed.columns = kepler_data_processed.columns.str.replace("_Auxiliary-under-evaluation", "", regex=False)

# sort the index for readability in this order: Baseline, 5%, 10%, 50%, Baseline Persistence, 5% Persistence, 10% Persistence, 50% Persistence
kepler_data_processed = kepler_data_processed.reindex([
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "Base Persistence",
    "5% Persistence",
    "10% Persistence",
    "50% Persistence",
    "Base Persistence Istio"
])

# print as table
print("Total energy consumption in kWh per Experiment (label):")
print(kepler_data_processed.to_markdown())
print("Total energy consumption in kWh per Experiment (label) and percentage difference from label 'Baseline':")

for label in kepler_data_processed.index:
    if label == "Baseline":
        print(f"{label}: {kepler_data_processed.loc[label].sum()} kWh (100%)")
    else:
        percentage_difference = (kepler_data_processed.loc[label].sum() / kepler_data_processed.loc["Baseline"].sum()) * 100
        print(f"{label}: {kepler_data_processed.loc[label].sum()} kWh ({percentage_difference:.2f}%)") """

' kepler_data_processed = kepler_data.groupby("label").mean()\n\n# remove the column time_normalized_rounded as it is not needed anymore\nkepler_data_processed = kepler_data_processed.drop(columns=["time_normalized_rounded"])\n\n# remove _system-under-evaluation from the column names\nkepler_data_processed.columns = kepler_data_processed.columns.str.replace("_Auxiliary-under-evaluation", "", regex=False)\n\n# sort the index for readability in this order: Baseline, 5%, 10%, 50%, Baseline Persistence, 5% Persistence, 10% Persistence, 50% Persistence\nkepler_data_processed = kepler_data_processed.reindex([\n    "Baseline",\n    "0%",\n    "5%",\n    "10%",\n    "50%",\n    "1s",\n    "Base Persistence",\n    "5% Persistence",\n    "10% Persistence",\n    "50% Persistence",\n    "Base Persistence Istio"\n])\n\n# print as table\nprint("Total energy consumption in kWh per Experiment (label):")\nprint(kepler_data_processed.to_markdown())\nprint("Total energy consumption in kWh per Experiment 

In [19]:
kepler_data_joules_processed = kepler_data_joules.groupby("label").last()

# remove the column time_normalized_rounded as it is not needed anymore
kepler_data_joules_processed = kepler_data_joules_processed.drop(columns=["time_normalized_rounded"])

# remove _system-under-evaluation from the column names
kepler_data_joules_processed.columns = kepler_data_joules_processed.columns.str.replace("_system-under-evaluation", "", regex=False)

# drop the columns that are not needed
kepler_data_joules_processed = kepler_data_joules_processed.drop(columns=["master", "worker"])


# sort the index for readability in this order: Baseline, 5%, 10%, 50%, Baseline Persistence, 5% Persistence, 10% Persistence, 50% Persistence
kepler_data_joules_processed = kepler_data_joules_processed.reindex([
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
    "Base Persistence",
    "Scrape 5s Persistence",
    "5% Persistence",
    "10% Persistence",
    "50% Persistence",
    "Persistence Istio",
])  

# drop indexes the experiments that are not needed e.g. Baseline, 0%, 5%, 10%, 50%, 1s, 30s
kepler_data_joules_processed = kepler_data_joules_processed.drop(index=[
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
])

# rename the columns to be more readable
kepler_data_joules_processed = kepler_data_joules_processed.rename(index={
    "Base Persistence": "Baseline",
    "Scrape 5s Persistence": "Scrape 5s",
    "5% Persistence": "5%",
    "10% Persistence": "10%",
    "50% Persistence": "50%",
    "Persistence Istio": "Istio",
})

kepler_data_joules_processed_total = kepler_data_joules_processed.copy()
kepler_data_joules_processed_total["Total"] = kepler_data_joules_processed.sum(axis=1)

# print as table
print("Total energy consumption in Joules per Experiment (label):")
print(kepler_data_joules_processed_total.to_markdown())

# convert Joules to kWh
kepler_data_joules_processed = kepler_data_joules_processed / 3600000  # 1 kWh = 3.6 million Joules
print("Total energy consumption in kWh per Experiment (label) and percentage difference from label 'Baseline':")

for label in kepler_data_joules_processed.index:
    if label == "Baseline":
        print(f"{label}: {kepler_data_joules_processed.loc[label].sum()} kWh (100%)")
    else:
        percentage_difference = (kepler_data_joules_processed.loc[label].sum() / kepler_data_joules_processed.loc["Baseline"].sum()) * 100
        print(f"{label}: {kepler_data_joules_processed.loc[label].sum()} kWh ({percentage_difference:.2f}%)")


Total energy consumption in Joules per Experiment (label):
| label     |   accounting |      ad |    cart |   checkout |   configfile |   currency |   elasticsearch |   email |   exporter |    flagd |   flagd-ui |   fraud-detection |   frontend |   frontend-proxy |   grafana |   image-provider |   init-config |   istio-init |   istio-proxy |   jaeger-agent-sidecar |   jaeger-collector |   jaeger-query |    kafka |   opensearch |   opentelemetry-collector |   payment |   product-catalog |   prometheus-server |   quote |   recommendation |   shipping |   valkey-cart |   wait-for-kafka |   wait-for-valkey-cart |   Total |
|:----------|-------------:|--------:|--------:|-----------:|-------------:|-----------:|----------------:|--------:|-----------:|---------:|-----------:|------------------:|-----------:|-----------------:|----------:|-----------------:|--------------:|-------------:|--------------:|-----------------------:|-------------------:|---------------:|---------:|-------------:|

# DRAM
Does not work as expected currently. Values are strange and mostly negativ.


In [20]:
kepler_ram_joules_processed = kepler_dram.groupby("label").last()

# remove the column time_normalized_rounded as it is not needed anymore
kepler_ram_joules_processed = kepler_ram_joules_processed.drop(columns=["time_normalized_rounded"])

# remove _system-under-evaluation from the column names
kepler_ram_joules_processed.columns = kepler_ram_joules_processed.columns.str.replace("_system-under-evaluation", "", regex=False)

# sort the index for readability in this order: Baseline, 5%, 10%, 50%, Baseline Persistence, 5% Persistence, 10% Persistence, 50% Persistence
kepler_ram_joules_processed = kepler_ram_joules_processed.reindex([
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
    "Base Persistence",
    "Scrape 5s Persistence",
    "5% Persistence",
    "10% Persistence",
    "50% Persistence",
    "Persistence Istio",
])  


# drop indexes the experiments that are not needed e.g. Baseline, 0%, 5%, 10%, 50%, 1s, 30s
kepler_ram_joules_processed = kepler_ram_joules_processed.drop(index=[
    "Baseline",
    "0%",
    "5%",
    "10%",
    "50%",
    "1s",
    "30s",
])

# rename the columns to be more readable
kepler_ram_joules_processed = kepler_ram_joules_processed.rename(index={
    "Base Persistence": "Baseline",
    "Scrape 5s Persistence": "Scrape 5s",
    "5% Persistence": "5%",
    "10% Persistence": "10%",
    "50% Persistence": "50%",
    "Persistence Istio": "Istio",
})

kepler_ram_joules_processed_total = kepler_ram_joules_processed.copy()
kepler_ram_joules_processed_total["Total"] = kepler_ram_joules_processed.sum(axis=1)

# print as table
print("DRAM energy consumption in Joules per Experiment (label):")
print(kepler_ram_joules_processed_total.to_markdown())

# convert Joules to kWh
kepler_ram_joules_processed = kepler_ram_joules_processed / 3600000  # 1 kWh = 3.6 million Joules
print("Total energy consumption in kWh per Experiment (label) and percentage difference from label 'Baseline':")

for label in kepler_ram_joules_processed.index:
    if label == "Baseline":
        print(f"{label}: {kepler_ram_joules_processed.loc[label].sum()} kWh (100%)")
    else:
        percentage_difference = (kepler_ram_joules_processed.loc[label].sum() / kepler_ram_joules_processed.loc["Baseline"].sum()) * 100
        print(f"{label}: {kepler_ram_joules_processed.loc[label].sum()} kWh ({percentage_difference:.2f}%)")


DRAM energy consumption in Joules per Experiment (label):
| label     |   accounting |       ad |         cart |   checkout |   configfile |   currency |   elasticsearch |   email |   exporter |        flagd |   flagd-ui |   fraud-detection |   frontend |   frontend-proxy |   grafana |   image-provider |   init-config |   istio-init |   istio-proxy |   jaeger-agent-sidecar |   jaeger-collector |   jaeger-query |    kafka |       master |   opensearch |   opentelemetry-collector |   payment |   product-catalog |   prometheus-server |   quote |   recommendation |   shipping |   valkey-cart |   wait-for-kafka |   wait-for-valkey-cart |   worker |    Total |
|:----------|-------------:|---------:|-------------:|-----------:|-------------:|-----------:|----------------:|--------:|-----------:|-------------:|-----------:|------------------:|-----------:|-----------------:|----------:|-----------------:|--------------:|-------------:|--------------:|-----------------------:|------------------

Categorize the energy consumption into "App", "Auxiliary" and "System"

In [21]:
# | label            |   accounting |      ad |    cart |   checkout |   configfile |   copy-default-plugins |   currency |   elasticsearch |   elasticsearch-checker |   email |   flagd |   flagd-ui |   fraud-detection |   frontend |   frontend-proxy |   grafana |   image-provider |   init-config |   jaeger |   jaeger-agent-sidecar |   jaeger-collector |   jaeger-query |   kafka |   master |   opensearch |   opentelemetry-collector |   payment |   product-catalog |   prometheus-server |   quote |   recommendation |   shipping |   sysctl |   valkey-cart |   wait-for-kafka |   wait-for-valkey-cart |   worker |

kepler_mapping = {
    "accounting": "Primary",
    "ad": "Primary",
    "cart": "Primary",
    "checkout": "Primary",
    "configfile": "Primary",
    "copy-default-plugins": "Primary",
    "currency": "Primary",
    "elasticsearch": "Auxiliary",
    "elasticsearch-checker": "Auxiliary",
    "email": "Primary",
    "exporter": "Auxiliary",
    "flagd": "Auxiliary",
    "flagd-ui": "Auxiliary",
    "fraud-detection": "Primary",
    "frontend": "Primary",
    "frontend-proxy": "Primary",
    "grafana": "Auxiliary",
    "image-provider": "Primary",
    "init-config": "Primary",
    "istio-init": "Auxiliary",
    "istio-proxy": "Auxiliary",
    "jaeger": "Auxiliary",
    "jaeger-agent-sidecar": "Auxiliary",
    "jaeger-collector": "Auxiliary",
    "jaeger-query": "Auxiliary",
    "kafka": "Primary",
    "master": "Primary",
    "opensearch": "Auxiliary",
    "opentelemetry-collector": "Auxiliary",
    "payment": "Primary",
    "product-catalog": "Primary",
    "prometheus-server": "Auxiliary",
    "quote": "Primary",
    "recommendation": "Primary",
    "shipping": "Primary",
    "sysctl": "Auxiliary",
    "valkey-cart": "Primary",
    "wait-for-kafka": "Primary",
    "wait-for-valkey-cart": "Primary",
    "worker": "Primary",
}

#| label            |   accounting |          ad |   elasticsearch |   email |   flagd |   fraud-detection |     frontend |   frontend-proxy |     grafana |   image-provider |       kafka |   opensearch |   payment |   prometheus |   recommendation |   shipping |   valkey-cart |

storage_data_mapping = {
    "accounting": "Primary",
    "ad": "Primary",
    "elasticsearch": "Auxiliary",
    "email": "Primary",
    "flagd": "Auxiliary",
    "fraud-detection": "Primary",
    "frontend": "Primary",
    "frontend-proxy": "Primary",
    "grafana": "Auxiliary",
    "image-provider": "Primary",
    "kafka": "Primary",
    "opensearch": "Auxiliary",
    "payment": "Primary",
    "prometheus": "Auxiliary",
    "recommendation": "Primary",
    "shipping": "Primary",
    "valkey-cart": "Primary",
    "quote": "Primary",
    "product-catalog": "Primary",
    "opentelemetry-collector": "Auxiliary",
    "checkout": "Primary",
    "cart": "Primary",
    "jaeger": "Auxiliary",
    "currency": "Primary",
}

# | label            |   accounting |          ad |        cart |    checkout |    currency |   elasticsearch |   email |       flagd |   fraud-detection |   frontend |   frontend-proxy |     grafana |   image-provider |      jaeger |       kafka |   opensearch |   opentelemetry-collector |     payment |   product-catalog |   prometheus |   quote |   recommendation |   shipping |   valkey-cart |

network_data_mapping = {
    "accounting": "Primary",
    "ad": "Primary",
    "cart": "Primary",
    "checkout": "Primary",
    "currency": "Primary",
    "elasticsearch": "Auxiliary",
    "email": "Primary",
    "flagd": "Auxiliary",
    "fraud-detection": "Primary",
    "frontend": "Primary",
    "frontend-proxy": "Primary",
    "grafana": "Auxiliary",
    "image-provider": "Primary",
    "jaeger": "Auxiliary",
    "jaeger-collector": "Auxiliary", 
    "jaeger-query": "Auxiliary", 
    "otel-collector": "Auxiliary", 
    "kafka": "Primary",
    "opensearch": "Auxiliary",
    "opentelemetry-collector": "Auxiliary",
    "payment": "Primary",
    "product-catalog": "Primary",
    "prometheus": "Auxiliary",
    "quote": "Primary",
    "recommendation": "Primary",
    "shipping": "Primary",
    "valkey-cart": "Primary",
}


import pandas as pd

def categorize_energy_consumption(data: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    """
    Categorize and sum energy consumption based on component categories.

    Parameters:
    - data (pd.DataFrame): A DataFrame where rows represent measurements and columns represent component labels.
    - mapping (dict): A mapping from component label (column name) to a category (e.g., "Primary", "Auxiliary").

    Returns:
    - pd.DataFrame: A DataFrame with the same row indices as `data` and columns being the summed energy per category,
                    including an 'Uncategorized' column if needed.
    """
    # Initialize a DataFrame to store summed results
    categorized = pd.DataFrame(index=data.index)

    # Identify columns that are in the mapping
    mapped_cols = set(mapping.keys())
    data_cols = set(data.columns)
    unmapped_cols = list(data_cols - mapped_cols)

    # Print warning for unmapped columns
    if unmapped_cols:
        print(f"[WARNING] Uncategorized columns: {unmapped_cols}")

    # Iterate through unique categories
    for category in set(mapping.values()):
        cols = [col for col in data.columns if mapping.get(col) == category]
        categorized[category] = data[cols].sum(axis=1) if cols else 0

    # Add uncategorized columns
    if unmapped_cols:
        categorized["Uncategorized"] = data[unmapped_cols].sum(axis=1)
    
    return categorized


# Categorize the energy consumption for each dataset
network_categorized = categorize_energy_consumption(network_data_processed, network_data_mapping)
storage_categorized = categorize_energy_consumption(storage_data_processed, storage_data_mapping)
kepler_categorized = categorize_energy_consumption(kepler_data_joules_processed, kepler_mapping)

# Print the categorized energy consumption
print("\nNetwork Energy Consumption Categorized:")
print(network_categorized.to_markdown())
print("\nStorage Energy Consumption Categorized:")
print(storage_categorized.to_markdown())
print("\nKepler Energy Consumption Categorized:")
print(kepler_categorized.to_markdown())

# add a suffix to each set of columns
net = network_categorized.add_suffix(" (Network)")
cpu = kepler_categorized.add_suffix(" (CPU+Mem)")
sto = storage_categorized.add_suffix(" (Storage)")

categorized_combined_suffix = pd.concat([net, cpu, sto], axis=1) * 1000
categorized_combined_suffix = categorized_combined_suffix.sort_index(axis=1)

# Compute total energy consumption per row
categorized_combined_suffix["Total"] = categorized_combined_suffix.sum(axis=1)

# Calculate percentage change from the "Baseline" row
baseline_total = categorized_combined_suffix.loc["Baseline", "Total"]
categorized_combined_suffix["Total Change (%)"] = (
    (categorized_combined_suffix["Total"] - baseline_total) / baseline_total * 100
).round(2)

print("\nCombined Categorized Energy Consumption by origin in Wh:")
print(categorized_combined_suffix.round(2).to_markdown())

# merge the categorized dataframes and sum on same column name
categorized_combined = pd.concat([network_categorized, storage_categorized, kepler_categorized], axis=1)
categorized_combined = categorized_combined.T.groupby(categorized_combined.columns).sum().T


# Print the combined categorized energy consumption
#print("\nCombined Categorized Energy Consumption:")
#print(categorized_combined.to_markdown())

# add total summing up the categories for each experiment (label)
categorized_combined["Total"] = categorized_combined.sum(axis=1)

#categorized_combined_transpose = categorized_combined.transpose()
#print(categorized_combined_transpose.to_markdown())

categorized_combined_transpose_wh = categorized_combined.T.copy() * 1000  # convert kWh to Wh

print("\nCombined Categorized Energy Consumption in Wh:")
print(categorized_combined_transpose_wh.round(2).to_markdown())

# Combine all three sources on service/component label level
combined_per_label = pd.concat([
    network_data_processed,
    storage_data_processed,
    kepler_data_joules_processed
], axis=1)

# Group by column name (i.e., per service/component), and sum if duplicate columns exist
combined_per_label = combined_per_label.groupby(combined_per_label.columns, axis=1).sum()

combined_per_label["Total"] = combined_per_label.sum(axis=1)

combined_per_label_wh = combined_per_label * 1000  # convert kWh to Wh

print("\nCombined Total Energy Consumption per Service (Wh):")
print(combined_per_label_wh.round(2).to_markdown())


combined_per_label_wh_T = combined_per_label_wh.T  # transposed: services as rows
combined_per_label_wh_T["Total"] = combined_per_label_wh_T.sum(axis=1)

print("\nTransposed: Total Energy per Service across all Experiments (Wh):")
print(combined_per_label_wh_T.round(2).to_markdown())






Network Energy Consumption Categorized:
| label     |   Auxiliary |   Primary |
|:----------|------------:|----------:|
| Baseline  |   0.0185332 | 0.0129566 |
| Scrape 5s |   0.020428  | 0.0127274 |
| 5%        |   0.0222951 | 0.0128653 |
| 10%       |   0.0265756 | 0.0126258 |
| 50%       |   0.0635977 | 0.0124048 |
| Istio     |   0.0179034 | 0.0134179 |

Storage Energy Consumption Categorized:
| label     |   Auxiliary |     Primary |
|:----------|------------:|------------:|
| Baseline  |  0.00398312 | 2.53199e-05 |
| Scrape 5s |  0.00407867 | 2.50188e-05 |
| 5%        |  0.00540546 | 2.56071e-05 |
| 10%       |  0.0070713  | 2.59369e-05 |
| 50%       |  0.02244    | 2.72398e-05 |
| Istio     |  0.00318553 | 2.47958e-05 |

Kepler Energy Consumption Categorized:
| label     |   Auxiliary |   Primary |
|:----------|------------:|----------:|
| Baseline  |   0.0134485 | 0.0723283 |
| Scrape 5s |   0.014324  | 0.0707655 |
| 5%        |   0.0151614 | 0.0718754 |
| 10%       |   0.0171

  combined_per_label = combined_per_label.groupby(combined_per_label.columns, axis=1).sum()


# Latex Tables for the Paper

In [37]:
latex_categorized_combined_suffix = categorized_combined_suffix.round(2).copy()


# order columns 
latex_categorized_combined_suffix = latex_categorized_combined_suffix.reindex(
    columns=[
        "Primary (CPU+Mem)",
        "Primary (Network)",
        "Primary (Storage)",
        "Auxiliary (CPU+Mem)",
        "Auxiliary (Network)",
        "Auxiliary (Storage)",
        "Total",
        "Total Change (%)"
    ]
)

latex_categorized_combined_suffix.columns = pd.MultiIndex.from_tuples([
    ("Primary", "CPU+Mem"),
    ("Primary", "Network"),
    ("Primary", "Storage"),
    ("Auxiliary", "CPU+Mem"),
    ("Auxiliary", "Network"),
    ("Auxiliary", "Storage"),
    ("Total", "Absolut"),
    ("Total", "%")
])




latex_categorized_combined_suffix.index.name = ""
latex = latex_categorized_combined_suffix.to_latex(
    escape=True,              # keep % symbols
    multicolumn=True,
    multicolumn_format='c',
    column_format='l|rrr|rrr|rrr',  # first col left-aligned, rest right-aligned
    float_format="%.2f",
    caption=r"Energy consumption of the experiment runs in Wh divided into the "
            r"categories Primary and Auxiliary services.",
    label=r"tab:energy",
)
print(latex)

\begin{table}
\caption{Energy consumption of the experiment runs in Wh divided into the categories Primary and Auxiliary services.}
\label{tab:energy}
\begin{tabular}{l|rrr|rrr|rrr}
\toprule
 & \multicolumn{3}{c}{Primary} & \multicolumn{3}{c}{Auxiliary} & \multicolumn{2}{c}{Total} \\
 & CPU+Mem & Network & Storage & CPU+Mem & Network & Storage & Absolut & \% \\
 &  &  &  &  &  &  &  &  \\
\midrule
Baseline & 72.33 & 12.96 & 0.03 & 13.45 & 18.53 & 3.98 & 121.28 & 0.00 \\
Scrape 5s & 70.77 & 12.73 & 0.03 & 14.32 & 20.43 & 4.08 & 122.35 & 0.89 \\
5\% & 71.88 & 12.87 & 0.03 & 15.16 & 22.30 & 5.41 & 127.63 & 5.24 \\
10\% & 70.75 & 12.63 & 0.03 & 17.11 & 26.58 & 7.07 & 134.16 & 10.63 \\
50\% & 66.20 & 12.40 & 0.03 & 34.38 & 63.60 & 22.44 & 199.06 & 64.14 \\
Istio & 66.90 & 13.42 & 0.02 & 29.26 & 17.90 & 3.19 & 130.69 & 7.77 \\
\bottomrule
\end{tabular}
\end{table}



In [None]:
df = combined_per_label_wh_T.round(2).copy()   # rows = services, cols = experiments
df.index.name = ""                               # keeps top-left LaTeX cell blank

redacted = 0
# loop and drop rows where total is 0.00
for index, row in df.iterrows():
    if row["Total"] == 0.00:
        df.drop(index, inplace=True)
        redacted += 1
        

# Map every experiment column to either "In-Memory", "Elasticsearch", or "" (no group)
group_map = {
    "Service Type": "",
    "Baseline":   "",
    "Low":         "Tracing",
    "Medium":        "Tracing",
    "High":        "Tracing",
    "HighM":  "Monitoring",      # rename if you prefer “0 %” etc.
    "Istio":      "Service Mesh",
}

# Ensure the DataFrame has the columns in the order we want:
ordered_cols = [
    "Service Type", "Baseline", "5%", "10%", "50%", "Scrape 5s", "Istio"
]

# merging rows that are the same but have different names (eg. prometheus and prometheus-server)
df = df.rename(index={
    "prometheus-server": "prometheus",
    "opentelemetry-collector": "otel-collector",    
    "jaeger-agent-sidecar": "jaeger",
    "jaeger-collector": "jaeger",
    "jaeger-query": "jaeger",
    "opensearch": "opensearch",
    "opensearch-checker": "opensearch",
    "elasticsearch": "elasticsearch",
})

# Sum them up to get the service totals
df = df.groupby(df.index).sum()

service_types = {
    "accounting": "Primary",
    "ad": "Primary",
    "cart": "Primary",
    "checkout": "Primary",
    "configfile": "Primary",
    "copy-default-plugins": "Primary",
    "currency": "Primary",
    "email": "Primary",
    "elasticsearch": "Auxiliary",
    "exporter": "Auxiliary",
    "flagd": "Primary",
    "flagd-ui": "Primary",
    "fraud-detection": "Primary",
    "frontend": "Primary",
    "frontend-proxy": "Primary",
    "grafana": "Auxiliary",
    "image-provider": "Primary",
    "init-config": "Primary",
    "istio-proxy": "Auxiliary",
    "jaeger": "Auxiliary",
    "kafka": "Primary",
    "master": "Primary",
    "opensearch": "Auxiliary",
    "otel-collector": "Auxiliary",
    "payment": "Primary",
    "product-catalog": "Primary",
    "prometheus": "Auxiliary",
    "quote": "Primary",
    "recommendation": "Primary",
    "shipping": "Primary",
    "sysctl": "Auxiliary",
    "valkey-cart": "Primary",
    "wait-for-kafka": "Primary",
    "wait-for-valkey-cart": "Primary",
    "worker": "Primary",
}

df["Service Type"] = df.index.map(service_types)

df = df[ordered_cols]

df = df.rename(columns={
    "5%": "Low",
    "10%": "Medium",
    "50%": "High",
    "Scrape 5s": "HighM",
})

top   = [group_map[c] for c in df.columns]
lower = df.columns                    # or any tidy-up you like
df.columns = pd.MultiIndex.from_arrays([top, lower])

latex = df.to_latex(
    escape=True,
    multicolumn=True,
    multicolumn_format='c',
    column_format='ll|r|rrr|r|r',
    float_format="%.2f",
    caption=f"Energy usage in Wh per Service. Combination of CPU, memory, network and storage energy consumption. {redacted} services where redacted that used less then 0.01Wh",
    label="tab:energy_per_service",
)
print(latex)


\begin{table}
\caption{Energy usage in Wh per Service. Combination of CPU, memory, network and storage energy consumption. 13 services where redacted that used less then 0.01Wh}
\label{tab:energy_per_service}
\begin{tabular}{l|lr|rrr|r|r}
\toprule
 & \multicolumn{2}{c}{} & \multicolumn{3}{c}{Tracing} & Monitoring & Service Mesh \\
label & Service Type & Baseline & Low & Medium & High & HighM & Istio \\
 &  &  &  &  &  &  &  \\
\midrule
Total & NaN & 121.28 & 127.63 & 134.16 & 199.06 & 122.35 & 130.69 \\
accounting & Primary & 0.02 & 0.02 & 0.02 & 0.02 & 0.02 & 0.02 \\
ad & Primary & 0.65 & 0.68 & 0.66 & 0.61 & 0.65 & 0.63 \\
cart & Primary & 2.35 & 2.29 & 2.23 & 1.96 & 2.20 & 1.98 \\
checkout & Primary & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 & 0.00 \\
elasticsearch & Auxiliary & 2.60 & 8.98 & 16.96 & 81.95 & 2.51 & 2.21 \\
exporter & Auxiliary & 0.01 & 0.01 & 0.01 & 0.01 & 0.01 & 0.01 \\
flagd & Primary & 0.51 & 0.51 & 0.01 & 0.47 & 0.49 & 1.01 \\
fraud-detection & Primary & 0.10 & 0.10 & 0