# Analyzing module-wise energies for different architectures

In [2]:
import sys
sys.path.append('../')

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from utils.data_utils import preprocess_and_normalize_energy_data

# Loading and aggregating the data

In [4]:
# Location of the parsed energy measurements, relative to this notebook.
raw_energy_csv = '../data/architectures-energies-parsed.csv'
# Key columns handed to the preprocessing helper
# (presumably the grouping keys for aggregation -- confirm in utils.data_utils).
key_columns = ['module', 'batch_size', 'architecture', 'layer_idx']

df_unnormalized = pd.read_csv(raw_energy_csv)
df = preprocess_and_normalize_energy_data(
    df_unnormalized,
    key_columns,
    aggregate=True,
)
# Show the first rows as the cell output for a quick sanity check.
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '../data/architectures-energies-parsed.csv'

# Analysis

In [None]:
# Compute, per architecture and batch size, the percentage each module type
# contributes to the summed per-module CPU energy, and compare that sum with the
# energy measured for the complete architecture run (rows with layer_idx == 0).
contribution_columns = ['architecture', 'module', 'batch_size', 'cpu_energy', 'percent_contribution']
# Collect the per-(architecture, batch_size) frames and concatenate once at the
# end. Seeding pd.concat with an empty frame and growing it inside the loop is
# quadratic, and newer pandas versions warn about concatenating empty entries.
contribution_frames = []
for architecture in df.architecture.unique():
    # subset data by architecture
    sub = df.loc[df.architecture == architecture]
    total_measured_energies = []
    total_agg_energies = []
    for batch_size in sub.batch_size.unique():
        # subset data by batch_size
        sub_b = sub.loc[sub.batch_size == batch_size]
        # get energy from complete architecture run; fail with a readable
        # message instead of the opaque error a bare .item() would raise
        full_run_energy = sub_b.loc[sub_b.layer_idx == 0, 'cpu_energy']
        if len(full_run_energy) != 1:
            raise ValueError(
                "expected exactly one full-architecture row (layer_idx == 0) for "
                f"architecture={architecture!r}, batch_size={batch_size!r}; "
                f"found {len(full_run_energy)}"
            )
        total_measured_energy = full_run_energy.item()
        # subset only data from individual modules of architecture
        sub_b = sub_b.loc[sub_b.layer_idx != 0]
        # sum the energy of all layers that share a module type
        sub_b = sub_b.groupby(['architecture', 'batch_size', 'module'])['cpu_energy'].sum().reset_index()
        # empirical total energy obtained by summing up the individual modules
        total_agg_energy = sub_b.cpu_energy.sum()
        # compute % contribution of module to total energy consumption
        sub_b["percent_contribution"] = (sub_b.cpu_energy / total_agg_energy * 100).round(2)
        contribution_frames.append(sub_b)
        # store values
        total_agg_energies.append(total_agg_energy)
        total_measured_energies.append(total_measured_energy)
    # mean relative deviation of the summed-module energy from the measured
    # full-run energy, expressed relative to the summed-module energy
    mean_deviation = np.mean([
        (agg - measured) / agg
        for measured, agg in zip(total_measured_energies, total_agg_energies)
    ])
    # print avg energies; compare energy when running the full architecture vs summing the energy over the individual modules
    # (scale to percent *before* rounding so no float artifacts such as 7.000000000000001 are printed)
    print(f"{architecture}: avg. total-measured-energy: {np.round(np.mean(total_measured_energies),10)}, avg. total-agg-energy: {np.round(np.mean(total_agg_energies),10)}, resulting deviation: {np.round(mean_deviation * 100, 2)}%")

if contribution_frames:
    # keep the column order the downstream cells were written against
    module_wise_contributions_by_architecture = pd.concat(contribution_frames, ignore_index=True)[contribution_columns]
else:
    # no architectures in df: fall back to an empty frame with the expected columns
    module_wise_contributions_by_architecture = pd.DataFrame(columns=contribution_columns)

# Plotting

In [None]:
# Horizontal bar chart of each module type's share of the total architecture
# energy, with one bar colour per architecture.
plt.figure(figsize=(8,5))
g = sns.barplot(data=module_wise_contributions_by_architecture, x="percent_contribution",y="module",hue="architecture")
# Label every bar container instead of hard-coding the first two, so the cell
# neither raises IndexError with a single architecture nor leaves bars
# unlabelled when more than two architectures are present.
for container in g.containers:
    g.bar_label(container, padding=10)
# percentages live on a fixed 0-100 axis
g.set_xlim(0, 100)
plt.legend(loc="lower right")
plt.ylabel("module type")
plt.xlabel("% of total architecture energy")
print("black line shows deviations between batch-sizes")

In [None]:
# Line chart of each module's energy share as a function of batch size:
# colour encodes the architecture, line style encodes the module type.
plt.figure(figsize=(6, 8))
ax = sns.lineplot(
    data=module_wise_contributions_by_architecture,
    x="batch_size",
    y="percent_contribution",
    hue="architecture",
    style="module",
)
# Label the axes through the Axes object seaborn drew on.
ax.set_xlabel("batch size")
ax.set_ylabel("% of total architecture energy")