In this notebook, the trained energy predictors for each layer type are tested against real-world measurements of various architectures.

In [1]:
import pandas as pd
import yaml
import sys
sys.path.append('../')
import seaborn as sns
sns.set(font_scale=1.2)
import torchvision.models as models
from matplotlib import pyplot as plt
from utils.data_utils import preprocess_and_normalize_energy_data, parse_codecarbon_output
from utils.architecture_utils import get_modules_from_architecture
from run_estimation import compute_energy_estimate
from sklearn.metrics import r2_score
%load_ext autoreload
%autoreload 2
pd.set_option("display.precision", 5)

# Custom Definitions

In [2]:
# Map each architecture name to a freshly constructed torchvision model
# without pretrained weights. Insertion order matches the listed order.
architectures_dict = {
    arch_name: getattr(models, arch_name)(weights=None)
    for arch_name in (
        'alexnet',
        'vgg13',
        'vgg11',
        'vgg16',
        'resnet18',
        'resnet34',
        'resnet50',
    )
}


KeyboardInterrupt



# Data Preparation
## Load Data & Normalize

In [None]:
# Parse the raw CodeCarbon CSV of measurement run 00 together with its SLURM log.
# NOTE(review): the meaning of the positional flags (False, and the tuple's
# 'architectures', False, 3) is defined by parse_codecarbon_output — confirm there.
d1_slurm_log_spec = (
    '../data/architectures/00_architectures-layerwise-slurm-log.out',
    'architectures',
    False,
    3,
)
d1 = parse_codecarbon_output(
    '../data/architectures/00_architectures-layerwise-raw.csv',
    False,
    d1_slurm_log_spec,
)
print(f"dataset shape: {d1.shape}")

In [None]:
# Parse the raw CodeCarbon CSV of measurement run 01 together with its SLURM log.
# NOTE(review): the meaning of the positional flags (False, and the tuple's
# 'architectures', False, 3) is defined by parse_codecarbon_output — confirm there.
d2_slurm_log_spec = (
    '../data/architectures/01_architectures-layerwise-slurm-log.out',
    'architectures',
    False,
    3,
)
d2 = parse_codecarbon_output(
    '../data/architectures/01_architectures-layerwise-raw.csv',
    False,
    d2_slurm_log_spec,
)
print(f"dataset shape: {d2.shape}")

In [None]:
# Stack both measurement runs into one frame, then normalize/aggregate them.
df_unnormalized = pd.concat([d1, d2])

# Columns handed to the preprocessing helper as its second argument.
# NOTE(review): presumably the keys observations are aggregated over — confirm
# against preprocess_and_normalize_energy_data.
id_columns = ['module', 'batch_size', 'architecture', 'layer_idx']
df = preprocess_and_normalize_energy_data(
    df_unnormalized,
    id_columns,
    aggregate=True,
    verbose=True,
)

# layer_idx is compared against integers further down, so make it numeric
df["layer_idx"] = pd.to_numeric(df["layer_idx"])
print(f"Measured models: {df.architecture.unique()}")
df.head(15)

## Ensuring that module-wise observations are complete for every architecture and batch-size

In [None]:
# Keep only (architecture, batch_size) groups that have exactly one row per
# module returned by get_modules_from_architecture plus one additional row;
# every other group is dropped entirely.
for arch_name in df.architecture.unique():
    arch_rows = df.loc[df.architecture == arch_name]
    expected_row_count = len(get_modules_from_architecture(architectures_dict[arch_name])) + 1
    for bs in arch_rows.batch_size.unique():
        bs_rows = arch_rows.loc[arch_rows.batch_size == bs]
        if len(bs_rows) == expected_row_count:
            continue
        print(f"removing all observations from {arch_name} with batch_size {bs}")
        df = df.drop(index=bs_rows.index)
print("Final data-shape: ", df.shape)

## Check if there are large deviations between the accumulated layer-wise energy and the complete architecture energy measurement

In [None]:
# For every (architecture, batch_size) pair, compare the energy of the row with
# layer_idx == 0 against the sum of the energies of all rows with layer_idx != 0.
# percent_deviation is the difference relative to the summed (layer-wise) energy.
#
# Rows are collected in a list and turned into a DataFrame once: growing a frame
# with pd.concat inside the loop is quadratic and, when starting from an empty
# frame, leaves the numeric columns with object dtype.
deviation_columns = ['architecture', 'batch_size', 'percent_deviation', 'total_agg_energy', 'measured_energy']
deviation_records = []
for a in df.architecture.unique():
    # subset data by architecture
    sub = df.loc[df.architecture == a]
    for b in sub.batch_size.unique():
        # subset data by batch_size
        sub_b = sub.loc[sub.batch_size == b]
        # energy of the layer_idx == 0 row; .item() raises unless there is exactly one such row
        total_measured_energy = sub_b.loc[sub_b.layer_idx == 0].cpu_energy.item()
        # all remaining rows belong to individual modules of the architecture
        sub_b_modules = sub_b.loc[sub_b.layer_idx != 0]
        # sum the per-module energies to get the aggregated layer-wise total
        module_energies = sub_b_modules.groupby(['architecture', 'batch_size', 'module'])['cpu_energy'].sum()
        total_agg_energy = sum(module_energies)
        # without any module rows the aggregate is 0 and the ratio is undefined;
        # record NaN instead of raising ZeroDivisionError
        if total_agg_energy == 0:
            percent_deviation = float('nan')
        else:
            percent_deviation = (total_agg_energy - total_measured_energy) / total_agg_energy
        deviation_records.append({
            'architecture': a,
            'batch_size': b,
            'percent_deviation': percent_deviation,
            'total_agg_energy': total_agg_energy,
            'measured_energy': total_measured_energy,
        })
architecture_wise_deviations = pd.DataFrame(deviation_records, columns=deviation_columns)

# print avg energies; compare energy when running the full architecture vs summing the energy over the individual modules
agg = architecture_wise_deviations.groupby(['architecture']).mean()
agg.columns = [f"mean_{col_name}" for col_name in agg.columns]
agg

In [None]:
# Bar chart contrasting, per architecture, the summed layer-wise energy with the
# energy recorded for the complete architecture, to inspect deviations visually.
sns.set(font_scale=1.5)
plt.figure(figsize=(13,7))

display_names = {"total_agg_energy": "layer-wise aggregate", 'measured_energy': 'total measured energy'}
to_plot = architecture_wise_deviations.rename(columns=display_names)
to_plot = pd.melt(
    to_plot,
    id_vars=['architecture', 'batch_size'],
    value_vars=['percent_deviation', 'layer-wise aggregate', 'total measured energy'],
    var_name='method',
)

# the relative deviation is not an energy value, so it is left out of the bars
energy_rows = to_plot.loc[to_plot.method != 'percent_deviation']
g = sns.barplot(data=energy_rows, x="architecture", hue="method", y="value")
g.set_xlabel("architecture")
g.set_ylabel("consumed energy")

In [None]:
# Remove every (architecture, batch_size) configuration whose layer-wise aggregate
# deviates too strongly from the measurement of the complete architecture.
# The filtered copy is stored in `data`; `df` itself is left untouched.
#
# The threshold is a single named constant so that the warning text always reports
# the value that is actually applied (previously the code used 0.9 while the
# message claimed ">10%").
MAX_ABS_PERCENT_DEVIATION = 0.9

abs_deviation = pd.to_numeric(architecture_wise_deviations.percent_deviation).abs()
blacklist = (
    architecture_wise_deviations
    .loc[abs_deviation > MAX_ABS_PERCENT_DEVIATION, ['architecture', 'batch_size']]
    .to_dict('records')
)

data = df.copy()
if len(blacklist) != 0:
    print(f"WARNING: measurements with large deviations (>{MAX_ABS_PERCENT_DEVIATION:.0%}) detected. Corresponding observations will be removed!")
    print("Number of blacklisted configurations: ", len(blacklist))
    # remove measurements where the deviations are too big
    rows_before = data.shape[0]
    # named bad_config (not config) so it cannot be confused with the YAML
    # configuration that a later cell stores under the name `config`
    for bad_config in blacklist:
        # a row matches when all blacklisted columns equal the configuration's values
        is_blacklisted = (data[list(bad_config)] == pd.Series(bad_config)).all(axis=1)
        data = data.loc[~is_blacklisted]
    print(f"A total of {rows_before-data.shape[0]} rows have been removed.")
else:
    print("No bad measurements found!")

# Predictions
## Load configuration

In [None]:
# Load the model-fitting/estimation configuration from YAML.
# A parse error is reported and then re-raised: the following cells index into
# `config`, so continuing with a missing or stale `config` would only fail later
# with a less helpful error.
with open('../model_fitting_and_estimation_config.yaml', "r") as stream:
    try:
        config = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
        raise

## Compute full architecture and channel-wise predictions

In [None]:
# Compute energy predictions for every architecture and batch size and pair them
# with the measurements:
#   * full_architecture_preds: measurement of the layer_idx == 0 row (y) vs. the
#     total predicted energy (y_hat)
#   * channel_wise_preds: measurement of every layer_idx != 0 row (y) vs. the
#     corresponding entry of the channel-wise predictions (y_hat)
#
# This cell uses `data`, the copy of `df` from which the blacklisted
# configurations were removed. It previously re-assigned `data = df`, which
# silently discarded that filtering.
#
# Rows are collected in lists and converted to DataFrames once at the end instead
# of concatenating onto an initially empty frame in every iteration.
full_architecture_columns = ["y", "y_hat", "batch_size", "architecture"]
channel_wise_columns = ["y", "y_hat", "batch_size", "architecture", "module", "layer_idx", "sanity"]
full_architecture_records = []
channel_wise_records = []

for a_name, architecture in architectures_dict.items():
    # get data for the model
    data_a = data.loc[data['architecture'] == a_name].reset_index(drop=True)

    # compute the predictions for the entire model
    # for all batch-sizes that were sampled
    for batch_size in data_a.batch_size.unique():
        # predict the total energy and the per-channel energies of the architecture
        total_predicted_energy, channel_wise_energies = compute_energy_estimate(
            architecture=architecture,
            batch_size=batch_size,
            config=config["model_configurations"],
        )
        # sub-select corresponding to batch_size
        data_a_batch_size = data_a.loc[data_a.batch_size == batch_size]
        # sub-select the measurement that corresponds to the entire architecture;
        # .item() raises unless there is exactly one such row
        a_measurement = data_a_batch_size.loc[data_a_batch_size.layer_idx == 0]

        full_architecture_records.append({
            "y": a_measurement.cpu_energy.item(),
            "y_hat": total_predicted_energy[0],
            "batch_size": batch_size,
            "architecture": a_name,
        })

        # combine the per-channel predictions with the layer-wise measurements
        data_a_compwise = data_a_batch_size.loc[data_a_batch_size.layer_idx != 0].reset_index(drop=True)
        # NOTE(review): predictions are matched to measurements purely by row
        # position; this assumes the rows appear in the same order as the entries
        # of channel_wise_energies — the "sanity" column can be used to verify it.
        for idx, row in enumerate(data_a_compwise.itertuples(index=False)):
            channel_wise_records.append({
                "y": row.cpu_energy,
                "y_hat": channel_wise_energies[idx][1],
                "batch_size": batch_size,
                "architecture": a_name,
                "module": row.module,
                "layer_idx": row.layer_idx,
                "sanity": channel_wise_energies[idx][0],
            })

full_architecture_preds = pd.DataFrame(full_architecture_records, columns=full_architecture_columns)
channel_wise_preds = pd.DataFrame(channel_wise_records, columns=channel_wise_columns)

# Analyze Results
## Complete architecture estimates

In [None]:
# R2 score over all full-architecture predictions, followed by the R2 score and
# the mean absolute relative deviation for each architecture that has predictions.
r2 = r2_score(full_architecture_preds.y, full_architecture_preds.y_hat)
print("Overall total R2-Score: ", r2)

for a_name in architectures_dict:
    # predictions belonging to this architecture only
    is_current = full_architecture_preds.architecture == a_name
    full_prediction_single_model = full_architecture_preds.loc[is_current]
    if len(full_prediction_single_model) == 0:
        # nothing was measured/predicted for this architecture
        continue
    print("-----------")
    truth = full_prediction_single_model.y
    estimate = full_prediction_single_model.y_hat
    percent_deviation = abs(estimate - truth) / truth
    print(a_name)
    print("R2-Score: ", r2_score(truth, estimate))
    print(f"Avg-Abs-%-Deviation: {percent_deviation.mean():.2%}")

In [None]:
# Set seaborn's theme with a font scale of 1.5 before plotting.
sns.set(font_scale=1.5)

In [None]:
# Scatter plot of measured (x) vs. predicted (y) energy of complete architectures,
# with a dashed diagonal marking perfect predictions.
#
# Colours and legend entries are derived from the architectures that are actually
# present in full_architecture_preds. A fixed four-colour palette and a fixed
# four-entry legend break (or mislabel points) as soon as a different set of
# architectures is plotted.
IDEAL_LINE_COLOR = "#f032e6"
# display names for the legend; unknown architectures fall back to their raw name
ARCHITECTURE_LABELS = {
    'alexnet': 'AlexNet',
    'vgg11': 'VGG11',
    'vgg13': 'VGG13',
    'vgg16': 'VGG16',
    'resnet18': 'ResNet18',
    'resnet34': 'ResNet34',
    'resnet50': 'ResNet50',
}

plotted_architectures = list(full_architecture_preds.architecture.unique())
# the first four colours of the "deep" palette are the ones used previously
architecture_colors = dict(zip(
    plotted_architectures,
    sns.color_palette("deep", n_colors=max(len(plotted_architectures), 1)),
))

plt.figure(figsize=(10,5.5))
g = sns.scatterplot(
    data=full_architecture_preds,
    x="y",
    y="y_hat",
    hue="architecture",
    hue_order=plotted_architectures,
    palette=architecture_colors,
)
# diagonal spanning the joint value range of measurements and predictions
min_x = min(min(full_architecture_preds.y), min(full_architecture_preds.y_hat))
max_x = max(max(full_architecture_preds.y), max(full_architecture_preds.y_hat))
g.plot([min_x, max_x], [min_x, max_x], transform=g.transData, linestyle="--", color=IDEAL_LINE_COLOR)
g.set_xlabel("Ground Truth")
g.set_ylabel("Predictions")

# one legend handle per plotted architecture, in the same order and colour as the points
custom_lines = [
    plt.Line2D([0], [0], color=architecture_colors[name], lw=2)
    for name in plotted_architectures
]
custom_labels = [ARCHITECTURE_LABELS.get(name, name) for name in plotted_architectures]
custom_lines2 = [plt.Line2D([0], [0], color=IDEAL_LINE_COLOR, lw=2, linestyle="--")]
# the "ideal" legend is re-added as an artist because the second legend call replaces it
legend2 = g.legend(custom_lines2, ["ideal"], bbox_to_anchor=(0.43, 0.980), loc='upper right', borderaxespad=0.)
g.add_artist(legend2)
plt.legend(custom_lines, custom_labels, title="architecture", loc="best")

## Channel-wise estimates


In [None]:
# Per module type: print the R2 score and mean absolute relative deviation of the
# channel-wise predictions and draw a measured-vs-predicted scatter plot with a
# dashed diagonal marking perfect predictions.
#
# Every architecture gets one fixed colour across all module plots, and each
# plot's legend lists exactly the architectures present in that plot. With a
# fixed four-colour palette and a fixed four-entry legend, colours are assigned
# by position within each subset, so the same architecture can change colour
# between plots and the legend can mislabel points.
sns.set(font_scale=1.6)

IDEAL_LINE_COLOR = "#f032e6"
# display names for the legend; unknown architectures fall back to their raw name
ARCHITECTURE_LABELS = {
    'alexnet': 'AlexNet',
    'vgg11': 'VGG11',
    'vgg13': 'VGG13',
    'vgg16': 'VGG16',
    'resnet18': 'ResNet18',
    'resnet34': 'ResNet34',
    'resnet50': 'ResNet50',
}

all_architectures = list(channel_wise_preds.architecture.unique())
# the first four colours of the "deep" palette are the ones used previously
architecture_colors = dict(zip(
    all_architectures,
    sns.color_palette("deep", n_colors=max(len(all_architectures), 1)),
))

for channel_type in channel_wise_preds.module.unique():
    channel_preds = channel_wise_preds.loc[channel_wise_preds.module == channel_type]
    percent_deviation = abs(channel_preds.y_hat - channel_preds.y) / channel_preds.y
    print(channel_type)
    print("R2-Score: ", r2_score(channel_preds.y, channel_preds.y_hat))
    print(f"Avg-Abs-%-Deviation: {percent_deviation.mean():.2%}")

    # architectures that actually contain this module type, in global order
    present = set(channel_preds.architecture)
    plotted_architectures = [name for name in all_architectures if name in present]

    plt.figure(figsize=(9,9))
    g = sns.scatterplot(
        data=channel_preds,
        x="y",
        y="y_hat",
        hue="architecture",
        hue_order=plotted_architectures,
        palette=architecture_colors,
    )
    plt.ticklabel_format(style='scientific', axis='x', scilimits=(0,0))
    plt.ticklabel_format(style='scientific', axis='y', scilimits=(0,0))
    # diagonal spanning the joint value range of measurements and predictions
    min_x = min(min(channel_preds.y), min(channel_preds.y_hat))
    max_x = max(max(channel_preds.y), max(channel_preds.y_hat))
    g.plot([min_x, max_x], [min_x, max_x], transform=g.transData, linestyle="--", color=IDEAL_LINE_COLOR)
    g.set_xlabel("Ground Truth")
    g.set_ylabel("Predictions")

    # one legend handle per plotted architecture, in the same order and colour as the points
    custom_lines = [
        plt.Line2D([0], [0], color=architecture_colors[name], lw=2)
        for name in plotted_architectures
    ]
    custom_labels = [ARCHITECTURE_LABELS.get(name, name) for name in plotted_architectures]
    custom_lines2 = [plt.Line2D([0], [0], color=IDEAL_LINE_COLOR, lw=2, linestyle="--")]
    # the "ideal" legend is re-added as an artist because the second legend call replaces it
    legend2 = g.legend(custom_lines2, ["ideal"], bbox_to_anchor=(0.234, 0.985), loc='upper right', borderaxespad=0.1)
    g.add_artist(legend2)
    plt.legend(custom_lines, custom_labels, title="architecture", loc="best")
    plt.show()