In [55]:
from typing import TypedDict
import warnings

# https://stackoverflow.com/questions/15777951/how-to-suppress-pandas-future-warning
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd

pd.options.mode.chained_assignment = None
import os
import fnmatch
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import scipy
import scipy.ndimage

def add_common_styling_to_fig(fig):
    """Apply the margins shared by every figure in this notebook.

    Calls ``fig.update_layout`` with a margin of 10 on the left, right, top
    and bottom. The figure is modified in place and nothing is returned.
    """
    uniform_margin = {side: 10 for side in ('l', 'r', 't', 'b')}
    fig.update_layout({'margin': uniform_margin})

# Axis titles reused by the power-over-time figures below
# (passed as 'yaxis_title' / 'xaxis_title' to update_layout).
title_y_power = 'Power in W'
title_x_time = 'Time in s'


In [56]:

def get_subdirectories_with_measurements(directory: str, pattern: str):
    """Collect every sub-directory below ``directory`` whose name matches ``pattern``.

    The tree is walked recursively with ``os.walk``; ``pattern`` is a shell-style
    glob that is compared against the directory name only (not the full path).

    Returns:
        A list of paths, each built by joining the parent directory and the
        matching directory name, in ``os.walk`` order.
    """
    return [
        os.path.join(parent, child)
        for parent, children, _files in os.walk(directory)
        for child in children
        if fnmatch.fnmatch(child, pattern)
    ]

def get_timed_measurements(path: str) -> pd.DataFrame:
    """Load one power trace and attach a time axis to every sample.

    Reads ``{path}/measurements.csv`` as a single-column file. The last two
    rows are treated as timestamps (the smaller one is the start, the larger
    one the end of the recording); every row before them is a power sample.
    The samples are spread evenly over the recorded runtime.

    NOTE(review): ``read_csv`` is called with its default header handling, so
    the first line of the file is consumed as a column header - confirm the
    files really start with a header line and not with a sample.
    NOTE(review): the ``/ 1000`` conversion to ``power_W`` implies the raw
    samples are in mW - confirm against the measurement script.

    Args:
        path: directory containing ``measurements.csv``.

    Returns:
        DataFrame with one row per sample and the columns
        ``power`` (raw value), ``time`` (offset from the recording start),
        ``power_W`` (``power / 1000``), ``timestamp`` (start + offset) and
        ``current_smoothed`` (Gaussian-filtered ``power_W``, sigma=2).

    Raises:
        ValueError: if the file contains no samples besides the two timestamps.
    """
    raw = pd.read_csv(f"{path}/measurements.csv", decimal='.')

    # Copy before renaming/adding columns so we never write into a slice of `raw`.
    timings = raw.tail(2).copy()
    timings.columns = ['timestamp']

    start = timings['timestamp'].min()
    end = timings['timestamp'].max()
    runtime = end - start

    # Omit the last two rows (the timestamps)
    measurements = raw.iloc[:-2].copy()
    measurements.columns = ['power']

    sample_count = measurements.shape[0]
    if sample_count == 0:
        raise ValueError(f"{path}/measurements.csv contains no power samples")

    time_between_measurements = runtime / sample_count

    # The row label of each sample is its position in the file, so the time
    # offset is simply label * spacing; computed for the whole column at once.
    offsets = measurements.index.to_numpy() * time_between_measurements

    measurements['time'] = offsets
    measurements['power_W'] = measurements['power'] / 1000
    measurements['timestamp'] = start + offsets
    measurements['current_smoothed'] = scipy.ndimage.gaussian_filter(measurements['power_W'], sigma=2)

    return measurements

def get_timed_events(path: str, start: int) -> pd.DataFrame:
    """Load the events logged by one run and express them relative to ``start``.

    Reads ``{path}/events.csv`` (no header; columns ``timestamp``, ``label``,
    ``time``). If the file does not exist, or contains no rows, an empty frame
    with those three columns is returned.

    Otherwise a synthetic ' Start' event is put in front of the logged events.
    Its timestamp is the second-to-last value of ``{path}/measurements.csv``
    plus 30.
    NOTE(review): the +30 presumably reflects a 30 s idle period recorded
    before the program is launched - confirm against the measurement script.

    Args:
        path: directory of the run.
        start: reference timestamp; ``time`` becomes ``timestamp - start``.

    Returns:
        DataFrame with the columns ``timestamp``, ``label`` and ``time``.
    """
    event_columns = ['timestamp', 'label', 'time']
    events_file = f"{path}/events.csv"

    if not os.path.exists(events_file):
        return pd.DataFrame(columns=event_columns)

    events = pd.read_csv(events_file, header=None, names=event_columns)
    if events.empty:
        return events

    # this is hacky: since python is slow we can't even log the start of the
    # program by itself, so we grab it from the measurements file
    measurements = pd.read_csv(f"{path}/measurements.csv", decimal='.')
    # .iloc[0] instead of [0]: the row is indexed by column name, and integer
    # keys on a label-indexed Series are no longer treated as positions.
    recording_start = measurements.iloc[-2].iloc[0]
    hacky_row = pd.DataFrame([{'timestamp': recording_start + 30, 'label': ' Start'}], columns=event_columns)

    events = pd.concat([hacky_row, events], ignore_index=True)
    events['time'] = events['timestamp'] - start

    return events

def create_graphs_for_experiment(path):
    """Plot the raw and smoothed power trace of one run and save it as a PDF.

    The figure contains the individual samples as markers, the smoothed trace
    as a line, one green vertical line per logged event (annotated with the
    event label) and a dotted horizontal line at the mean power of the run.
    It is written to ``{path}/plot.pdf``.
    """
    measurements = get_timed_measurements(path)
    trace_start = measurements['timestamp'].min()
    time_axis = measurements['time']

    fig = go.Figure()

    sample_markers = go.Scatter(x=time_axis, y=measurements['power_W'], mode='markers')
    fig.add_trace(sample_markers)

    smoothed_line = go.Scatter(x=time_axis, y=measurements['current_smoothed'])
    fig.add_trace(smoothed_line)

    # One vertical marker per event, labelled with the event text.
    for _, event in get_timed_events(path, trace_start).iterrows():
        event_annotation = dict(text=event['label'], textangle=-90, yshift=-20)
        fig.add_vline(x=event['time'], line_width=3, line_color="green", annotation=event_annotation)

    # Mean power over the whole trace, including the time before/after the program.
    fig.add_hline(y=measurements['power_W'].mean(), line_dash="dot")

    axis_layout = {
        'yaxis_title': title_y_power,
        'xaxis_title': title_x_time,
        'showlegend': False,
    }
    fig.update_layout(axis_layout)
    add_common_styling_to_fig(fig)

    filepath = f"{path}/plot.pdf"
    print(f"Saving to {filepath}")
    pio.write_image(fig, file=filepath, format='pdf')

# Render a plot for every measurement directory that does not have one yet.
for path in get_subdirectories_with_measurements('.', 'measurements*'):
    if not os.path.exists(f"{path}/plot.pdf"):
        print(f'Plotting {path}')
        create_graphs_for_experiment(path)
    else:
        print(f'Skipping {path}')

Skipping ./measurements_sleep_0714004033
Skipping ./measurements_roberta_full_0714004310
Skipping ./measurements_roberta_stop_without_saving_epoch_2_1_0714023802
Skipping ./measurements_sleep_0714002102
Skipping ./measurements_roberta_continue_after_not_saving_2_8_0714034016
Skipping ./measurements_roberta_stop_without_saving_epoch_2_7_0714032742
Skipping ./measurements_roberta_full_0714010642
Skipping ./measurements_imports_only_0714040316
Skipping ./measurements_roberta_continue_after_not_saving_2_1_0714024222
Skipping ./measurements_roberta_continue_after_saving_2_6_0714020129
Skipping ./measurements_roberta_continue_after_not_saving_2_0_0714023406
Skipping ./measurements_imports_only_0714035927
Skipping ./measurements_imports_only_0714040200
Skipping ./measurements_imports_only_0714040044
Skipping ./measurements_roberta_continue_after_saving_2_4_0714014516
Skipping ./measurements_roberta_full_0714004823
Skipping ./measurements_roberta_stop_without_saving_epoch_2_5_0714031113
Skippi

In [57]:
# Determine the baseline: pool the samples of every sleep run and report the
# mean and standard deviation of the power draw.

sleep_runs = [
    get_timed_measurements(path)
    for path in get_subdirectories_with_measurements('.', 'measurements_sleep*')
]

# Concatenate once instead of growing a frame inside the loop; the fallback
# keeps `all_sleep` a DataFrame when no sleep run is found.
all_sleep = pd.concat(sleep_runs, ignore_index=True) if sleep_runs else pd.DataFrame()

print(f"average baseline power draw is {all_sleep['power_W'].mean()} with an std of {all_sleep['power_W'].std()}")

average baseline power draw is 49.895335054792795 with an std of 4.407221447615426


In [58]:
# create single-run experiment graphs

def get_color(event_str: str) -> str:
    """Return the rgba fill colour used for the phase that ends with this event.

    The label is matched by substring against an ordered list of rules; the
    first rule that matches decides the colour. Labels that match no rule are
    drawn in opaque black.
    """
    # (substrings, colour) - order matters, the first hit wins.
    colour_rules = (
        (('start program',), 'rgba(255, 0, 0, 0.2)'),
        (('after load data',), 'rgba(0, 255, 0, 0.2)'),
        (('ended',), 'rgba(0, 0, 255, 0.2)'),
        (('Saved',), 'rgba(236, 39, 245, 0.2)'),
        (('Eval', 'eval'), 'rgba(245, 172, 39, 0.2)'),  # both spellings occur in the labels
        (('Start training',), 'rgba(245, 172, 39, 1)'),
    )

    for needles, colour in colour_rules:
        if any(needle in event_str for needle in needles):
            return colour

    return 'rgba(0, 0, 0, 1)'

def get_label(event_str: str) -> str:
    """Translate an event label into the legend name of the phase it closes.

    The label is matched by substring against an ordered list of rules; the
    first rule that matches decides the name. Labels that match no rule are
    returned unchanged.
    """
    # (substring, legend name) - order matters, the first hit wins.
    label_rules = (
        ('start program', 'load python libraries'),
        ('after load data', 'load dataset'),
        ('Start train', 'prepare dataset'),
        ('Eval', 'evaluate'),
        ('ended', 'train'),
        ('Saved', 'save checkpoint'),
    )

    for needle, legend_name in label_rules:
        if needle in event_str:
            return legend_name

    return event_str

def remove_first_and_last_30s(measurements_df, events_df, padding_s=30):
    """Cut the idle padding off a trace and shift everything to start at 0.

    Samples whose ``time`` lies in ``[padding_s, max_time - padding_s)`` are
    kept and their ``time`` is reduced by ``padding_s``. The ``time`` of every
    event is reduced by the same amount and events labelled ' Exit' are
    dropped.

    Neither input frame is modified; both results are new DataFrames.

    Args:
        measurements_df: frame with a ``time`` column.
        events_df: frame with ``time`` and ``label`` columns.
        padding_s: length of the padding removed at each end, 30 by default.

    Returns:
        Tuple ``(trimmed_measurements, shifted_events)``.
    """
    cutoff_time = measurements_df['time'].max() - padding_s
    only_program_measurements = (measurements_df['time'] >= padding_s) & (measurements_df['time'] < cutoff_time)

    # .copy() so the shift below does not write into a slice of the caller's frame.
    trimmed_measurements = measurements_df[only_program_measurements].copy()
    trimmed_measurements['time'] = trimmed_measurements['time'] - padding_s

    # Work on a copy: the caller's events must keep their original times.
    shifted_events = events_df.copy()
    shifted_events['time'] = shifted_events['time'] - padding_s
    shifted_events = shifted_events[shifted_events['label'] != ' Exit']

    return trimmed_measurements, shifted_events

def create_graphs_for_single_experiment(path):
    """Plot one run with its phases as coloured background bands and save it.

    The trace is trimmed with ``remove_first_and_last_30s``. Samples are drawn
    as markers, the smoothed trace as a line. Every event except ' Start',
    ' End' and labels containing 'End training' closes a phase; each phase is
    drawn as a rectangle from the previous boundary (initially 0) to the
    event, coloured by ``get_color`` and named by ``get_label``. The figure is
    shown and written to ``{path}/plot.pdf``.
    """
    untrimmed_measurements = get_timed_measurements(path)
    untrimmed_events = get_timed_events(path, untrimmed_measurements['timestamp'].min())
    measurements, events = remove_first_and_last_30s(untrimmed_measurements, untrimmed_events)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=measurements['time'],
        y=measurements['power_W'],
        mode='markers',
        name="Measurements",
    ))
    fig.add_trace(go.Scatter(
        x=measurements['time'],
        y=measurements['current_smoothed'],
        name="Gaussian Trendline",
    ))

    fig.update_layout({'yaxis_title': title_y_power, 'xaxis_title': title_x_time})
    add_common_styling_to_fig(fig)

    fig.layout.yaxis.range = [0, 260]
    fig.layout.xaxis.range = [0, measurements["time"].max()]

    # Events that close a phase, ordered by the time at which they occur.
    boundary_free_labels = (" Start", " End")
    phases = [
        {"label": event['label'], "time": event['time'], "std": 0}
        for _, event in events.iterrows()
        if not (event['label'] in boundary_free_labels or "End training" in event['label'])
    ]
    phases.sort(key=lambda phase: phase['time'])

    # Each phase spans from the previous boundary to its own event.
    boundaries = [{"time": 0}, *phases]
    labels_in_legend = set()
    for previous, current in zip(boundaries, boundaries[1:]):
        label = get_label(current["label"])
        first_occurrence = label not in labels_in_legend

        fig.add_shape(type='rect', layer="below", showlegend=first_occurrence,
                      name=label,
                      x0=previous["time"], y0=0, x1=current["time"], y1=260,
                      line=dict(color='rgba(0, 0, 0, 0)'),
                      fillcolor=get_color(current["label"]))

        labels_in_legend.add(label)

    legend_position = {"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01}
    fig.update_layout(legend=legend_position)

    fig.show()

    filepath = f"{path}/plot.pdf"
    print(f"Saving to {filepath}")
    pio.write_image(fig, file=filepath, format='pdf')

# Render, show and save the phase-annotated figure for this one run directory.
create_graphs_for_single_experiment('measurements_roberta_full_0714010405')

Saving to measurements_roberta_full_0714010405/plot.pdf


In [59]:
# Lets see how they overlap
from sklearn.cluster import DBSCAN


# Experiment families to overlay; each entry is the part of the directory name
# that follows 'measurements_' and is matched as a prefix.
experiments = ['sleep_0714', 'roberta_full_0714', 'roberta_stop_without_saving', 'roberta_continue_after_not_saving', 'roberta_stop_after_saving', 'roberta_continue_after_saving', 'imports_only_0714']

def plot_multiple_experiments(name: str):
    """Overlay the smoothed power traces of every run of one experiment family.

    All directories matching ``measurements_{name}*`` are loaded, trimmed with
    ``remove_first_and_last_30s`` and drawn as semi-transparent black lines.

    Event times are averaged over the runs per label. ' Evaluate' occurs
    several times per run, so those events are first clustered by time with
    DBSCAN and every cluster is averaged on its own. ' Start', ' End' and
    labels containing 'End training' are ignored. The averaged events are the
    phase boundaries and are drawn as coloured background rectangles.

    For names containing 'sleep' the y-axis is limited to [0, 100] and a red
    line marks the mean power pooled over all plotted (trimmed) runs;
    otherwise the y-axis is [0, 260].

    The figure is shown and written to ``stacked_plots/{name}.pdf``; the
    directory is created if it does not exist.

    Args:
        name: experiment family, i.e. the directory-name prefix after
            ``measurements_``.
    """
    folder_paths = get_subdirectories_with_measurements('.', f'measurements_{name}*')

    fig = go.Figure()

    power_per_run = []
    # Seeded with an empty, correctly shaped frame so the concat below also
    # works when no run (or no run with events) is found.
    events_per_run = [pd.DataFrame(columns=['timestamp', 'label', 'time', 'id'], data=[])]

    for index, path in enumerate(folder_paths):
        measurements = get_timed_measurements(path)
        measurements['id'] = index  # give each experiment a different id, so we can differentiate them later

        start = measurements['timestamp'].min()
        events = get_timed_events(path, start)
        events['id'] = index

        measurements, events = remove_first_and_last_30s(measurements, events)

        fig.add_trace(go.Scatter(x=measurements['time'], y=measurements['current_smoothed'], name=path,
                                 line=dict(color='black', width=2),
                                 marker=dict(size=8),
                                 opacity=0.3, showlegend=False))

        power_per_run.append(measurements['power_W'])
        events_per_run.append(events)

    all_events = pd.concat(events_per_run, ignore_index=True)

    fig.update_layout({
        'yaxis_title': title_y_power,
        'xaxis_title': title_x_time,
    })

    add_common_styling_to_fig(fig)

    phases = []

    for category, group in all_events.groupby('label'):
        if category in (" Start", " End") or "End training" in category:
            continue

        if category == ' Evaluate':
            X = group['time'].values.reshape(-1, 1)

            # eps is the maximum distance between two samples for one to be considered in the neighborhood of the other
            # min_samples is the number of samples in a neighborhood for a point to be considered as a core point
            dbscan = DBSCAN(eps=2, min_samples=1)
            # assign() returns a new frame instead of writing into the groupby slice.
            clustered = group.assign(eval_group=dbscan.fit_predict(X))

            for _cluster_id, eval_group in clustered.groupby('eval_group'):
                phases.append({
                    "label": "evaluate",
                    "time": pd.to_numeric(eval_group['time']).mean(),
                    "std": eval_group['time'].std(skipna=True),
                })
        else:
            phases.append({
                "label": category,
                "time": pd.to_numeric(group['time']).mean(),
                "std": group['time'].std(skipna=True),
            })

    if 'sleep' in name:
        fig.layout.yaxis.range = [0, 100]

        # Pool the samples of all runs: using only the run that happened to be
        # iterated last would make the line depend on directory order.
        if power_per_run:
            pooled_power_W = pd.concat(power_per_run, ignore_index=True)
            mean_power_W = pooled_power_W.mean()

            fig.add_hline(y=mean_power_W, line=dict(color='red', width=2))

            fig.add_annotation(
                x=30,  # X position of the annotation
                y=mean_power_W,  # Y position (same as the line)
                text=f"Mean power is {round(mean_power_W, 2)} with an std of {round(pooled_power_W.std(), 2)}",
                showarrow=True,
                arrowhead=2,
                ax=0,  # X offset of the arrow
                ay=-40,  # Y offset of the arrow
                font=dict(color='red')
            )
    else:
        fig.layout.yaxis.range = [0, 260]

    phases = sorted(phases, key=lambda x: x['time'])

    phases_added_to_legend = []

    # Each phase spans from the previous boundary (initially 0) to its own event.
    boundaries = [{"time": 0}, *phases]
    for phase_1, phase_2 in zip(boundaries, boundaries[1:]):
        label = get_label(phase_2["label"])

        fig.add_shape(type='rect', layer="below", showlegend=(label not in phases_added_to_legend),
                      name=label,
                      x0=phase_1["time"], y0=0, x1=phase_2["time"], y1=260,
                      line=dict(color='rgba(0, 0, 0, 0)'),
                      fillcolor=get_color(phase_2["label"]))

        phases_added_to_legend += [label]

    fig.update_layout(legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.01
    })

    # Use the parameter, not a global loop variable of the calling cell.
    print(f"Stacked plot for {name}")
    fig.show()
    os.makedirs('stacked_plots', exist_ok=True)
    filepath = f"stacked_plots/{name}.pdf"
    print(f"Saving to {filepath}")
    pio.write_image(fig, file=filepath, format='pdf')

# Produce one stacked plot per entry of `experiments`.
for experiment in experiments:
    plot_multiple_experiments(experiment)

Stacked plot for sleep_0714


Saving to stacked_plots/sleep_0714.pdf
Stacked plot for roberta_full_0714


Saving to stacked_plots/roberta_full_0714.pdf
Stacked plot for roberta_stop_without_saving


Saving to stacked_plots/roberta_stop_without_saving.pdf
Stacked plot for roberta_continue_after_not_saving


Saving to stacked_plots/roberta_continue_after_not_saving.pdf
Stacked plot for roberta_stop_after_saving


Saving to stacked_plots/roberta_stop_after_saving.pdf
Stacked plot for roberta_continue_after_saving


Saving to stacked_plots/roberta_continue_after_saving.pdf
Stacked plot for imports_only_0714


Saving to stacked_plots/imports_only_0714.pdf


In [60]:
from typing import List, Tuple
from scipy import integrate
import re

# Determine the overhead we gained from stopping and resuming the program
# lets first do this by simply calculating the average energy increase between the un-stopped run and the stopped one

# The returned energy is in mJ (power in mW integrated over seconds); the
# callers below divide by 1000 twice to report kJ.
# The labels are a mess sadly, as they all include an extra whitespace in the beginning.
def energy_of_run(path: str, start_event = ' Start', end_event = ' Exit') -> float:
    """Integrate the power trace of one run between two logged events.

    The first event labelled ``start_event`` and the first event labelled
    ``end_event`` delimit the window (both ends inclusive). The raw ``power``
    column is integrated over ``time`` with Simpson's rule.

    Args:
        path: directory of the run.
        start_event: label of the event that opens the window.
        end_event: label of the event that closes the window.

    Returns:
        The integral of ``power`` over ``time``, i.e. mW * s = mJ.

    Raises:
        IndexError: if one of the two events was not logged for this run.
    """
    all_measurements = get_timed_measurements(path)
    run_start_timestamp = all_measurements['timestamp'].min()

    events = get_timed_events(path, run_start_timestamp)

    def first_timestamp_of(label: str):
        # Same exception type as a failing .iloc[0], but it names the missing event.
        matches = events.loc[events['label'] == label, 'timestamp']
        if matches.empty:
            raise IndexError(f"event {label!r} was not logged for run {path}")
        return matches.iloc[0]

    program_start = first_timestamp_of(start_event)
    program_end = first_timestamp_of(end_event)

    # each trace also contains some time before and after the actual execution
    inside_window = all_measurements['timestamp'].between(program_start, program_end)
    measurements_of_program = all_measurements[inside_window]

    return integrate.simpson(y=measurements_of_program['power'], x=measurements_of_program['time'])


# seems like one full run is in the order of ~13kJ which is 108W, which sounds reasonable

# first the runs without stopping
complete_runs: List[str] = get_subdirectories_with_measurements('.', 'measurements_roberta_full_0714*')

# Build all rows first and create the frame once instead of growing it with
# concat inside a loop.
energy_costs = pd.DataFrame(
    [{'path': path, 'cost_mJ': energy_of_run(path)} for path in complete_runs],
    columns=['path', 'cost_mJ'],
)

energy_costs['cost_kJ'] = energy_costs['cost_mJ'] / 1000 / 1000
average_cost_full = energy_costs['cost_kJ'].mean()

print(f"Running unstopped costs {round(average_cost_full, 2)} kJ on Average with an std of {round(energy_costs['cost_kJ'].std(), 2)}")

# now the cost of running the training and stopping halfway through
# the data layout is a bit complicated, each run is split into two folders so 
# measurements_roberta_stop_after_saving_epoch_2_**2**_0714011648 would belong to power-measurements/measurements_roberta_continue_after_saving_2_**1**_0714012054

# run this like ("stop_after_saving_epoch_2", "continue_after_saving_2")
def find_matching_runs(patternA: str, patternB: str) -> List[Tuple[str, str]]:
    """Pair every run matching ``patternA`` with the run of ``patternB`` that has the same run number.

    The run number is the second group of digits in the directory *name*
    (e.g. the ``7`` in ``..._epoch_2_7_0714032742``). Only the last path
    component is inspected, so digits in parent directories cannot shift it.

    NOTE(review): runs are paired on *equal* run numbers. The example in the
    comment preceding this function pairs ``..._2_2_...`` with ``..._2_1_...``;
    confirm whether an offset between the two numberings is intended.

    Args:
        patternA: glob for the directories of the first half of each run.
        patternB: glob for the directories of the second half of each run.

    Returns:
        List of ``(path_a, path_b)`` tuples, one per run found for ``patternA``.

    Raises:
        KeyError: if a run of ``patternA`` has no partner in ``patternB``.
        IndexError: if a directory name contains fewer than two groups of digits.
    """
    def index_by_run_number(paths):
        # {'0': '<path of run 0>', ...}
        return {re.findall(r'\d+', os.path.basename(run))[1]: run for run in paths}

    runs_a = index_by_run_number(get_subdirectories_with_measurements('.', patternA))
    runs_b = index_by_run_number(get_subdirectories_with_measurements('.', patternB))

    unmatched = [run for number, run in runs_a.items() if number not in runs_b]
    if unmatched:
        raise KeyError(f"no run matching {patternB!r} found for: {unmatched}")

    return [(run_a, runs_b[number]) for number, run_a in runs_a.items()]

def get_combined_costs_for_multiple(patternA: str, patternB: str) -> pd.DataFrame:
    """Sum the energy of both halves of every stopped-and-resumed run.

    Runs are paired with ``find_matching_runs``; for each pair the result of
    ``energy_of_run`` (default events) of both directories is added up.

    Returns:
        DataFrame with one row per pair and the columns ``path``
        (``"<path_a>+<path_b>"``) and ``cost_mJ`` (combined energy). The frame
        is empty, with the same columns, when no pair is found.
    """
    # Collect the rows first and build the frame once instead of concatenating
    # a one-row frame per pair.
    records = [
        {
            'path': f"{first_half}+{second_half}",
            'cost_mJ': energy_of_run(first_half) + energy_of_run(second_half),
        }
        for first_half, second_half in find_matching_runs(patternA, patternB)
    ]

    return pd.DataFrame(records, columns=['path', 'cost_mJ'])

# Stop after a checkpoint was saved, then resume from it.
save_resume_costs = get_combined_costs_for_multiple(
    'measurements_roberta_stop_after_saving_epoch*',
    'measurements_roberta_continue_after_saving*',
)
save_resume_costs['cost_kJ'] = save_resume_costs['cost_mJ'].map(lambda cost_mJ: cost_mJ / 1000 / 1000)
average_cost_save_resume = save_resume_costs['cost_kJ'].mean()
print(f"Running with save+resume costs {round(average_cost_save_resume, 2)} kJ on Average with an std of {round(save_resume_costs['cost_kJ'].std(), 2)}")

# Stop without saving a checkpoint, then resume.
unsaved_resume_costs = get_combined_costs_for_multiple(
    'measurements_roberta_stop_without_saving_epoch*',
    'measurements_roberta_continue_after_not_saving*',
)
unsaved_resume_costs['cost_kJ'] = unsaved_resume_costs['cost_mJ'].map(lambda cost_mJ: cost_mJ / 1000 / 1000)
average_cost_unsaved_resume_kJ = unsaved_resume_costs['cost_kJ'].mean()

print(f"Running with dontsave+resume costs {round(average_cost_unsaved_resume_kJ, 2)} kJ on Average with an std of {round(unsaved_resume_costs['cost_kJ'].std(), 2)}")

Running unstopped costs 12.97 kJ on Average with an std of 0.04
Running with save+resume costs 13.94 kJ on Average with an std of 0.1
Running with dontsave+resume costs 15.72 kJ on Average with an std of 0.07


In [61]:
from scipy.stats import ttest_rel

# lets try to explain the overhead; for save-resume we hypothesise it'll likely be the extra startup phase
# for unsaved-resume, we suspect it should be an extra startup phase and one extra training+evaluate phase

def average_cost_for_phase(pattern: str, start_event: str, end_event:str) -> float:
    """Average the energy spent between two events over all runs matching ``pattern``.

    Args:
        pattern: glob for the run directories.
        start_event: label of the event that opens the window.
        end_event: label of the event that closes the window.

    Returns:
        Mean of ``energy_of_run(path, start_event, end_event)`` over the runs,
        in the unit returned by ``energy_of_run``; NaN when no run matches.
    """
    paths = get_subdirectories_with_measurements('.', pattern)
    costs_mJ = pd.Series(
        [energy_of_run(path, start_event, end_event) for path in paths],
        dtype='float64',
    )
    return costs_mJ.mean()


# Hypothesis for save+resume: the overhead is one extra startup, measured in
# the resumed runs from ' Start' to ' Start training'.
average_cost_for_extra_startup = average_cost_for_phase('measurements_roberta_continue_after_saving*', ' Start', ' Start training')
average_cost_for_extra_startup_kJ = average_cost_for_extra_startup / 1000 / 1000
# NOTE(review): the printed "unexplained" value is (full - resumed + phase), so
# it is positive when the phase costs MORE than the measured overhead - confirm
# that this sign convention is the intended one.
print(f"an extra start costs {round(average_cost_for_extra_startup_kJ, 2)} on avg, resulting in {average_cost_full - average_cost_save_resume + average_cost_for_extra_startup_kJ} unexplained kJ")
# Hypothesis for dontsave+resume: the overhead is everything in the resumed
# runs from ' Start' up to the ' Epoch 2.0. Saved. Steps: 24' event.
average_cost_for_not_saving_kJ = average_cost_for_phase('measurements_roberta_continue_after_not_saving*', ' Start', ' Epoch 2.0. Saved. Steps: 24') / 1000 / 1000
print(f"starting and doing one more epoch costs {round(average_cost_for_not_saving_kJ, 2)} on avg, resulting in {average_cost_full - average_cost_unsaved_resume_kJ + average_cost_for_not_saving_kJ} unexplained kJ")


# run a t-test by subtracting each hypothesised overhead and check whether we'd end up at our full run
def check_wether_overhead_is_explained_by_phase(combined_costs: pd.DataFrame, start_event_hypothesis: str, end_event_hypothesis: str):
    """Paired t-test: combined cost minus the suspected overhead vs. the full runs.

    For every row the second path of ``path`` (the part after '+') is
    integrated between the two hypothesis events; that energy is subtracted
    from ``cost_mJ``. The result is compared with the module-level
    ``energy_costs['cost_mJ']`` using ``ttest_rel``.

    Side effect: the columns ``suspected_overhead`` and ``cost_minus_overhead``
    are added to ``combined_costs`` in place.

    Returns:
        Tuple ``(t_stat, p_val)``.
    """
    def overhead_of(paths):
        resumed_run = str(paths).split('+')[1]
        return energy_of_run(resumed_run, start_event_hypothesis, end_event_hypothesis)

    combined_costs['suspected_overhead'] = combined_costs['path'].apply(overhead_of)

    remaining_cost = combined_costs['cost_mJ'] - combined_costs['suspected_overhead']
    combined_costs['cost_minus_overhead'] = remaining_cost

    statistic, p_value = ttest_rel(combined_costs['cost_minus_overhead'], energy_costs['cost_mJ'])
    return statistic, p_value

# Significance level referenced by the (currently commented-out) t-test reports below.
alpha = 0.05

# t_stat_save, p_val_save = check_wether_overhead_is_explained_by_phase(save_resume_costs, ' Start', ' Start training')
# print(f"t_state: {t_stat_save}, p_val: {p_val_save}, with alpha = {alpha} we can{'' if p_val_save < alpha else ' not'} reject the Null-Hypothesis that their means don't differ significantly")

# t_stat_unsaved, p_val_unsaved = check_wether_overhead_is_explained_by_phase(unsaved_resume_costs, ' Start', ' Epoch 2.0. Saved. Steps: 24')
# print(f"t_state: {t_stat_unsaved}, p_val: {p_val_unsaved}, with alpha = {alpha} we can{'' if p_val_unsaved < alpha else ' not'} reject the Null-Hypothesis that their means don't differ significantly")




an extra start costs 1.1 on avg, resulting in 0.1345031865627162 unexplained kJ
starting and doing one more epoch costs 3.38 on avg, resulting in 0.6302090126221653 unexplained kJ


In [62]:
from typing import List, TypedDict
import sys

sys.path.append("../GAIA/src")
import power_consumption_profiles as profile

class PhaseDuration(TypedDict):
    """One phase of a single run, delimited by two consecutive events."""
    # Label of the event that opens the phase, with that event's row index appended.
    label: str
    # Timestamp of the closing event minus timestamp of the opening event.
    # NOTE(review): the values come from CSV timestamps and may be floats - confirm `int`.
    duration: int
    # Timestamp of the opening event.
    start: int

# cool, we determined that there is indeed an overhead as expected, lets try to turn this into a model that we could parameterize
# I guess a first step would be to assume that each stage has a constant average power draw, which we shall now get from our data
# Looking at the graphs, the phases don't seem to differ much between the different experiments, so lets just look at the complete_run

def averages_of_phases(name: str):
    """Compute the mean power and mean duration of every phase over all runs of one experiment.

    A phase is the span between two consecutive events of a run. Its key is
    the label of the opening event with that event's row index appended
    directly, without a separator (so a label ending in "Steps: 12" at row 4
    becomes "...Steps: 124").

    Progress is printed along the way, including one hard-coded example
    ``profile.Phase``.

    Args:
        name: experiment family; directories matching ``measurements_{name}*`` are used.

    Returns:
        Dict mapping the phase key to a ``profile.Phase`` holding ``name`` and
        ``power`` (mean of the raw power / 1000), updated with the mean
        ``duration`` and mean ``start`` timestamp of that phase.
        NOTE(review): ``profile.Phase`` is used like a dict (``.update``) -
        confirm against the GAIA module.
    """
    folder_paths = get_subdirectories_with_measurements('.', f'measurements_{name}*')
    all_experiments = pd.DataFrame()
    # NOTE(review): all_events is accumulated below but not read again in this function.
    all_events = pd.DataFrame(columns=['timestamp', 'label', 'time', 'id'], data=[])
    phase_durations: List[PhaseDuration] = []

    for index, path in enumerate(folder_paths):
        measurements = get_timed_measurements(path)
        measurements['id'] = index # give each experiment a different id, so we can differentiate them later

        start = measurements['timestamp'].min()
        events = get_timed_events(path, start)
        events['id'] = index

        # add the phase description to each measurement
        # (pairs every event with its successor; the last event opens no phase)
        for (idx1, startEvent), (_idx2, endEvent) in zip(events.iterrows(), events.iloc[1:].iterrows()):
            # boolean mask of the samples recorded in [start event, end event)
            rows_to_be_updated_indexer = (measurements['timestamp'] >= startEvent['timestamp']) & (measurements['timestamp'] < endEvent['timestamp'])
            measurements.loc[rows_to_be_updated_indexer, 'phase'] = startEvent['label'] + str(idx1)
            phase_durations.append(PhaseDuration(label=startEvent['label']  + str(idx1), duration=endEvent['timestamp'] - startEvent['timestamp'], start=startEvent['timestamp']))
        
        # None instead of an empty frame on the first iteration (pd.concat skips None entries)
        all_experiments = pd.concat([
            all_experiments if not all_experiments.empty else None,
            measurements
        ])


        all_events = pd.concat([all_events, events], ignore_index=True)


    # grouped_events = all_events.groupby('label')
    # for category, group in grouped_events:
    #     group = group.reset_index(drop=True)
    #     std = group['time'].std(skipna=True)
    #     average_time = pd.to_numeric(group['time']).mean()

    # Prints a fixed example phase; it does not depend on the data loaded above.
    print(profile.Phase(name='Startup', duration=6, power=59.9))

    phases = { }

    # Mean power per phase, pooled over all runs.
    grouped_measurements = all_experiments.groupby('phase')

    for category, group in grouped_measurements:
        avg_power = group['power'].mean() / 1000
        phases.update({category: profile.Phase(name=category, power=avg_power)})

        print(f"{category} avg. power draw is {round(avg_power, 2)} W")
    
    # Mean duration and mean start timestamp per phase, pooled over all runs.
    phase_duration_df = pd.DataFrame(phase_durations)
    grouped_durations = phase_duration_df.groupby('label')

    for category, group in grouped_durations:
        avg_duration = group['duration'].mean()
        if category in phases:
            phases[category].update({'duration': avg_duration, 'start': group['start'].mean()})
        else:
            # a phase with a duration but no sample inside its time window
            print(f"{category} could not be set.")
        print(f"{category} avg. time is {round(avg_duration, 2)} s")

    return phases

# Per-phase averages over the 'roberta_full_0714' runs; printed for inspection.
averages = averages_of_phases('roberta_full_0714')
print(averages)


{'name': 'Startup', 'duration': 6, 'power': 59.9}
 End training19 avg. power draw is 123.31 W
 Epoch 1.0 ended. Steps: 124 avg. power draw is 134.0 W
 Epoch 1.0. Saved. Steps: 126 avg. power draw is 235.37 W
 Epoch 2.0 ended. Steps: 247 avg. power draw is 139.88 W
 Epoch 2.0. Saved. Steps: 249 avg. power draw is 239.19 W
 Epoch 3.0 ended. Steps: 3610 avg. power draw is 143.62 W
 Epoch 3.0. Saved. Steps: 3612 avg. power draw is 238.28 W
 Epoch 4.0 ended. Steps: 4813 avg. power draw is 141.87 W
 Epoch 4.0. Saved. Steps: 4815 avg. power draw is 236.59 W
 Epoch 5.0 ended. Steps: 6016 avg. power draw is 146.69 W
 Evaluate11 avg. power draw is 112.46 W
 Evaluate14 avg. power draw is 112.87 W
 Evaluate17 avg. power draw is 107.83 W
 Evaluate5 avg. power draw is 105.1 W
 Evaluate8 avg. power draw is 114.09 W
 Start training3 avg. power draw is 221.93 W
 Start0 avg. power draw is 59.9 W
 after load data2 avg. power draw is 63.17 W
 start program1 avg. power draw is 53.77 W
 End training19 avg. 

In [63]:
import numpy as np

# Convert these phases into a model we can use in our testbed
# (ordered by the mean start timestamp of each phase)
phases_as_list = sorted([phase for phase in averages.values()], key=lambda x: x.get('start'))

# remove the start field since that doesn't appear in our model
# NOTE(review): `phases` is only printed below; the sampled model is built from
# profile.roberta_phases_spec, not from this list.
phases: List[profile.Phase] = [profile.Phase(name=thingy.get('name'), duration=thingy.get('duration'), power=round(thingy.get('power'), 2)) for thingy in phases_as_list]

# FIXME(review): the recorded output of this cell is
# "AttributeError: module 'power_consumption_profiles' has no attribute 'FooPowerFunction'".
# The intended class name is not visible here - confirm against ../GAIA/src.
roberta_power_function = profile.FooPowerFunction(profile.roberta_phases_spec)

as_generic_ml_parameters = profile.MachineLearningParameters(
    start_duration = 23.4, 
    start_power = 52,
    training_duration = 8.2,
    training_power = 223,
    evaluate_duration = 2,
    evaluate_power = 137,
    save_duration = 3,
    save_power = 110,
    epochs = 5
)

# NOTE(review): generic_ml_function is created but not used in the rest of this cell.
generic_ml_function = profile.get_power_policy('ml', as_generic_ml_parameters)


print(phases)
# let's sample that model and see how it stacks up against the real traces we have
# one obvious difference will be that the model only supports being sampled for each second, while
# the trace is sampled every 0.05 seconds

# The model is evaluated every 0.05 over [0, roberta_power_function.duration).
measurements_from_model = [{'power': roberta_power_function(i), 'timestamp': i} for i in np.arange(0, roberta_power_function.duration, 0.05)]
print(measurements_from_model)

model_df = pd.DataFrame(measurements_from_model)

model_fig = px.line(model_df, x='timestamp', y='power')

# model_fig.add_trace(go.Scatter(x=generic_model_df['timestamp'], y=generic_model_df['power']))

model_fig.update_layout({'yaxis_title': title_y_power, 'xaxis_title': title_x_time})
model_fig.update_traces(line=dict(color='black', width=4))

add_common_styling_to_fig(model_fig)

model_fig.show()

def plot_multiple_experiments_against_model(name: str):
    """Overlay the smoothed traces of all runs of one experiment on the model curve.

    Starts from a copy of the module-level ``model_fig`` so that calling this
    function repeatedly does not keep adding traces to the shared figure.
    Every run matching ``measurements_{name}*`` is trimmed by 30 s at both
    ends, shifted to start at 0, converted from the raw power unit by
    ``/ 1000``, Gaussian-smoothed (sigma=2) and added as a faint line.

    The figure is shown and written to ``model_overlaid.pdf``.

    Args:
        name: experiment family, i.e. the directory-name prefix after
            ``measurements_``.
    """
    folder_paths = get_subdirectories_with_measurements('.', f'measurements_{name}*')

    # Copy instead of aliasing: the global model figure must stay untouched.
    fig = go.Figure(model_fig)

    for path in folder_paths:
        measurements = get_timed_measurements(path)

        # cut out the measurements before and after execution (30s each)
        cutoff_time = measurements['time'].max() - 30
        only_program_measurements = (measurements['time'] >= 30) & (measurements['time'] < cutoff_time)
        program_measurements = measurements[only_program_measurements]

        # map them back to start at 0
        time_s = program_measurements['time'] - 30
        # Smoothing is applied after trimming, on the converted values.
        power_W = scipy.ndimage.gaussian_filter(program_measurements['power'] / 1000, sigma=2)

        fig.add_trace(go.Scatter(x=time_s, y=power_W, name=path, opacity=0.2))

    fig.update_layout({'yaxis_title': title_y_power, 'xaxis_title': title_x_time, 'showlegend': False})
    add_common_styling_to_fig(fig)

    fig.show()
    pio.write_image(fig, file='model_overlaid.pdf', format='pdf')

# Compare the model curve with the measured 'roberta_full_0714' runs.
plot_multiple_experiments_against_model('roberta_full_0714')

AttributeError: module 'power_consumption_profiles' has no attribute 'FooPowerFunction'

In [26]:
# Lets now check our model's error statistically,
# we do this via k-fold leave one out cross validation
# k = 10, as we repeated the measurements 10 times

from sklearn.model_selection import LeaveOneOut
from typing import Callable

def get_phases_from_measurements(paths: List[str]) -> List[profile.Phase]:
    """Derive an averaged phase profile from several measured runs.

    For every folder in ``paths`` the measurements and events are loaded. Each
    pair of consecutive events delimits one phase; the phase is named after the
    opening event's label with that event's row index appended. Measurements
    whose timestamp lies in ``[start event, end event)`` are tagged with the
    phase name. Across all runs, the mean of the ``power`` column (divided by
    1000) and the mean duration and mean start timestamp of every phase are
    then computed.

    Phases that have a duration but no tagged measurement are reported with a
    ``print`` and left out of the result (presumably phases shorter than the
    sampling interval -- TODO confirm).

    Args:
        paths: Measurement folders, each readable by ``get_timed_measurements``
            and ``get_timed_events``.

    Returns:
        The phases sorted by their mean start timestamp. An empty list is
        returned when ``paths`` is empty or no run contains two events.
    """
    experiment_frames = []
    phase_durations: List[PhaseDuration] = []

    for index, path in enumerate(paths):
        measurements = get_timed_measurements(path)
        # give each experiment a different id, so we can differentiate them later
        measurements['id'] = index

        start = measurements['timestamp'].min()
        events = get_timed_events(path, start)

        # Walk over consecutive event pairs: (event i, event i + 1) bound phase i.
        for (idx1, start_event), (_idx2, end_event) in zip(events.iterrows(), events.iloc[1:].iterrows()):
            phase_label = start_event['label'] + str(idx1)
            in_phase = (
                (measurements['timestamp'] >= start_event['timestamp'])
                & (measurements['timestamp'] < end_event['timestamp'])
            )
            measurements.loc[in_phase, 'phase'] = phase_label
            phase_durations.append(PhaseDuration(
                label=phase_label,
                duration=end_event['timestamp'] - start_event['timestamp'],
                start=start_event['timestamp'],
            ))

        experiment_frames.append(measurements)

    # Without a single event pair there is no 'phase' column to group by.
    if not experiment_frames or not phase_durations:
        return []

    # Concatenate once instead of growing a frame inside the loop.
    all_experiments = pd.concat(experiment_frames)

    phases = {}
    for category, group in all_experiments.groupby('phase'):
        avg_power = group['power'].mean() / 1000
        phases[category] = profile.Phase(name=category, power=avg_power)

    phase_duration_df = pd.DataFrame(phase_durations)
    for category, group in phase_duration_df.groupby('label'):
        avg_duration = group['duration'].mean()
        if category in phases:
            # NOTE(review): 'start' is the mean of the raw event timestamps; if
            # those are absolute wall-clock values only the ordering is
            # meaningful -- confirm against get_timed_events.
            phases[category].update({'duration': avg_duration, 'start': group['start'].mean()})
        else:
            print(f"{category} could not be set.")

    return sorted(phases.values(), key=lambda phase: phase['start'])

def evaluate_rmse_model_against_measurements(
    model: Callable[[float], float],
    path: str,
    idle_before_s: float = 30,
    idle_after_s: float = 35,
) -> float:
    """Compute the RMSE between a power model and one measured run.

    The model is tested by sampling it at each measured timepoint. The idle
    periods recorded before and after the program run are cut away first, and
    the remaining time axis is shifted to start at 0. As a side effect, a
    figure comparing predicted and measured power is shown.

    Args:
        model: Maps a time in seconds (relative to program start) to a
            predicted power in W.
        path: Measurement folder readable by ``get_timed_measurements``.
        idle_before_s: Seconds to drop at the beginning of the recording.
        idle_after_s: Seconds to drop at the end of the recording.
            NOTE(review): the default of 35 s differs from the 30 s used by
            ``evaluate_total_energy_model_against_measurements`` -- confirm
            this is intended.

    Returns:
        Root mean squared error between measured and predicted power, in W.
    """
    measurements = get_timed_measurements(path)

    cutoff_time = measurements['time'].max() - idle_after_s
    in_program = (measurements['time'] >= idle_before_s) & (measurements['time'] < cutoff_time)
    # Explicit copy: new columns are added below, and we must not rely on the
    # globally silenced chained-assignment warning.
    program = measurements.loc[in_program].copy()
    program['time'] = program['time'] - idle_before_s

    program['power_predicted'] = program['time'].apply(model)
    program['power_actual'] = program['power'] / 1000
    program['difference'] = program['power_actual'] - program['power_predicted']

    fig = go.Figure()
    fig.update_layout({'title': f'Model vs. {path}', 'xaxis_title': 'Time in Seconds', 'yaxis_title': 'Power in W'})
    fig.add_traces([
        go.Scatter(x=program['time'], y=program['power_predicted'], name='Predicted by Model'),
        go.Scatter(x=program['time'], y=program['power_actual'], name='Measured'),
    ])
    fig.show()

    rmse = np.sqrt(np.mean(program['difference'] ** 2))
    return rmse

def evaluate_total_energy_model_against_measurements(
    model: Callable[[float], float],
    path: str,
    idle_before_s: float = 30,
    idle_after_s: float = 30,
) -> float:
    """Compare the total energy predicted by a model with one measured run.

    The model is tested by sampling it at each measured timepoint. The idle
    periods recorded before and after the program run are cut away, the time
    axis is shifted to start at 0, and both power curves are integrated over
    time with Simpson's rule.

    Args:
        model: Maps a time in seconds (relative to program start) to a
            predicted power in W.
        path: Measurement folder readable by ``get_timed_measurements``.
        idle_before_s: Seconds to drop at the beginning of the recording.
        idle_after_s: Seconds to drop at the end of the recording.

    Returns:
        Predicted energy minus measured energy (power in W integrated over
        seconds, i.e. J). Negative values mean the model underestimates.
    """
    measurements = get_timed_measurements(path)

    cutoff_time = measurements['time'].max() - idle_after_s
    in_program = (measurements['time'] >= idle_before_s) & (measurements['time'] < cutoff_time)
    # Explicit copy: new columns are added below, and we must not rely on the
    # globally silenced chained-assignment warning.
    program = measurements.loc[in_program].copy()
    program['time'] = program['time'] - idle_before_s

    program['power_predicted'] = program['time'].apply(model)
    program['power_actual'] = program['power'] / 1000

    energy_predicted = integrate.simpson(y=program['power_predicted'], x=program['time'])
    energy_measured = integrate.simpson(y=program['power_actual'], x=program['time'])

    return energy_predicted - energy_measured

# Leave-one-out cross validation: every measured run is held out once, the
# phase model is fitted on the remaining runs and scored against the held-out run.
# Sorted, because os.walk yields directories in an unspecified order and the
# per-fold score list below should be reproducible.
X = sorted(get_subdirectories_with_measurements('.', 'measurements_roberta_full_0714*'))
loo = LeaveOneOut()

rmse_scores = []
energy_scores = []

for train_indices, test_index in loo.split(X):
    train_paths = [X[index] for index in train_indices]
    test_path = X[test_index[0]]  # leave-one-out: exactly one test index per fold

    phases = get_phases_from_measurements(train_paths)
    model = profile.create_phases_profile(phases)
    rmse_scores.append(evaluate_rmse_model_against_measurements(model, test_path))
    energy_scores.append(evaluate_total_energy_model_against_measurements(model, test_path))

mean_score = np.mean(rmse_scores)
mean_energy_diff = np.mean(energy_scores)
print(energy_scores)
# Report the real number of folds instead of a hard-coded 10.
print(f'Cross Validation with k={len(X)} has an RMSE of {round(mean_score,2)} W on average.')
print(f'Cross Validation with k={len(X)} has an average difference in energy of {round(mean_energy_diff,2)} J.')


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


 Epoch 5.0. Saved. Steps: 6018 could not be set.
 Evaluate20 could not be set.


[np.float64(-90.79226121022475), np.float64(-86.81775178026874), np.float64(-189.09414677987115), np.float64(-58.241416173726975), np.float64(-47.21807701947), np.float64(-203.80264814201655), np.float64(-140.50124497234538), np.float64(-155.64450320647302), np.float64(-102.49447314829376), np.float64(-122.58929504457774)]
Cross Validation with k=10 has an RMSE of 39.32 W on average.
Cross Validation with k=10 has an average of -119.72 kJ on average.


In [None]:
# In order to get a decent baseline to compare a job with dynamic power against, we use a job with a constant power draw
# that integrates to the same amount of energy (in Joule)

# Sample the phase-based power model and its constant-power counterpart on the
# same time grid, plot both curves and integrate each one to an energy.
roberta_power_function = profile.create_phases_profile(profile.roberta_phases)
roberta_power_function_averaged = profile.phases_to_constant_via_average(profile.roberta_phases)

# 0 s up to (excluding) 90 s in 0.05 s steps
sample_times = np.arange(0, 90, 0.05)

roberta_df = pd.DataFrame({
    'power': [roberta_power_function(t) for t in sample_times],
    'time': sample_times,
})
roberta_averaged_df = pd.DataFrame({
    'power': [roberta_power_function_averaged(t) for t in sample_times],
    'time': sample_times,
})

fig = go.Figure()
fig.update_layout(
    xaxis_title='Zeit in Sekunden',
    yaxis_title='Power in W',
    title='Phase Model vs. Constant Model of same Energy usage',
)
fig.add_trace(go.Scatter(x=roberta_df['time'], y=roberta_df['power'], name="Model"))
fig.add_trace(go.Scatter(x=roberta_averaged_df['time'], y=roberta_averaged_df['power'], name="Constant Model"))

fig.show()

# Energy = integral of power over time (Simpson's rule on the sampled curves).
energy_model = integrate.simpson(y=roberta_df['power'], x=roberta_df['time'])
print(f"The model used {round(energy_model, 1)} J")

energy_model_averaged = integrate.simpson(y=roberta_averaged_df['power'], x=roberta_averaged_df['time'])
print(f"The averaged-model used {round(energy_model_averaged, 1)} J")


The model used 12974.4 J
The averaged-model used 12975.8 J
