Imports

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# import eesr

# Functions

In [None]:
# Creates a dataframe with the results of the experiments based on the parameters.
def create_dataframe(path_ex, file_name, dcs = '', tpp = ''):
    """Load `file_name` from every matching experiment folder into one dataframe.

    Parameters
    ----------
    path_ex : str
        Directory whose immediate sub-folders each hold the output of one run.
    file_name : str
        File to read from every selected folder. Names containing '.tsv' are
        parsed as tab-separated, everything else as comma-separated.
    dcs : str, optional
        Selects folders whose name contains "dcs_" + dcs. The default ''
        accepts any folder containing "dcs_".
    tpp : str, optional
        Selects folders whose name contains "tpp_" + tpp. The default ''
        accepts any folder containing "tpp_".

    Returns
    -------
    pandas.DataFrame
        The rows of all selected files, each tagged with the folder it came
        from in a 'trace_name' column. Empty when no folder matches.

    Notes
    -----
    Matching is by substring, so dcs='1' also selects a folder ending in
    'dcs_10'. Row indices are kept as read from each file (not reset).
    """
    select_dcs = "dcs_" + dcs
    select_tpp = "tpp_" + tpp
    # The delimiter depends only on the file name, so it is decided once.
    delimiter = '\t' if '.tsv' in file_name else ','

    # Collect the per-folder frames and concatenate once: concatenating inside
    # the loop copies all previously loaded rows on every iteration.
    frames = []
    for folder in next(os.walk(path_ex))[1]:
        if select_dcs in folder and select_tpp in folder:
            data = pd.read_csv(os.path.join(path_ex, folder, file_name), delimiter=delimiter)
            data['trace_name'] = folder
            frames.append(data)

    # pd.concat rejects an empty list, so keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

# Creates a dataframe with the environment stats of the specified trace.
def create_env_dataframe(path_ex, trace_name):
    """Load 'environment.csv' from every folder whose name contains `trace_name`.

    Parameters
    ----------
    path_ex : str
        Directory whose immediate sub-folders each hold the output of one run.
    trace_name : str
        Text that must occur in the folder name. Matching is by substring, so
        pass the full folder name to select exactly one run.

    Returns
    -------
    pandas.DataFrame
        The rows of all selected 'environment.csv' files, each tagged with its
        folder in a 'trace_name' column. Empty (no columns) when nothing matches.
    """
    file_name = 'environment.csv'

    # Collect first and concatenate once instead of re-copying inside the loop.
    frames = []
    for folder in next(os.walk(path_ex))[1]:
        if trace_name in folder:
            data = pd.read_csv(os.path.join(path_ex, folder, file_name))
            data['trace_name'] = folder
            frames.append(data)

    # pd.concat rejects an empty list, so keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

# Splits the environment dataframe into a dataframe for each environment and writes it to a csv file.
# def split_env_dataframe(df, path_ex):



# Variables

Experiment paths

In [None]:
# Root folders of the two experiment runs, relative to the notebook's working directory.
path_ex1 = '../EX_1/'
path_ex2 = '../EX_2/'

# Named colours for the two-policy comparisons (index 0 and 1 are used for the FMP and LAH series).
colors = ['lightcoral', 'steelblue', 'yellowgreen']
# Hex palette used by the 1-DC vs 2-DC comparison plots.
colors2 = ['#F05039', '#E57A77', '#1F449C', '#3D65A5', '#7CA1CC', '#A8B6CC']

Datacentre dependent dataframes

In [None]:
# Experiment-1 result tables, loaded once per datacentre count.
# tpp='' keeps every task placement policy in the same frame.
stats_dc1_df, stats_dc2_df = [
    create_dataframe(path_ex=path_ex1, file_name='stats.tsv', dcs=dc_count, tpp='')
    for dc_count in ('1', '2')
]

summary_dc1_df, summary_dc2_df = [
    create_dataframe(path_ex=path_ex1, file_name='summary.tsv', dcs=dc_count, tpp='')
    for dc_count in ('1', '2')
]

tasks_dc1_df, tasks_dc2_df = [
    create_dataframe(path_ex=path_ex1, file_name='tasks.tsv', dcs=dc_count, tpp='')
    for dc_count in ('1', '2')
]

workflows_dc1_df, workflows_dc2_df = [
    create_dataframe(path_ex=path_ex1, file_name='workflows.tsv', dcs=dc_count, tpp='')
    for dc_count in ('1', '2')
]

In [None]:
# Frequency of each mean value reported for the normalized schedule length metric.
summary_dc1_df.loc[summary_dc1_df['metric'] == 'Workflow Normalized Schedule Length', 'mean'].value_counts()

In [None]:
# All summary rows for the schedule length metric.
summary_dc1_df.loc[summary_dc1_df['metric'] == 'Workflow Schedule Length']

Fastest Machine Placement variables

In [None]:
# Result tables restricted to the fastest-machine placement policy, per datacentre count.
stats_dc1_fmp_df, stats_dc2_fmp_df = [
    create_dataframe(path_ex=path_ex1, file_name='stats.tsv', dcs=dc_count, tpp='fastest_machine')
    for dc_count in ('1', '2')
]

summary_dc1_fmp_df, summary_dc2_fmp_df = [
    create_dataframe(path_ex=path_ex1, file_name='summary.tsv', dcs=dc_count, tpp='fastest_machine')
    for dc_count in ('1', '2')
]

tasks_dc1_fmp_df, tasks_dc2_fmp_df = [
    create_dataframe(path_ex=path_ex1, file_name='tasks.tsv', dcs=dc_count, tpp='fastest_machine')
    for dc_count in ('1', '2')
]

workflows_dc1_fmp_df, workflows_dc2_fmp_df = [
    create_dataframe(path_ex=path_ex1, file_name='workflows.tsv', dcs=dc_count, tpp='fastest_machine')
    for dc_count in ('1', '2')
]

Look Ahead Placement variables

In [None]:
# Result tables restricted to the look-ahead placement policy, per datacentre count.
stats_dc1_lah_df, stats_dc2_lah_df = [
    create_dataframe(path_ex=path_ex1, file_name='stats.tsv', dcs=dc_count, tpp='look_ahead')
    for dc_count in ('1', '2')
]

summary_dc1_lah_df, summary_dc2_lah_df = [
    create_dataframe(path_ex=path_ex1, file_name='summary.tsv', dcs=dc_count, tpp='look_ahead')
    for dc_count in ('1', '2')
]

tasks_dc1_lah_df, tasks_dc2_lah_df = [
    create_dataframe(path_ex=path_ex1, file_name='tasks.tsv', dcs=dc_count, tpp='look_ahead')
    for dc_count in ('1', '2')
]

workflows_dc1_lah_df, workflows_dc2_lah_df = [
    create_dataframe(path_ex=path_ex1, file_name='workflows.tsv', dcs=dc_count, tpp='look_ahead')
    for dc_count in ('1', '2')
]

# Exploration

In [None]:
# Stats rows of one specific look-ahead run.
stats_dc1_df.loc[stats_dc1_df['trace_name'] == 'askalon-new_ee68_parquet_tpp_look_ahead_dcs_1']

In [None]:
# 1-datacentre runs with an AvgResourceUsage of at least 0.1.
stats_dc1_df.loc[stats_dc1_df['AvgResourceUsage'] >= 0.1]

In [None]:
# Summary statistics of the 1-datacentre stats table.
stats_dc1_df.describe()

In [None]:
# Distribution of AvgResourceUsage across the 1-datacentre runs.
stats_dc1_df.boxplot(column=['AvgResourceUsage'])

In [None]:
# 2-datacentre runs with an AvgResourceUsage of at least 0.03.
stats_dc2_df.loc[stats_dc2_df['AvgResourceUsage'] >= 0.03]

In [None]:
# Task-level results of the 1-datacentre runs for the baseline and the look-ahead policy.
baseline_performance_dc1 = create_dataframe(path_ex=path_ex1, file_name='tasks.tsv', dcs='1', tpp='fastest_machine')
lookahead_performance_dc1 = create_dataframe(path_ex=path_ex1, file_name='tasks.tsv', dcs='1', tpp='look_ahead')

lookahead_performance_dc1

# Validation

In [None]:
# Distribution of the per-trace total task energy for both policies.
# The column is selected before aggregating so that only 'energy.consumed'
# is summed; summing the whole frame first also aggregates every other column.
print(tasks_dc1_fmp_df.groupby('trace_name')['energy.consumed'].sum().describe())
tasks_dc1_lah_df.groupby('trace_name')['energy.consumed'].sum().describe()

In [None]:
# Total task energy of the baseline (bl) and the look-ahead (nl) runs.
# The column is selected before aggregating so only 'energy.consumed' is summed.
# NOTE(review): the axis label below says MWh; the /1000 scaling presumes the
# column is stored in kWh — confirm against the simulator output.
bl = tasks_dc1_fmp_df.groupby('trace_name')['energy.consumed'].sum().sum()/1000
nl = tasks_dc1_lah_df.groupby('trace_name')['energy.consumed'].sum().sum()/1000

# Relative change of look-ahead versus the baseline, in percent.
difference_percentage = (nl - bl) / bl * 100
print(difference_percentage, '%')
print('Baseline: ', bl)
print('Lookahead: ', nl)


bar_height = 0.5
fig, ax = plt.subplots(figsize=(7, 1.5))
ax.barh('Fastest Machine + DVFS', bl, color=colors[0], height=bar_height)
ax.barh('Lookahead + DVFS', nl, color=colors[1], height=bar_height)
ax.set_title('Total energy consumption for one datacentre')
ax.set_xlabel('Energy consumption [MWh]')
ax.set_ylabel('Method')
ax.margins(x=0.1, y=0.2)
# Annotate the look-ahead bar (y position 1) with the relative difference.
ax.text(nl, 1, str(round(difference_percentage, 2))+'%', color='black', fontweight='bold')
plt.show()

# TODO:
# - increase text size (figure text at least as big as text in caption)
# - axes labels size of caption text
# - increase method and Energy consumption text size (125% of lah and baseline text size)
# - Task energy consumption for one datacentre (150% or more of caption text size,
#   but this will be the actual caption in the paper)
# - decrease the size of the bars (vertically) (2x size of text in method type)

In [None]:
# Delay of every workflow: actual completion time minus the earliest possible
# completion time. The column is added to both policy frames in place.
for workflows_df in (workflows_dc1_fmp_df, workflows_dc1_lah_df):
    workflows_df['workflow_delays'] = workflows_df['time.complete'] - workflows_df['time.earliest.complete']

In [None]:
# Cumulative workflow delay per policy, converted with /1000/60/60
# (milliseconds to hours, matching the 'Time [h]' axis label).
total_delay_fmp = workflows_dc1_fmp_df['workflow_delays'].sum()/1000/60/60
total_delay_lah = workflows_dc1_lah_df['workflow_delays'].sum()/1000/60/60

# Relative change of look-ahead versus the baseline, in percent.
delay_percentage = (total_delay_lah - total_delay_fmp) / total_delay_fmp * 100
print(delay_percentage, '%')
print('Baseline: ', total_delay_fmp)
print('Lookahead: ', total_delay_lah)


bar_height = 0.5
fig, ax = plt.subplots(figsize=(7, 1.5))
ax.barh('Fastest Machine + DVFS', total_delay_fmp, color=colors[0], height=bar_height)
ax.barh('Lookahead + DVFS', total_delay_lah, color=colors[1], height=bar_height)
ax.set_title('Total workflow delay for one datacentre')
ax.set_xlabel('Time [h]')
ax.set_ylabel('Method')
ax.margins(x=0.14, y=0.2)
# The '+' format flag prints the real sign, so a reduction is shown as '-x%'
# instead of the '+-x%' a hard-coded '+' prefix would produce.
ax.text(total_delay_lah, 1, f'{round(delay_percentage, 2):+}%', color='black', fontweight='bold')
plt.show()

Notes:
Also show the workflow slowdown (workflow normalized schedule length) as a box-and-whisker plot and as a violin plot.

# Experiment 1

**Process**:
1. Select from the 1 DC runs:
    * a) one representative trace for each target utilization in increments of 10%
    * b) one trace with average, shortest and longest running times
2. Compute utilization graphs for 
    * a) fastest machine placement (fmp)
    * b) lookahead (lah) placement
    * c) compare fmp in 1 DC vs 2 DC
    * d) compare lah in 1 DC vs 2 DC
3. Calculate total energy consumption (including idle consumption) for each trace
4. Select the corresponding traces from the 2 DC runs
5. Run the DCs in different country configurations and show the results from EESR

## 1.a)

In [None]:
# Candidate traces per policy, ordered from highest to lowest average resource usage.
stats_dc1_fmp_df = create_dataframe(path_ex=path_ex1, file_name='stats.tsv', dcs='1', tpp='fastest_machine')
utilization_candidates_fmp = (
    stats_dc1_fmp_df
    .sort_values(by=['AvgResourceUsage'], ascending=False)
    .loc[:, ['trace_name', 'AvgResourceUsage']]
)

stats_dc1_lah_df = create_dataframe(path_ex=path_ex1, file_name='stats.tsv', dcs='1', tpp='look_ahead')
utilization_candidates_lah = (
    stats_dc1_lah_df
    .sort_values(by=['AvgResourceUsage'], ascending=False)
    .loc[:, ['trace_name', 'AvgResourceUsage']]
)

## 1.b)

In [None]:
def select_duration_candidates(stats_df, min_duration_floor):
    """Pick four representative traces of `stats_df` by their 'TraceDuration'.

    Parameters
    ----------
    stats_df : pandas.DataFrame
        Needs the columns 'trace_name', 'TraceDuration' and 'AvgResourceUsage'.
    min_duration_floor : number
        The 'min' candidate is the shortest trace whose duration is strictly
        greater than this value (same unit as 'TraceDuration').

    Returns
    -------
    pandas.DataFrame
        Columns 'trace_name' and 'duration', with one row each for the labels
        'max', 'median-high', 'median-low' and 'min'.

    Raises
    ------
    IndexError
        If one of the selections matches no row.
    """
    duration_stats = stats_df['TraceDuration'].describe()

    # Longest trace; ties are broken by the highest average resource usage.
    max_duration_trace = stats_df[stats_df['TraceDuration'] == duration_stats['max']].sort_values(by=['AvgResourceUsage'], ascending=False)['trace_name'].values[0]
    # NOTE(review): 'median-high' is the longest trace at or below the median and
    # 'median-low' the shortest trace at or above it — the labels look swapped; confirm.
    median_high_duration_trace = stats_df[stats_df['TraceDuration'] <= duration_stats['50%']].sort_values(by=['TraceDuration'], ascending=False)['trace_name'].values[0]
    median_low_duration_trace = stats_df[stats_df['TraceDuration'] >= duration_stats['50%']].sort_values(by=['TraceDuration'], ascending=True)['trace_name'].values[0]
    # Shortest trace above the floor rather than the absolute minimum.
    sorted_df = stats_df.sort_values(by=['TraceDuration'], ascending=True)
    min_duration_trace = sorted_df[sorted_df['TraceDuration'] > min_duration_floor]['trace_name'].head(1).values[0]
    print(min_duration_trace)

    return pd.DataFrame({'trace_name': [max_duration_trace, median_high_duration_trace, median_low_duration_trace, min_duration_trace],
                         'duration': ['max', 'median-high', 'median-low', 'min']})


# NOTE(review): the two policies use different minimum-duration floors
# (900_000 vs 1_800_000) — confirm this is intended.
duration_candidates_fmp = select_duration_candidates(stats_dc1_fmp_df, 900_000)
duration_candidates_lah = select_duration_candidates(stats_dc1_lah_df, 1_800_000)

## 2.a),b)

In [None]:
# Inputs for the theoretical energy bounds computed by get_energy_consumption.
num_machines = 9  # number of machines the bounds are computed for
TDP = 225  # per-machine power at full load, in watts
idleTDP = 100  # per-machine idle power, in watts
timeframe = 0.25 # length of one window in hours (15 minutes)

def get_energy_consumption(num_machines, timeframe, idleTDP, TDP, dvfs_reduction=0.126):
    """Return the theoretical energy bounds of a group of machines over one time window.

    Parameters
    ----------
    num_machines : int
        Number of machines.
    timeframe : float
        Length of the window in hours (e.g. 0.25 for 15 minutes).
    idleTDP : float
        Idle power of one machine in watts.
    TDP : float
        Full-load power of one machine in watts.
    dvfs_reduction : float, optional
        Fraction by which DVFS lowers the idle energy. Defaults to 0.126.

    Returns
    -------
    tuple
        (theoretical_max, theoretical_min, theoretical_min_dvfs) in joules:
        all machines at full load, all machines idle, and all machines idle
        with the DVFS reduction applied.
    """
    total_TDP = TDP * num_machines   # total TDP of all machines
    total_idleTDP = idleTDP * num_machines   # total idle TDP of all machines
    # watts * hours * 3600 s/h = joules
    theoretical_max = total_TDP * timeframe * 3600
    theoretical_min = total_idleTDP * timeframe * 3600
    theoretical_min_dvfs = theoretical_min * (1 - dvfs_reduction)
    return theoretical_max, theoretical_min, theoretical_min_dvfs


Plotting functions

In [None]:
# Upper bound on the number of figures a plotting call may draw.
MAX_NUM_OF_GRAPHS = 4

# Plotted values are divided by this factor; the name is the matching SI prefix for the axis label.
conversion_factor = 1_000_000
conversion_name = 'M'

# Theoretical bounds, scaled to the plotted unit.
theoretical_max, theoretical_min, theoretical_min_dvfs = [
    bound / conversion_factor
    for bound in get_energy_consumption(num_machines, timeframe, idleTDP, TDP)
]

# Upper limit of the y-axis, in the plotted unit.
top_limit = 2000000 / conversion_factor


def draw_day_lines(df, ax):
    """Draw a black vertical line on `ax` every 24 units of df['timestamp'].

    Lines start at x=0 and continue up to the last multiple of 24 that does
    not exceed the largest timestamp.
    """
    last_day = int(df['timestamp'].max() // 24)
    for day_start in range(0, (last_day + 1) * 24, 24):
        ax.axvline(x=day_start, color='black')

def plot_energy_consumption(df_fmp, df_lah, theoretical_max, theoretical_min, theoretical_min_dvfs, max_graphs):
    """Plot the IT power of fastest-machine (FMP) runs against look-ahead (LAH) runs.

    Row i of `df_fmp` is compared with row i of `df_lah`; one figure is drawn
    per pair from the 'environment.csv' data of both traces under `path_ex1`.

    Parameters
    ----------
    df_fmp, df_lah : pandas.DataFrame
        Candidate tables with a 'trace_name' column. If `df_fmp` also has an
        'AvgResourceUsage' column, its value is shown in the figure title.
    theoretical_max, theoretical_min, theoretical_min_dvfs : float
        Heights of the horizontal reference lines, in the plotted unit
        (i.e. already divided by `conversion_factor`).
    max_graphs : int
        Maximum number of figures to draw; a value <= 0 disables the limit.

    Notes
    -----
    NOTE(review): pairing is purely positional. The candidate tables built in
    this notebook are sorted independently per policy, so row i of both tables
    is not guaranteed to be the same workload — confirm.
    """
    # Upper y-limit in the plotted unit; loop-invariant, so computed once.
    top_limit = 2000000 / conversion_factor
    graph_counter = 0

    for i in range(min(len(df_fmp), len(df_lah))):
        # Stop after exactly max_graphs figures (the limit is off when max_graphs <= 0).
        if max_graphs > 0 and graph_counter >= max_graphs:
            break

        fmp_name = df_fmp.iloc[i]['trace_name']
        lah_name = df_lah.iloc[i]['trace_name']

        fmp_env = create_env_dataframe(path_ex1, fmp_name)
        lah_env = create_env_dataframe(path_ex1, lah_name)

        # Check the sizes before touching any column: a trace without a
        # matching folder yields a frame with no columns at all.
        if len(fmp_env) <= 2:
            print('trace too short: ', fmp_name, ' ', len(fmp_env))
            continue
        if lah_env.empty:
            print('no environment data for: ', lah_name)
            continue

        for env in (fmp_env, lah_env):
            # scale by conversion_factor (1e6 -> 'M' prefix on the axis label)
            env['it_power_total'] = env['it_power_total'] / conversion_factor
            # convert milliseconds to hours
            env['timestamp'] = env['timestamp'] / 1000 / 60 / 60

        fig, ax = plt.subplots(figsize=(10, 5))

        # Each policy is drawn against its own timestamps.
        ax.plot(fmp_env['timestamp'], fmp_env['it_power_total'], 'o', ls='-', ms=10, color=colors[0], label='FMP', linewidth=2.5, markevery=0.1)
        ax.plot(lah_env['timestamp'], lah_env['it_power_total'], 'v', ls='-', ms=10, color=colors[1], label='LAH', linewidth=2.5, markevery=0.3)

        # Shade the gap between the curves: yellow where FMP >= LAH, cyan where
        # FMP <= LAH. Samples are paired by position over the part both runs cover.
        # NOTE(review): assumes both runs are sampled at the same timestamps — confirm.
        shared = min(len(fmp_env), len(lah_env))
        shared_time = lah_env['timestamp'].to_numpy()[:shared]
        fmp_power = fmp_env['it_power_total'].to_numpy()[:shared]
        lah_power = lah_env['it_power_total'].to_numpy()[:shared]
        ax.fill_between(shared_time, lah_power, fmp_power, where=fmp_power >= lah_power,
                        facecolor='yellow', alpha=0.3)
        ax.fill_between(shared_time, lah_power, fmp_power, where=fmp_power <= lah_power,
                        facecolor='cyan', alpha=0.3)

        # Reference lines, background bands and day markers span the longer run.
        longer_env = fmp_env if fmp_env['timestamp'].max() > lah_env['timestamp'].max() else lah_env
        text_x_coord = longer_env['timestamp'].max()

        ax.axhline(y=theoretical_max, color='red', linestyle=':', linewidth=2)
        ax.text(text_x_coord-0.08*text_x_coord, theoretical_max-0.07, f'Max = {round(theoretical_max, 2)}', color='black', fontweight='bold')

        ax.axhline(y=theoretical_min, color='green', linestyle=':', linewidth=2)
        ax.text(text_x_coord-0.08*text_x_coord, theoretical_min-0.07, f'Idle = {round(theoretical_min, 2)}', color='black', fontweight='bold')

        ax.axhline(y=theoretical_min_dvfs, color='blue', linestyle=':', linewidth=2)
        ax.text(text_x_coord-0.2*text_x_coord, theoretical_min_dvfs-0.07, f'Idle with DVFS = {round(theoretical_min_dvfs, 2)}', color='black', fontweight='bold')

        # Grey out the region below the DVFS idle bound and mark the region above the maximum.
        ax.fill_between(longer_env['timestamp'], 0, theoretical_min_dvfs, facecolor='black', alpha=0.3)
        ax.fill_between(longer_env['timestamp'], theoretical_max, top_limit, facecolor='red', alpha=0.6)

        draw_day_lines(longer_env, ax)

        ax.grid(True)

        # The usage figure is read from the row being plotted, not from an
        # unrelated global table, and is skipped when the column is absent.
        title = 'IT power consumption for trace: ' + fmp_name
        if 'AvgResourceUsage' in df_fmp.columns:
            title += '\nAverage resource usage: ' + str(df_fmp.iloc[i]['AvgResourceUsage'])
        ax.set_title(title)
        ax.set_xlabel('Time [h]')
        ax.set_ylabel(f'Energy [{conversion_name}J]')

        ax.set_ylim(0, top_limit)

        ax.legend()
        plt.show()

        graph_counter += 1


### Utilization candidates

In [None]:
plot_energy_consumption(utilization_candidates_fmp, utilization_candidates_lah, theoretical_max, theoretical_min, theoretical_min_dvfs, MAX_NUM_OF_GRAPHS)
# TODO:
# - for traces that show no movement for a long time, truncate them (specify in the thesis)
# - add labels to important points (ex: 1.82)

### Duration candidates

In [None]:
# FMP vs LAH power curves for the traces selected by duration.
plot_energy_consumption(
    df_fmp=duration_candidates_fmp,
    df_lah=duration_candidates_lah,
    theoretical_max=theoretical_max,
    theoretical_min=theoretical_min,
    theoretical_min_dvfs=theoretical_min_dvfs,
    max_graphs=MAX_NUM_OF_GRAPHS,
)

## 2.c)

Plotting functions

In [None]:

def plot_vs_graphs(candidates, theoretical_max, theoretical_min, max_graphs, placement_policy):
    """Compare the 1-datacentre run of each candidate trace with its 2-datacentre run.

    For every row of `candidates` a two-panel figure is drawn: the left panel
    shows 'it_power_total' of the 1-DC run and of both hosts of the 2-DC run,
    the right panel shows the cumulative sums relative to the 1-DC run (= 100 %).

    Parameters
    ----------
    candidates : pandas.DataFrame
        Table with a 'trace_name' column holding 1-DC folder names. The 2-DC
        folder name is derived by replacing the last character with '2'.
    theoretical_max, theoretical_min : float
        Heights of the reference lines, in the plotted unit
        (i.e. already divided by `conversion_factor`).
    max_graphs : int
        Maximum number of figures to draw; a value <= 0 disables the limit.
    placement_policy : str
        Short policy name used in the legend labels.
    """
    vs_graph_counter = 0

    for i in range(len(candidates)):
        # Stop after exactly max_graphs figures (the limit is off when max_graphs <= 0).
        if max_graphs > 0 and vs_graph_counter >= max_graphs:
            break

        dc_1 = candidates.iloc[i]['trace_name']
        dc_2 = dc_1[:-1] + '2'

        env_dc_1 = create_env_dataframe(path_ex1, dc_1)
        env_dc_2 = create_env_dataframe(path_ex1, dc_2)

        # Check the sizes before touching any column: a trace without a
        # matching folder yields a frame with no columns at all.
        if len(env_dc_1) <= 1:
            print('trace too short: ', dc_1, ' ', len(env_dc_1))
            continue
        if env_dc_2.empty:
            print('no environment data for: ', dc_2)
            continue

        for env in (env_dc_1, env_dc_2):
            # scale by conversion_factor and convert milliseconds to hours
            env['it_power_total'] = env['it_power_total'] / conversion_factor
            env['timestamp'] = env['timestamp'] / 1000 / 60 / 60

        # Row masks for the two hosts of the 2-DC run.
        is_host_1 = env_dc_2['host_id'] == '0-0'
        is_host_2 = env_dc_2['host_id'] == '1-1'

        fig, (ax, ax1) = plt.subplots(1, 2, figsize=(17, 5))

        # Left panel: power of the single datacentre and of each of the two hosts.
        ax.plot(env_dc_1['timestamp'], env_dc_1['it_power_total'], 'o', ls='-', ms=10, color=colors2[0], label=f'{placement_policy} 1 DC', linewidth=2, markevery=0.1)
        ax.plot(env_dc_2.loc[is_host_1, 'timestamp'], env_dc_2.loc[is_host_1, 'it_power_total'], 'v', ls='-', ms=10, color = colors2[2], label=f'{placement_policy} 2 DC, 1', linewidth=2, markevery=0.15)
        ax.plot(env_dc_2.loc[is_host_2, 'timestamp'], env_dc_2.loc[is_host_2, 'it_power_total'], '^', ls='-', ms=10, color = colors2[4], label=f'{placement_policy} 2 DC, 2', linewidth=2, markevery=0.2)

        text_x_coord = max(env_dc_2['timestamp'].max(), env_dc_1['timestamp'].max())
        ax.axhline(y=theoretical_max, color='red', linestyle=':', linewidth=2)
        ax.text(text_x_coord-0.12*text_x_coord, theoretical_max-0.07, f'Max = {round(theoretical_max, 2)}', color='black', fontweight='bold')

        ax.axhline(y=theoretical_min, color='green', linestyle=':', linewidth=2)
        ax.text(text_x_coord-0.12*text_x_coord, theoretical_min-0.07, f'Idle = {round(theoretical_min, 2)}', color='black', fontweight='bold')

        # Background bands and day markers follow whichever run lasts longer.
        longer_env = env_dc_2 if env_dc_1['timestamp'].max() < env_dc_2['timestamp'].max() else env_dc_1
        ax.fill_between(longer_env['timestamp'], 0, theoretical_min, facecolor='black', alpha=0.3)
        ax.fill_between(longer_env['timestamp'], theoretical_max, top_limit, facecolor='red', alpha=0.6)

        # Cumulative sums: overall for both runs and per host for the 2-DC run.
        # The per-host columns are NaN on the rows of the other host.
        env_dc_1['cumsum'] = env_dc_1['it_power_total'].cumsum()
        env_dc_2['cumsum'] = env_dc_2['it_power_total'].cumsum()
        env_dc_2['cumsum_1'] = env_dc_2.loc[is_host_1, 'it_power_total'].cumsum()
        env_dc_2['cumsum_2'] = env_dc_2.loc[is_host_2, 'it_power_total'].cumsum()

        # The 1-DC run is the 100 % reference line.
        env_dc_1['relative_cumsum'] = 100

        # Express the 2-DC cumulative sums relative to the 1-DC cumulative sum
        # at the same timestamp. Timestamps missing from the 1-DC run stay at
        # 0.0; a float keeps the column dtype consistent.
        reference = env_dc_1.drop_duplicates('timestamp').set_index('timestamp')['cumsum']
        in_dc_1 = env_dc_2['timestamp'].isin(reference.index)
        reference_at_time = env_dc_2['timestamp'].map(reference)
        for source, target in (('cumsum', 'relative_cumsum'),
                               ('cumsum_1', 'relative_cumsum_1'),
                               ('cumsum_2', 'relative_cumsum_2')):
            env_dc_2[target] = (env_dc_2[source] * 100 / reference_at_time).where(in_dc_1, 0.0)

        # Right panel: relative cumulative sums.
        ax1.plot(env_dc_1['timestamp'], env_dc_1['relative_cumsum'], 'o', ls='-', ms=10, color=colors2[0], label=f'{placement_policy} 1 DC', linewidth=2, markevery=0.1)
        # Every second row carries the running total after both hosts.
        # NOTE(review): assumes the two hosts' rows alternate in the file — confirm.
        ax1.plot(env_dc_2['timestamp'].iloc[1::2], env_dc_2['relative_cumsum'].iloc[1::2], 's', ls='-', ms=10, color=colors2[1], label=f'{placement_policy} 2 DC sum', linewidth=2, markevery=0.15)
        ax1.plot(env_dc_2.loc[is_host_1, 'timestamp'], env_dc_2.loc[is_host_1, 'relative_cumsum_1'],
                 'v', ls='-', ms=10, color=colors2[2], label=f'{placement_policy} 2 DC, 1', linewidth=2, markevery=0.2)
        ax1.plot(env_dc_2.loc[is_host_2, 'timestamp'], env_dc_2.loc[is_host_2, 'relative_cumsum_2'],
                 '^', ls='-', ms=10, color=colors2[4], label=f'{placement_policy} 2 DC, 2', linewidth=2, markevery=0.25)

        draw_day_lines(longer_env, ax)
        draw_day_lines(longer_env, ax1)

        ax.legend()
        ax.grid(True)
        ax.set_ylim(0, top_limit)
        ax.set_title('Lineplot')
        ax.set_xlabel('Time [hours]')
        ax.set_ylabel(f'Energy [{conversion_name}J]')

        ax1.legend()
        ax1.grid(True)
        ax1.set_ylim(0)
        ax1.set_title('Cumulative Energy Relative to 1 DC')
        ax1.set_xlabel('Time [hours]')
        ax1.set_ylabel('Percentage [%]')

        fig.suptitle('Power consumption comparison 1 DC vs 2 DCs for trace ' + dc_1)
        plt.show()

        vs_graph_counter += 1

### Utilization candidates

In [None]:
# 1 DC vs 2 DC comparison for the fastest-machine utilization candidates.
plot_vs_graphs(
    candidates=utilization_candidates_fmp,
    theoretical_max=theoretical_max,
    theoretical_min=theoretical_min,
    max_graphs=MAX_NUM_OF_GRAPHS,
    placement_policy='FMP',
)

### Duration candidates

In [None]:
# 1 DC vs 2 DC comparison for the fastest-machine duration candidates.
plot_vs_graphs(
    candidates=duration_candidates_fmp,
    theoretical_max=theoretical_max,
    theoretical_min=theoretical_min,
    max_graphs=MAX_NUM_OF_GRAPHS,
    placement_policy='FMP',
)

## 2.d)

### Utilization candidates

In [None]:
# 1 DC vs 2 DC comparison for the look-ahead utilization candidates;
# the DVFS idle bound is passed as the lower reference line.
plot_vs_graphs(
    candidates=utilization_candidates_lah,
    theoretical_max=theoretical_max,
    theoretical_min=theoretical_min_dvfs,
    max_graphs=MAX_NUM_OF_GRAPHS,
    placement_policy='LAH',
)

### Duration candidates

In [None]:
# 1 DC vs 2 DC comparison for the look-ahead duration candidates;
# the DVFS idle bound is passed as the lower reference line.
plot_vs_graphs(
    candidates=duration_candidates_lah,
    theoretical_max=theoretical_max,
    theoretical_min=theoretical_min_dvfs,
    max_graphs=MAX_NUM_OF_GRAPHS,
    placement_policy='LAH',
)

# Analysis

In [None]:
# Names of the 1-datacentre runs with an AvgResourceUsage of at least 0.05.
avg_res_10 = stats_dc1_df.loc[stats_dc1_df['AvgResourceUsage'] >= 0.05, 'trace_name'].values
avg_res_10

In [None]:
# Environment samples of every experiment-1 run, in one frame.
wawa = create_dataframe(path_ex=path_ex1, file_name='environment.csv')
wawa

In [None]:
wawa.loc[wawa['trace_name'] == 'workflowhub_epigenomics_dataset-hep_grid5000_schema-0-2_epigenomics-hep-g5k-run001_parquet_tpp_fastest_machine_dcs_1']
# wawa[wawa['trace_name'] == 'spec_trace-1_parquet_tpp_look_ahead_dcs_1'].plot(x='timestamp', y='it_power_total')

# line1 = plt.plot([1, 3, 5, 2, 5, 3, 1], c='red', lw=5)
# line2 = plt.plot([7, 2, 5, 7, 5, 2, 7], c='green', lw=5)
# plt.show()

# plt.plot(wawa[wawa['trace_name'] == 'spec_trace-1_parquet_tpp_fastest_machine_dcs_1'].timestamp, wawa[wawa['trace_name'] == 'spec_trace-1_parquet_tpp_fastest_machine_dcs_1'].it_power_total, c='red', alpha=0.5)
# plt.plot(wawa[wawa['trace_name'] == 'spec_trace-1_parquet_tpp_look_ahead_dcs_1'].timestamp, wawa[wawa['trace_name'] == 'spec_trace-1_parquet_tpp_look_ahead_dcs_1'].it_power_total, c='blue', alpha=0.5)
# plt.show()

In [None]:
# Overlay the power curves of consecutive entries of avg_res_10, two per figure
# (first entry in red, second in blue).
# Stepping by two and stopping at len - 1 keeps i + 1 inside the array, so an
# odd number of traces no longer raises an IndexError on the last entry.
# NOTE(review): this presumes neighbouring entries are the two policies of the
# same trace; the order comes from the directory listing — confirm.
for i in range(0, len(avg_res_10) - 1, 2):
    first_trace = wawa[wawa['trace_name'] == avg_res_10[i]]
    second_trace = wawa[wawa['trace_name'] == avg_res_10[i + 1]]
    plt.plot(first_trace.timestamp, first_trace.it_power_total, alpha=0.5, c='red')
    plt.plot(second_trace.timestamp, second_trace.it_power_total, alpha=0.5, c='blue')
    plt.show()