In [1]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from tabulate import tabulate

In [2]:
# Constants: regions, months, tasks and machines covered by the experiments.
# Each long-name list has a companion list of short codes used in file names.
regions = [
    'Great Britain',
    'Germany',
    'California',
    'Texas',
    'South Africa',
    'Tokyo',
    'New South Wales',
]
short_regions = ['gb', 'de', 'ca', 'tx', 'zaf', 'tyo', 'nsw']

months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
short_months = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]

tasks = ['bowtie2_build', 'fastp', 'fastqc', 'trimgalore']

# `machines`, `exp_machines` and `models` are zipped together further down,
# so they must stay the same length and in the same order.
machines = [
    'gcpc2',
    'gcpn2',
    'gcpn1',
    'gpg13',
    'gpg14',
    'gpg15',
    'gpg22',
    'hu26',
    'server',
]
exp_machines = [
    'gcpc2',
    'gcpn2',
    'gcpn1',
    'gpg13-performance',
    'gpg14-performance',
    'gpg15-performance',
    'gpg22-performance',
    'hu26-performance',
    'server-performance',
]
models = [
    'gcpc2_default_minmax',
    'gcpn2_default_minmax',
    'gcpn1_default_minmax',
    'gpg13_performance_linear',
    'gpg14_performance_linear',
    'gpg15_performance_linear',
    'gpg22_performance_linear',
    'hu26_performance_linear',
    'server_performance_linear',
]

In [3]:
# Metric names used as dictionary keys into the parsed summary data.

# -- energy (kWh in the summary files; the unit suffix is stripped on parse)
ENERGY_CONSUMPTION = "Energy Consumption (exc. PUE)"
ENERGY_CONSUMPTION_PUE = "Energy Consumption (inc. PUE)"
MEMORY_CONSUMPTION = "Memory Energy Consumption (exc. PUE)"
MEMORY_CONSUMPTION_PUE = "Memory Energy Consumption (inc. PUE)"
RES_MEM_ENERGY_CONSUMPTION = "Reserved Memory Energy Consumption"

# -- carbon (gCO2e in the summary files; the unit suffix is stripped on parse)
CARBON_EMISSIONS = "Operational Carbon Emissions"
EMBODIED_CARBON_EMISSIONS = "Embodied Carbon Emissions"
TOTAL_CARBON_EMISSIONS = "Total Carbon Emissions"
RES_MEM_CARBON_EMISSIONS = "Reserved Memory Carbon Emissions"

In [4]:
# Parse Key Details from Summary Files
def get_data(lines):
    """Parse ``<prefix><key>: <value>`` lines into a ``{key: value}`` dict.

    The first two characters of each line are dropped before the key is read
    (presumably a "- " bullet prefix; confirm against the summary file
    format). The unit suffixes ``kWh`` and ``gCO2e`` are removed from the
    value and surrounding whitespace is trimmed. Values are kept as strings.

    Args:
        lines: Iterable of raw text lines.

    Returns:
        dict mapping each key to its value string with units stripped.

    Raises:
        ValueError: If a non-blank line contains no ':' separator.
    """
    data = {}

    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Blank lines carry no data; skip them instead of crashing.
            continue

        # Split on the FIRST colon only so values that contain ':' themselves
        # (e.g. timestamps) are kept whole instead of being truncated.
        key, separator, value = stripped.partition(':')
        if not separator:
            raise ValueError(f"Expected '<key>: <value>' but got: {stripped!r}")

        data[key[2:].strip()] = value.replace('kWh', '').replace('gCO2e', '').strip()

    return data


def parse_summary(filename):
    """Read a summary file and return its ``(info, data)`` sections.

    ``info`` is parsed from the file's lines at indices 2-4 and ``data`` from
    the lines at indices 7-13, both through ``get_data``.
    """
    with open(filename) as handle:
        contents = list(handle)

    return get_data(contents[2:5]), get_data(contents[7:14])


def get_average(first, second, third):
    """Return the per-key arithmetic mean of three mappings.

    The keys are taken from ``first``; each value is converted with ``float``
    before averaging, so numeric strings are accepted.
    """
    return {
        name: (float(first[name]) + float(second[name]) + float(third[name])) / 3
        for name in first
    }


def print_summary(data):
    """Return the subset of ``data`` holding the five headline metrics.

    Despite the name, nothing is printed: the function only selects the
    energy, memory-energy, operational, embodied and total carbon entries.
    """
    wanted = (
        ENERGY_CONSUMPTION,
        MEMORY_CONSUMPTION,
        CARBON_EMISSIONS,
        EMBODIED_CARBON_EMISSIONS,
        TOTAL_CARBON_EMISSIONS,
    )
    return {metric: data[metric] for metric in wanted}


def print_info(data_1, data_2, data_3):
    """Print an error message unless all three records compare equal.

    Always returns ``None``.
    """
    if data_1 == data_2 == data_3:
        return None

    print("[ERROR] Workflow Data does not match ...")
    return None

def report_summary_for(filename_1, filename_2, filename_3):
    """Average the data sections of three summary files and return the headline metrics.

    Each file is parsed with ``parse_summary`` (its info section is ignored),
    the three data sections are averaged with ``get_average`` and the result
    is reduced to the headline metrics by ``print_summary``.
    """
    per_run = [
        parse_summary(path)[1]
        for path in (filename_1, filename_2, filename_3)
    ]
    return print_summary(get_average(*per_run))

In [5]:
# Load the summaries into task_stats_avg[region][month][task][machine].
# Each leaf is the mean over the three iterations of that configuration.
task_stats_avg = {}

for region in short_regions:
    per_month_avg = task_stats_avg[region] = {}
    for month in short_months:
        per_task_avg = per_month_avg[month] = {}
        for task in tasks:
            per_machine_avg = per_task_avg[task] = {}
            for machine, model in zip(machines, models):
                # One summary file per iteration (1, 2, 3).
                temp_f = [
                    f'../data/results/resource-assignment/out/{region}/{machine}-{task}-{iteration}-{month}-{region}-2024-{month}-mid-{model}-summary.txt'
                    for iteration in (1, 2, 3)
                ]
                per_machine_avg[machine] = report_summary_for(*temp_f)


In [6]:
# Load the "-marg-" summaries into task_stats_marg[region][month][task][machine].
# Each leaf is the mean over the three iterations of that configuration.
task_stats_marg = {}

for region in short_regions:
    per_month_marg = task_stats_marg[region] = {}
    for month in short_months:
        per_task_marg = per_month_marg[month] = {}
        for task in tasks:
            per_machine_marg = per_task_marg[task] = {}
            for machine, model in zip(machines, models):
                # One summary file per iteration (1, 2, 3).
                temp_f = [
                    f'../data/results/resource-assignment/out/{region}/{machine}-{task}-{iteration}-{month}-{region}-2024-{month}-mid-marg-{model}-summary.txt'
                    for iteration in (1, 2, 3)
                ]
                per_machine_marg[machine] = report_summary_for(*temp_f)


In [7]:
def print_stats_for_month(month, all_data_avg, all_data_marg, task_runtimes):
    """Print and return the task/machine comparison table for one month.

    Args:
        month: Short month key (an entry of ``short_months``).
        all_data_avg: Nested mapping ``[region][month][task][machine]`` of
            summary metrics; its energy and operational-carbon values fill
            the "energy consumption" and "average emissions" columns.
        all_data_marg: Mapping of the same shape; its operational-carbon
            value fills the "marginal emissions" column.
        task_runtimes: Mapping ``[task][machine]`` of runtimes, shown in the
            "runtime (h)" column.

    Returns:
        list of rows ``[task, machine, runtime, energy, avg emissions,
        marg emissions]``. There is one row per (region, task, machine)
        combination, ordered by region first; the region itself is not
        included in the row.
    """
    headers = ['task', 'machine', 'runtime (h)', 'energy consumption (kWh)', 'average emissions (gCO2e)', 'marginal emissions (gCO2e)']
    table_data = []

    for region in short_regions:
        for task in tasks:
            for machine in machines:
                # Read from the arguments rather than the notebook-level
                # globals, so the function reports on whatever data it is given.
                curr = all_data_avg[region][month][task][machine]
                curr_marg = all_data_marg[region][month][task][machine]

                # Reported energy is processor energy plus memory energy, both excluding PUE.
                curr_energy = round(curr[ENERGY_CONSUMPTION] + curr[MEMORY_CONSUMPTION], 3)
                curr_emissions = round(curr[CARBON_EMISSIONS], 2)
                curr_runtime = round(task_runtimes[task][machine], 2)
                curr_marg_emissions = round(curr_marg[CARBON_EMISSIONS], 2)

                table_data.append([task, machine, curr_runtime, curr_energy, curr_emissions, curr_marg_emissions])

    print("Comparison of Tasks @ Varied Frequencies using Average CI Data")
    print(tabulate(table_data, headers, tablefmt='orgtbl'))

    return table_data


In [9]:
def get_runtime(filename, column=19):
    """Return the integer stored in one field of a trace file's second line.

    Only the first two lines are read, so large trace files are not loaded
    into memory in full.

    Args:
        filename: Path to a comma-separated trace file. The first line is
            skipped (presumably a header row; confirm against the trace
            format).
        column: Zero-based index of the field to read. Defaults to 19, the
            index this function has always used.

    Returns:
        The field converted with ``int``. The caller divides the value by
        1000 and 3600, so it is presumably a duration in milliseconds.

    Raises:
        IndexError: If the file has fewer than two lines or the second line
            has too few fields.
        ValueError: If the field is not an integer literal.
    """
    with open(filename, 'r') as f:
        f.readline()  # skip the first line
        row = f.readline()

    if not row:
        raise IndexError(f'{filename} has no second line to read a runtime from')

    return int(row.split(',')[column])

# all_task_runtimes[task][machine]: mean runtime over the three trace
# iterations, scaled by 1/1000 and 1/3600 (presumably milliseconds to hours;
# the value is shown under a "runtime (h)" column).
all_task_runtimes = {}

for task in tasks:
    runtimes_by_machine = all_task_runtimes[task] = {}
    for machine, exp_machine in zip(machines, exp_machines):
        filebase = f'../data/traces/tasks/{task}/{exp_machine}-{task}-ITER.csv'
        iteration_runtimes = [
            get_runtime(filebase.replace('ITER', str(iteration)))
            for iteration in (1, 2, 3)
        ]
        avg_runtime = sum(iteration_runtimes) / len(iteration_runtimes)
        runtimes_by_machine[machine] = avg_runtime / 1000 / 3600

In [10]:
# all_ra_data[task][machine] keeps the runtime and energy of the first row
# seen for that pair, and collects the average / marginal emissions of every
# row (all months, and every region within a month) into two lists.
all_ra_data = {}

for month in short_months:
    print('\n' + month)
    month_data = print_stats_for_month(month, task_stats_avg, task_stats_marg, all_task_runtimes)

    for c_task, c_mach, c_runtime, c_energy, c_avg_ems, c_marg_ems in month_data:
        record = all_ra_data.setdefault(c_task, {}).setdefault(
            c_mach,
            {
                'runtime': c_runtime,
                'energy': c_energy,
                'avg emissions': [],
                'marg emissions': [],
            },
        )
        record['avg emissions'].append(c_avg_ems)
        record['marg emissions'].append(c_marg_ems)


jan
Comparison of Tasks @ Varied Frequencies using Average CI Data
| task          | machine   |   runtime (h) |   energy consumption (kWh) |   average emissions (gCO2e) |   marginal emissions (gCO2e) |
|---------------+-----------+---------------+----------------------------+-----------------------------+------------------------------|
| bowtie2_build | gcpc2     |          0.16 |                      0.003 |                        0.68 |                         0.7  |
| bowtie2_build | gcpn2     |          0.18 |                      0.001 |                        0.24 |                         0.27 |
| bowtie2_build | gcpn1     |          0.25 |                      0.001 |                        0.26 |                         0.37 |
| bowtie2_build | gpg13     |          0.25 |                      0.016 |                        3.58 |                         5.04 |
| bowtie2_build | gpg14     |          0.25 |                      0.016 |                        3.65 |            

In [11]:
# Collapse the collected emissions of every (task, machine) pair to their mean,
# print the resulting table and export it as CSV.
headers = ['task', 'machine', 'runtime (h)', 'energy consumption (kWh)', 'average emissions (gCO2e)', 'marginal emissions (gCO2e)']
mean_data = []

for task in tasks:
    for machine in machines:
        entry = all_ra_data[task][machine]
        avg_series = entry['avg emissions']
        marg_series = entry['marg emissions']
        mean_data.append([
            task,
            machine,
            entry['runtime'],
            entry['energy'],
            round(sum(avg_series) / len(avg_series), 2),
            round(sum(marg_series) / len(marg_series), 2),
        ])

print(tabulate(mean_data, headers, tablefmt='orgtbl'))

with open('../data/results/resource-assignment/table_mean_data.csv', 'w') as f:
    f.write(','.join(headers) + '\n')
    f.writelines(','.join(map(str, row)) + '\n' for row in mean_data)


| task          | machine   |   runtime (h) |   energy consumption (kWh) |   average emissions (gCO2e) |   marginal emissions (gCO2e) |
|---------------+-----------+---------------+----------------------------+-----------------------------+------------------------------|
| bowtie2_build | gcpc2     |          0.16 |                      0.003 |                        0.99 |                         0.83 |
| bowtie2_build | gcpn2     |          0.18 |                      0.001 |                        0.34 |                         0.33 |
| bowtie2_build | gcpn1     |          0.25 |                      0.001 |                        0.38 |                         0.45 |
| bowtie2_build | gpg13     |          0.25 |                      0.016 |                        5.18 |                         6.11 |
| bowtie2_build | gpg14     |          0.25 |                      0.016 |                        5.28 |                         6.25 |
| bowtie2_build | gpg15     |          0.25 |   