In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textwrap import wrap

# Add parent directory to sys.path in order to find common module
import sys
sys.path.insert(0,'../')

import common.datautils as datautils

In [None]:
# Load the CE results archive through the project's datautils helper.
# NOTE(review): the returned frame is expected to have Benchmark, Flags, Type,
# RunId, Energy and Time columns (they are read by the cells below) - confirm in datautils.
average_data = datautils.load_ce_results('CE.results.zip')
# Preview the first rows of the loaded results.
average_data.head()

In [None]:
# Distinct benchmark names, in order of first appearance; reused by the cells below.
benchmarks = average_data["Benchmark"].unique()
print(benchmarks)

### CE Overview

In [None]:
def improvement_data_frame(data, include_negative):
    """Summarise, per benchmark, how much the final CE baseline improved on -O3.

    For every benchmark found in ``data`` the reference measurement is the first
    row whose ``Flags`` is exactly ``'-O3'``. The final measurement is the
    ``'baseline'`` row with the highest ``RunId``; when a benchmark has no
    baseline rows the reference itself is used, giving an improvement of 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Results with ``Benchmark``, ``Flags``, ``Type``, ``RunId``, ``Energy``
        and ``Time`` columns.
    include_negative : bool
        When False, regressions (negative improvements) are reported as 0.

    Returns
    -------
    pandas.DataFrame
        Indexed by benchmark name, with integer percentage columns
        ``'Energy Improvement'`` and ``'Time Improvement'``.

    Raises
    ------
    IndexError
        If a benchmark has no ``'-O3'`` row.
    """
    # Take the benchmark list from the frame being summarised rather than from a
    # notebook-level global, so the result never depends on hidden kernel state.
    benchmark_names = data["Benchmark"].unique()
    rows = []

    for name in benchmark_names:
        runs = data[data["Benchmark"] == name]

        reference = runs[runs["Flags"] == '-O3'].iloc[0]
        initial_energy = reference["Energy"]
        initial_time = reference["Time"]

        baselines = runs[runs["Type"] == 'baseline']
        if len(baselines) > 0:
            # The baseline with the highest RunId is the last one CE settled on.
            final = baselines.loc[baselines["RunId"].idxmax()]
            final_energy = final["Energy"]
            final_time = final["Time"]
        else:
            final_energy = initial_energy
            final_time = initial_time

        energy_improvement = (1 - final_energy / initial_energy) * 100
        time_improvement = (1 - final_time / initial_time) * 100

        if not include_negative:
            # Report regressions as "no improvement".
            if energy_improvement < 0:
                energy_improvement = 0
            if time_improvement < 0:
                time_improvement = 0

        rows.append(
            [
                int(np.round(energy_improvement)),
                int(np.round(time_improvement)),
            ])

    return pd.DataFrame(rows, index=benchmark_names, columns=['Energy Improvement', 'Time Improvement'])

def time_overview(data):
    """Count runs and total measured time for every benchmark in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Results with ``Benchmark``, ``Type`` and ``Time`` columns.
        ``Time`` is treated as seconds when converting to hours.

    Returns
    -------
    pandas.DataFrame
        Indexed by benchmark name with the columns
        ``'Number of Baseline Updates'`` (rows whose ``Type`` is ``'baseline'``),
        ``'Total Runs'`` (all rows) and ``'Total run time'`` (sum of ``Time``
        in hours, rounded to two decimals).
    """
    # Take the benchmark list from the frame being summarised rather than from a
    # notebook-level global, so the result never depends on hidden kernel state.
    benchmark_names = data["Benchmark"].unique()
    rows = []

    for name in benchmark_names:
        runs = data[data["Benchmark"] == name]

        rows.append(
            [
                len(runs[runs["Type"] == 'baseline']),
                len(runs),
                np.round(np.sum(runs["Time"]) / 60 / 60, 2),  # seconds -> hours
            ])

    return pd.DataFrame(rows,
                        index=benchmark_names,
                        columns=[
                            'Number of Baseline Updates',
                            'Total Runs',
                            'Total run time',
                        ])

In [None]:
def show_overview(data, include_negative):
    """Print summary statistics for a CE result set and return the per-benchmark table.

    Prints the mean energy/time improvement, how many benchmarks improved, how
    many benchmarks have differing energy and time improvements, and the total
    run time; then shows a box plot of both improvement columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Results frame accepted by ``improvement_data_frame`` and ``time_overview``.
    include_negative : bool
        Forwarded to ``improvement_data_frame``; when False, regressions are
        reported as 0.

    Returns
    -------
    pandas.DataFrame
        The improvement table and the time overview joined column-wise.
    """
    # Forward the caller's choice instead of always clamping negatives.
    improvement = improvement_data_frame(data, include_negative)
    run_times = time_overview(data)

    energy = improvement["Energy Improvement"]
    time_saved = improvement["Time Improvement"]

    average_energy_improvement = np.mean(energy)
    average_time_improvement = np.mean(time_saved)

    num_energy_improvement = len(improvement[energy > 0])
    num_time_improvement = len(improvement[time_saved > 0])
    # Count benchmarks whose two improvements DIFFER, matching the printed label.
    num_energy_time_different = len(improvement[time_saved != energy])

    print(f"Average energy improvement: {average_energy_improvement}")
    print(f"Average time improvement: {average_time_improvement}")
    print(f"Benchmarks with energy improvement: {num_energy_improvement}")
    print(f"Benchmarks with time improvement: {num_time_improvement}")
    print(f"Energy != Time: {num_energy_time_different}")

    total_runtime = np.sum(run_times["Total run time"])

    print("")
    print(f"Total Run Time: {total_runtime} hours")

    plotted_columns = ["Energy Improvement", "Time Improvement"]

    plt.figure()
    # One box per column; boxes are placed at x = 1, 2 by default.
    plt.boxplot(improvement[plotted_columns].values)
    plt.xticks([1, 2], plotted_columns)
    plt.ylabel('Improvement over -O3 (%)')
    plt.show()

    return pd.concat([improvement, run_times], axis=1)

# Overview of the full result set, with regressions reported as 0 (include_negative=False).
show_overview(average_data, False)

### Find best configuration for Energy

In [None]:
# For each benchmark, print whatever datautils.best_configuration reports for
# the 'Energy' metric, followed by a separator line.
# NOTE(review): the helper's return type is not visible here - confirm it prints usefully.
for benchmark in benchmarks:
    print(benchmark)
    print(datautils.best_configuration('Energy', benchmark, average_data))
    print('-----')

### Compare Energy vs Time

In [None]:
def plot_energy_vs_time(benchmark, data):
    """Draw an Energy-against-Time scatter plot for one benchmark.

    Rows whose ``Flags`` equal ``"-O3"`` are drawn in red and labelled '-O3';
    every other row of the benchmark is labelled 'CE Configurations'.

    Parameters
    ----------
    benchmark : str
        Benchmark name to select from the ``Benchmark`` column.
    data : pandas.DataFrame
        Results with ``Benchmark``, ``Flags``, ``Time`` and ``Energy`` columns.
    """
    rows = data.loc[data["Benchmark"] == benchmark]
    is_o3 = rows["Flags"] == "-O3"
    reference_rows = rows.loc[is_o3]
    ce_rows = rows.loc[~is_o3]

    plt.figure()
    plt.scatter(ce_rows["Time"].values, ce_rows["Energy"].values, label='CE Configurations')
    plt.scatter(reference_rows["Time"].values, reference_rows["Energy"].values, c='r', label='-O3')
    plt.title('Energy vs Time results for CE on ' + benchmark)
    plt.xlabel('Time (s)')
    plt.ylabel('Energy (J)')
    plt.legend()
    plt.grid()

In [None]:
# Number of distinct benchmarks, i.e. how many figures each plotting loop below produces.
print(len(benchmarks))

In [None]:
# One Energy-vs-Time scatter plot per benchmark.
for benchmark in benchmarks:
    plot_energy_vs_time(benchmark, average_data)

### Compare Baseline Updates

In [None]:
def plot_energy_vs_runs(benchmark, data):
    """Plot the energy of successive baselines for one benchmark.

    Selects the benchmark's rows whose ``Type`` is ``'baseline'`` or
    ``'initial'``, orders them by ``RunId`` and plots their ``Energy`` against
    a 1-based baseline number.

    Parameters
    ----------
    benchmark : str
        Benchmark name to select from the ``Benchmark`` column.
    data : pandas.DataFrame
        Results with ``Benchmark``, ``Type``, ``RunId`` and ``Energy`` columns.
    """
    is_benchmark = data["Benchmark"] == benchmark
    is_baseline_like = data["Type"].isin(['baseline', 'initial'])
    baseline_runs = data.loc[is_benchmark & is_baseline_like].sort_values('RunId')

    energies = baseline_runs["Energy"].values
    baseline_numbers = np.arange(1, len(baseline_runs) + 1)

    plt.figure()
    plt.plot(baseline_numbers, energies)
    # Wrap the title at 50 characters so long benchmark names stay inside the figure.
    title_lines = wrap('Energy vs Baseline Number for Combined Elimination on ' + benchmark, 50)
    plt.title('\n'.join(title_lines))
    plt.xlabel('Baseline Number')
    plt.ylabel('Energy (J)')
    plt.grid()

In [None]:
# One Energy-vs-baseline-number line plot per benchmark.
for benchmark in benchmarks:
    plot_energy_vs_runs(benchmark, average_data)

### Compare best improvement of Energy and Time to the -O3 baseline

In [None]:
def o3_relative_data(average_data, benchmarks):
    """Express Energy and Time of every run relative to its benchmark's -O3 run.

    For each name in ``benchmarks`` the ``Energy`` and ``Time`` of all rows of
    that benchmark are divided by the values of the benchmark's first row whose
    ``Flags`` is exactly ``"-O3"``. The input frame is not modified.

    Parameters
    ----------
    average_data : pandas.DataFrame
        Results with ``Benchmark``, ``Flags``, ``Energy`` and ``Time`` columns.
    benchmarks : iterable of str
        Benchmark names to normalise.

    Returns
    -------
    pandas.DataFrame
        A copy of ``average_data`` with normalised ``Energy``/``Time`` and the
        ``-O3`` rows themselves removed.

    Raises
    ------
    IndexError
        If a requested benchmark has no ``"-O3"`` row.
    """
    o3_data = average_data.loc[average_data["Flags"] == "-O3"]

    # astype returns a new frame, so the caller's data stays untouched; casting
    # to float up front keeps the in-place division valid for integer columns.
    relative_data = average_data.astype({"Energy": float, "Time": float})

    for benchmark in benchmarks:
        # Build the mask from o3_data itself rather than relying on index
        # alignment with a mask computed on a different frame.
        o3 = o3_data.loc[o3_data["Benchmark"] == benchmark]
        o3_energy = o3.iloc[0]["Energy"]
        o3_time = o3.iloc[0]["Time"]

        in_benchmark = relative_data["Benchmark"] == benchmark
        relative_data.loc[in_benchmark, "Energy"] /= o3_energy
        relative_data.loc[in_benchmark, "Time"] /= o3_time

    return relative_data[relative_data["Flags"] != '-O3']


In [None]:
# Energy/Time of every non -O3 run, divided by the -O3 values of the same benchmark.
relative_data = o3_relative_data(average_data, benchmarks)

In [None]:
def plot_relative_to_o3(variable, relative_data, benchmarks):
    """Scatter, per benchmark, the lowest value of ``variable`` found in ``relative_data``.

    A horizontal line at 1 marks the -O3 reference, so points below the line
    are configurations that beat -O3.

    Parameters
    ----------
    variable : str
        Column of ``relative_data`` to plot (the notebook uses 'Energy' and 'Time').
    relative_data : pandas.DataFrame
        Frame with a ``Benchmark`` column and the ``variable`` column, with
        values expressed relative to -O3.
    benchmarks : sequence of str
        Benchmark names, used as the x positions.
    """
    best_values = [
        relative_data.loc[relative_data["Benchmark"] == benchmark, variable].min()
        for benchmark in benchmarks
    ]

    plt.figure(figsize=(8,6))
    plt.scatter(benchmarks, best_values, marker='x', label='CE')
    plt.axhline(1, label='O3')

    plt.title('Best improvement of ' + variable + ' relative to -O3 found by CE.')
    plt.ylabel(variable + ' (relative to -O3)')
    plt.xticks(rotation=90)
    plt.legend()
    
def double_plot_relative_to_o3(relative_data, benchmarks):
    """Scatter, per benchmark, the lowest relative Energy and Time on one figure.

    A horizontal line at 1 marks the -O3 reference, so points below the line
    are configurations that beat -O3.

    Parameters
    ----------
    relative_data : pandas.DataFrame
        Frame with ``Benchmark``, ``Energy`` and ``Time`` columns, with values
        expressed relative to -O3.
    benchmarks : sequence of str
        Benchmark names, used as the x positions.
    """
    def best_per_benchmark(variable):
        # Lowest relative value of `variable` for each benchmark, in input order.
        return [
            relative_data.loc[relative_data["Benchmark"] == benchmark, variable].min()
            for benchmark in benchmarks
        ]

    energy = best_per_benchmark('Energy')
    time = best_per_benchmark('Time')

    plt.figure(figsize=(16, 8))
    plt.scatter(benchmarks, energy, marker='o', label='Energy')
    plt.scatter(benchmarks, time, marker='x', label='Time')

    plt.axhline(1, label='O3')

    plt.title('Effect of best configuration found by CE on Energy and Time relative to -O3.')
    plt.ylabel('Relative to -O3')
    plt.xticks(rotation=90)
    plt.yticks(np.arange(0.5, 1.5, 0.1))
    plt.legend()
    plt.grid()

In [None]:
# Best relative Energy and Time per benchmark on one shared figure.
double_plot_relative_to_o3(relative_data, benchmarks)

In [None]:
# The same comparison as separate figures, one per metric.
plot_relative_to_o3('Energy', relative_data, benchmarks)
plot_relative_to_o3('Time', relative_data, benchmarks)