In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the raw measurements (one row per recorded sample).
data = pd.read_csv('CE.bug_fix_build_clean.csv')

# Convert energy to Joules. Plain column assignment replaces the column, so the
# result is float even if the CSV stored integer readings.
# NOTE(review): the 1e-6 factor implies the file stores microjoules - confirm.
data['Energy'] = data['Energy'] * 1e-6

# Strip the literal ".x" from benchmark names. regex=False is explicit because
# older pandas treated the pattern as a regex, where "." matches any character
# (e.g. "matrix.x" would lose "ix" as well as ".x").
data['Benchmark'] = data['Benchmark'].str.replace('.x', '', regex=False)
data.head()

In [None]:
# Average Energy and Time within every (Benchmark, Flags, Type, RunId) group;
# as_index=False keeps the grouping keys as ordinary columns.
average_data = (
    data
    .groupby(['Benchmark', 'Flags', 'Type', 'RunId'], as_index=False)
    .agg({'Energy': 'mean', 'Time': 'mean'})
)

In [None]:
# Show only the rows whose flag string is exactly '-O3'.
average_data.loc[average_data["Flags"].eq('-O3')]

In [None]:
# To inspect complete frames, temporarily raise the limits instead, e.g.
#   pd.set_option("display.max_rows", 1001)
#   pd.set_option("display.max_colwidth", 10001)

pd.set_option("display.max_rows", 60)
pd.set_option("display.max_colwidth", 50)

# Only the last expression of a cell is rendered, so report both settings
# together rather than silently dropping the first one.
pd.get_option("display.max_rows"), pd.get_option("display.max_colwidth")

In [None]:
# Last few baseline/initial rows of the 'botsalgn' benchmark, ordered by run id.
average_data.loc[
    (average_data["Benchmark"] == 'botsalgn')
    & average_data["Type"].isin(['baseline', 'initial'])
].sort_values('RunId').tail()

In [None]:
# Distinct benchmark names found in the raw data.
benchmarks = pd.unique(data["Benchmark"])
print(benchmarks)

### Find best configuration for Energy

In [None]:
def best_configuration(variable, benchmark, average_data):
    """Return the flag string of the row that minimises ``variable`` for a benchmark.

    Parameters
    ----------
    variable : str
        Name of the column to minimise (the notebook passes 'Energy').
    benchmark : str
        Value compared against the "Benchmark" column to select rows.
    average_data : pandas.DataFrame
        Frame containing at least the "Benchmark" and "Flags" columns and the
        column named by ``variable``.

    Returns
    -------
    The "Flags" entry of the selected benchmark row with the smallest
    ``variable`` value.

    Raises
    ------
    ValueError
        If ``average_data`` has no rows for ``benchmark``.
    """
    benchmark_data = average_data.loc[average_data["Benchmark"] == benchmark]
    if benchmark_data.empty:
        # idxmin() on an empty selection fails with an unhelpful message;
        # name the offending benchmark instead (same exception type).
        raise ValueError("no rows found for benchmark {!r}".format(benchmark))

    min_index = benchmark_data[variable].idxmin()
    # Single label-based lookup instead of chained row-then-column indexing.
    return benchmark_data.loc[min_index, "Flags"]
    

In [None]:
# For every benchmark, print its name, then its lowest-energy flag set,
# then a separator line.
for benchmark in benchmarks:
    print(benchmark)
    print(best_configuration(variable='Energy',
                             benchmark=benchmark,
                             average_data=average_data))
    print('-----')

### Compare Energy vs Time

In [None]:
def plot_energy_vs_time(benchmark, data):
    """Scatter Energy against Time for one benchmark on a new figure.

    Rows of ``data`` whose "Benchmark" equals ``benchmark`` are split by their
    "Flags" value: rows equal to "-O3" are drawn in red and labelled '-O3',
    all remaining rows are labelled 'CE Configurations'.
    """
    rows = data.loc[data["Benchmark"] == benchmark]
    is_o3 = rows["Flags"] == "-O3"
    o3_rows = rows.loc[is_o3]
    ce_rows = rows.loc[~is_o3]

    # New figure; draw through its Axes object rather than pyplot's implicit state.
    ax = plt.figure().gca()
    ax.scatter(ce_rows["Time"].values, ce_rows["Energy"].values,
               label='CE Configurations')
    ax.scatter(o3_rows["Time"].values, o3_rows["Energy"].values,
               c='r', label='-O3')
    ax.set_title('Energy vs Time results for Combined Elimination on ' + benchmark)
    ax.legend()
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Energy (J)')

In [None]:
# One Energy-vs-Time figure per benchmark, drawn from the averaged measurements.
for benchmark in benchmarks:
    plot_energy_vs_time(benchmark=benchmark, data=average_data)

### Compare Energy vs Runs

In [None]:
def plot_energy_vs_runs(benchmark, data):
    """Plot Energy against RunId for one benchmark on a new figure.

    Only rows whose "Type" is 'baseline' or 'initial' are used, sorted by
    "RunId" before plotting.
    """
    wanted = (data["Benchmark"] == benchmark) & data["Type"].isin(['baseline', 'initial'])
    runs = data.loc[wanted].sort_values('RunId')

    # New figure; draw through its Axes object rather than pyplot's implicit state.
    ax = plt.figure().gca()
    ax.plot(runs["RunId"].values, runs["Energy"].values)
    ax.set_title('Energy vs run id for Combined Elimination on ' + benchmark)
    ax.set_xlabel('Run Id')
    ax.set_ylabel('Energy (J)')

In [None]:
# One Energy-vs-RunId figure per benchmark, drawn from the averaged measurements.
for benchmark in benchmarks:
    plot_energy_vs_runs(benchmark=benchmark, data=average_data)

### Compare best improvement of Energy and Time to the -O3 baseline

In [None]:
def o3_relative_data(average_data, benchmarks):
    """Express Energy and Time of every row relative to its benchmark's -O3 row.

    Parameters
    ----------
    average_data : pandas.DataFrame
        Frame with at least "Benchmark", "Flags", "Energy" and "Time" columns.
        It is not modified.
    benchmarks : iterable
        Benchmark names to normalise. Rows of benchmarks not listed here keep
        their original values.

    Returns
    -------
    pandas.DataFrame
        A copy of ``average_data`` in which, for each listed benchmark,
        "Energy" and "Time" are divided by the values of that benchmark's
        first "-O3" row (first in ``average_data`` row order).

    Raises
    ------
    IndexError
        If a listed benchmark has no row whose "Flags" equal "-O3".
    """
    o3_data = average_data.loc[average_data["Flags"] == "-O3"]

    # astype returns a new frame, so the caller's data is left untouched; casting
    # to float up front keeps the in-place divisions below dtype-safe even when
    # the inputs are integer columns.
    relative_data = average_data.astype({"Energy": float, "Time": float})

    for benchmark in benchmarks:
        # Build the mask from o3_data itself rather than relying on pandas to
        # align a mask computed on a differently-indexed frame.
        o3 = o3_data.loc[o3_data["Benchmark"] == benchmark]
        if o3.empty:
            raise IndexError(
                "no -O3 baseline row found for benchmark {!r}".format(benchmark)
            )
        baseline = o3.iloc[0]

        # Compute the row selection once and reuse it for both columns.
        rows = relative_data["Benchmark"] == benchmark
        relative_data.loc[rows, "Energy"] /= baseline["Energy"]
        relative_data.loc[rows, "Time"] /= baseline["Time"]

    return relative_data


def plot_relative_to_o3(variable, relative_data, benchmarks):
    """Plot, per benchmark, the smallest ``variable`` value found in ``relative_data``.

    Each benchmark contributes one 'x' marker at the minimum of its
    ``variable`` column; a horizontal line at 1 is drawn with the label 'O3'.
    A new figure is opened for the plot.
    """
    best_values = []
    for name in benchmarks:
        column = relative_data.loc[relative_data["Benchmark"] == name, variable]
        best_values.append(column.min())

    # New figure; draw through its Axes object rather than pyplot's implicit state.
    ax = plt.figure().gca()
    ax.scatter(benchmarks, best_values, marker='x', label='CE')
    ax.axhline(1, label='O3')

    ax.set_title('Best relative improvement of ' + variable + ' relative to -O3')
    ax.set_ylabel(variable + ' (relative to -O3)')
    ax.legend()


In [None]:
# Normalise every benchmark against its -O3 row, then plot the best relative
# result per benchmark for each metric.
relative_data = o3_relative_data(average_data=average_data, benchmarks=benchmarks)
plot_relative_to_o3(variable='Energy', relative_data=relative_data, benchmarks=benchmarks)
plot_relative_to_o3(variable='Time', relative_data=relative_data, benchmarks=benchmarks)