In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the measurements and normalise them in one non-mutating chain, so re-running
# this cell always rebuilds `data` from the file instead of editing a frame in place.
data = (
    pd.read_csv("fix_flags.bc.20180728-102532.csv")  # NPB
    # Column headers may carry surrounding whitespace; strip it so plain names work below.
    .rename(columns=lambda x: x.strip())
    # Scale Energy by 1e-6 so it is expressed in Joules
    # (raw unit is presumably microjoules -- TODO confirm against the data source).
    # A fresh column is assigned instead of `data.loc[:, 'Energy'] *= 1e-6`, because
    # writing float results back into the existing column in place is deprecated in
    # recent pandas when that column was parsed with an integer dtype.
    .assign(Energy=lambda frame: frame["Energy"] * 1e-6)
)
data.head()

In [None]:
# Collect every run whose "Success" value equals False so the failures can be inspected.
failed = data.loc[data["Success"].eq(False)]
failed

In [None]:
# Keep every row except those whose "Flags" value is exactly "-O0".
no_o0 = data.loc[data["Flags"].ne("-O0")]

In [None]:
def plot_benchmark_results(benchmark, frame):
    """Scatter-plot energy against time for one benchmark, one series per flag.

    Parameters
    ----------
    benchmark : str
        Value to match in the "Benchmark" column of ``frame``; also used in
        the plot title.
    frame : pandas.DataFrame
        Measurements with "Benchmark", "Flags", "Time" and "Energy" columns.

    Returns
    -------
    matplotlib.axes.Axes
        The axes of the newly created figure, so callers can adjust or save
        the plot (the figure itself is reachable as ``ax.figure``).
    """
    # A distinct local name avoids shadowing the notebook-level `data` frame.
    benchmark_rows = frame[frame["Benchmark"] == benchmark]

    # Flag label paired with its marker; "Flags" stores the label with a leading dash.
    flag_markers = [('O1', 'x'), ('O2', 'o'), ('O3', 'v')]

    _, ax = plt.subplots()
    for flag, marker in flag_markers:
        flag_rows = benchmark_rows[benchmark_rows["Flags"] == "-" + flag]
        ax.scatter(
            flag_rows["Time"].values,
            flag_rows["Energy"].values,
            marker=marker,
            label=flag,
        )

    # Decorate through the Axes object rather than pyplot's "current axes" state,
    # so the labels are guaranteed to land on the axes created above.
    ax.legend()
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Energy (J)')
    ax.set_title('Energy vs Time for ' + benchmark + ' benchmark')

    return ax

def summary(data, variable, ddof=0):
    """Compute descriptive statistics for one column of a data frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the measurements.
    variable : str
        Name of the numeric column to summarise.
    ddof : int, optional
        Delta degrees of freedom forwarded to ``np.std``. The default of 0
        (population standard deviation) reproduces the previous behaviour;
        pass 1 for the sample standard deviation.

    Returns
    -------
    list
        ``[mean, median, min, max, range, relative_range, std, coeff_var]``,
        where ``relative_range`` and ``coeff_var`` are expressed as a
        percentage of the mean.
    """
    # Extract the column once instead of once per statistic.
    values = data[variable].values

    mean = np.mean(values)
    median = np.median(values)
    # Local names deliberately avoid shadowing the builtins min / max / range.
    lowest = np.min(values)
    highest = np.max(values)
    spread = highest - lowest
    relative_range = spread / mean * 100
    std = np.std(values, ddof=ddof)
    coeff_var = std / mean * 100

    return [mean, median, lowest, highest, spread, relative_range, std, coeff_var]

def summary_frame(array):
    """Turn a 2-D array of per-benchmark statistics into a labelled DataFrame.

    The first column of ``array`` supplies the row labels; the remaining
    eight columns are converted to float and named after the statistics
    they hold.
    """
    stat_names = ['Mean', 'Median', 'Min', 'Max', 'Range', 'Relative Range', 'Std', 'CV']
    row_labels = array[:, 0]
    stat_values = array[:, 1:]
    return pd.DataFrame(stat_values, index=row_labels, columns=stat_names, dtype=float)

In [None]:
benchmarks = data["Benchmark"].unique()

# The flag and success conditions do not depend on the benchmark, so apply them once.
o3_successful_runs = data[data["Flags"].eq("-O3") & data["Success"].eq(True)]

summary_stats_energy = []
summary_stats_time = []

for benchmark in benchmarks:
    benchmark_o3_data = o3_successful_runs[o3_successful_runs["Benchmark"] == benchmark]
    # Benchmarks without any successful -O3 run contribute no summary row.
    if benchmark_o3_data.empty:
        continue

    energy_row = [benchmark] + summary(benchmark_o3_data, "Energy")
    time_row = [benchmark] + summary(benchmark_o3_data, "Time")
    summary_stats_energy.append(energy_row)
    summary_stats_time.append(time_row)

In [None]:
# Stack the per-benchmark energy rows into an array, then label them as a DataFrame.
energy_rows = np.array(summary_stats_energy)
summary_frame_energy = summary_frame(energy_rows)
summary_frame_energy

In [None]:
# Stack the per-benchmark time rows into an array, then label them as a DataFrame.
time_rows = np.array(summary_stats_time)
summary_frame_time = summary_frame(time_rows)
summary_frame_time

In [None]:
# Disabled alternative: benchmarks whose CV is below 1.0 for both energy and time.
# energy_low_cv = set(summary_frame_energy[summary_frame_energy["CV"] < 1.0].index)
# time_low_cv = set(summary_frame_time[summary_frame_time["CV"] < 1.0].index)
# print(energy_low_cv.intersection(time_low_cv))

# Row labels of the time summary whose "CV" value is below 0.1.
stable_benchmarks_time = (
    summary_frame_time
    .loc[lambda stats: stats["CV"] < 0.1]
    .index
)
stable_benchmarks_time

In [None]:
# Row labels of the energy summary whose "CV" value is below 0.1.
stable_benchmarks_energy = (
    summary_frame_energy
    .loc[lambda stats: stats["CV"] < 0.1]
    .index
)
stable_benchmarks_energy

In [None]:
# Draw one energy-vs-time scatter per benchmark, using the frame without -O0 rows.
for benchmark in benchmarks:
    plot_benchmark_results(frame=no_o0, benchmark=benchmark)