In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

from textwrap import wrap

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, LeavePOut
from sklearn.neighbors import KNeighborsClassifier

import sys
sys.path.insert(0,'../')
import common.datautils as datautils
import common.featuresutils as featuresutils

In [None]:
# Load the results stored in 'CE.results.zip' through the project helper.
# Later cells read the "Benchmark", "Flags", "Energy" and "Time" columns of this frame.
ce_data = datautils.load_ce_results('CE.results.zip')
ce_data.head()

In [None]:
# Benchmarks to analyse: every distinct name in ce_data except LBM.
# LBM is excluded by name rather than by position, so the selection does not
# depend on the order in which unique() happens to return the benchmarks.
# NOTE(review): assumes LBM is spelled 'lbm' in ce_data, as it is in the later
# multi-label filter — confirm.
EXCLUDED_BENCHMARK = 'lbm'
all_benchmarks = np.asarray(ce_data["Benchmark"].unique())
benchmarks = all_benchmarks[all_benchmarks != EXCLUDED_BENCHMARK]
print(benchmarks)

In [None]:
# Load the feature matrix X and the labels y for the selected benchmarks.
# NOTE(review): later cells index y positionally and use its values as benchmark
# names — presumably one row per benchmark; confirm in featuresutils.
X, y = featuresutils.load_features(benchmarks, with_dwarf=False, with_names=True)
# Standardise the features; the scaler is fitted on the whole matrix.
X = StandardScaler().fit_transform(X)

In [None]:
# Load the cross-application results from 'cross_apply.results.csv'.
# Later cells read the "ReferenceBenchmark", "ApplyToBenchmark", "Energy" and
# "Time" columns of this frame.
# NOTE(review): successful_only=False presumably keeps unsuccessful runs — confirm in datautils.
cross_apply_data = datautils.load_csv_results(
    'cross_apply.results.csv',
    ['ReferenceBenchmark','ApplyToBenchmark', 'Success'], 
    benchmark_cols=['ReferenceBenchmark', 'ApplyToBenchmark'],
    successful_only=False
)

In [None]:
# Load the multi-label results from 'ML.20180904-134133.csv'.
# Later cells read the "Benchmark", "Energy" and "Time" columns of this frame.
multi_label_data = datautils.load_csv_results('ML.20180904-134133.csv', ['Benchmark', 'Flags', 'Success'])
multi_label_data.head()

In [None]:
def percentage_difference(best_value, measured_value):
    """Return how far `measured_value` lies from `best_value`, in whole percent.

    Computes ``(measured_value - best_value) / best_value * 100`` and rounds it
    with ``np.round``. A positive result means the measurement is larger than
    the best value. Works element-wise when an argument is array-like.
    """
    relative_change = (measured_value - best_value) / best_value
    return np.round(100 * relative_change)

def calculate_percent_of_best():
    """Add 'EnergyPercentOfBest' and 'TimePercentOfBest' columns to `cross_apply_data`.

    For each name in the module-level `benchmarks`, the rows of
    `cross_apply_data` whose "ApplyToBenchmark" equals that name get the
    percentage difference between their "Energy" / "Time" and the benchmark's
    best-configuration values. The frame is modified in place; rows for
    benchmarks not listed in `benchmarks` are not assigned a value.

    NOTE(review): elements 0 and 1 of the value returned by
    datautils.best_configuration_data are used as the Energy and Time baselines
    respectively — presumably matching the ['Energy', 'Time'] request; confirm.
    """
    for name in benchmarks:
        best = datautils.best_configuration_data('Energy', name, ce_data, ['Energy', 'Time'])

        applied_rows = cross_apply_data[cross_apply_data["ApplyToBenchmark"] == name]
        row_labels = applied_rows.index

        cross_apply_data.loc[row_labels, 'EnergyPercentOfBest'] = percentage_difference(best[0], applied_rows["Energy"])
        cross_apply_data.loc[row_labels, 'TimePercentOfBest'] = percentage_difference(best[1], applied_rows["Time"])


# Fill in the two percentage columns, then preview the augmented frame.
calculate_percent_of_best()

cross_apply_data.head()

In [None]:
# Largest energy overhead seen in any cross-application.
# Series.max() skips NaN (rows whose ApplyToBenchmark is not in `benchmarks`
# never receive a value), whereas the builtin max() compares element by element
# and gives an order-dependent answer once a NaN is present.
cross_apply_data["EnergyPercentOfBest"].max()

### Percentage difference when cross-applying the best-known configurations to other programs

In [None]:
def plot_percentage_difference(data):
    """Draw a benchmark-by-benchmark grid of cross-application energy overheads.

    The x axis is the benchmark a configuration is applied to and the y axis is
    the reference benchmark the configuration came from, both in the order of
    the module-level `benchmarks`. For every pair, the first matching row's
    "EnergyPercentOfBest" in `data` decides what is drawn:

    * exactly -100: a '_' marker in the default colour cycle
      (NOTE(review): -100 results from a measured Energy of 0 — presumably a
      failed run; confirm);
    * otherwise a black square whose size `s` is the percentage, with the
      diagonal and negative values drawn at size 0 and values above 100 drawn
      at size 150.

    Raises IndexError if `data` has no row for some pair.
    """
    plt.figure(figsize=(14,14))

    tick_positions = np.arange(0, len(benchmarks))

    for column, target in enumerate(benchmarks):
        for row, reference in enumerate(benchmarks):
            pair_rows = data[(data["ReferenceBenchmark"] == reference)
                             & (data["ApplyToBenchmark"] == target)]
            percent = pair_rows["EnergyPercentOfBest"].iloc[0]

            if percent == -100:
                plt.scatter(column, row, marker='_')
                continue

            # A benchmark paired with itself, or a result better than the best
            # known value, is drawn with zero size.
            if column == row or percent < 0:
                percent = 0

            # Anything above 100 % is drawn at one fixed, larger size.
            if percent > 100:
                percent = 150

            plt.scatter(column, row, marker='s', c='k', s=percent)

    plt.title('\n'.join(wrap("Percentage difference in Energy consumption when cross applying the best known configuration for the `Reference` benchmark to `Applied To` benchmark.", 120)))
    plt.xlabel("Applied To")
    plt.ylabel("Reference")
    plt.xticks(tick_positions, benchmarks, rotation='vertical')
    plt.yticks(tick_positions, benchmarks)

plot_percentage_difference(cross_apply_data)

### 1-NN leave-p-out cross-validation

In [None]:
def add_value(dict, key, value):
    """Append `value` to the list stored under `key`, creating the list on first use."""
    # setdefault returns the existing list, or installs and returns a new empty one.
    dict.setdefault(key, []).append(value)
    

def leave_p_out(p, data):
    """Evaluate 1-nearest-neighbour configuration transfer with leave-p-out splits.

    For every leave-p-out split of the module-level feature matrix `X` and
    labels `y`, a 1-NN classifier is fitted on the training rows and predicts a
    label for each held-out row. The label of the held-out row is used as the
    "ApplyToBenchmark" and the predicted label as the "ReferenceBenchmark" to
    look up the first matching Energy and Time in `data`. The looked-up values
    are grouped by held-out benchmark and averaged.

    Parameters
    ----------
    p : int
        Number of samples held out per split (passed to ``LeavePOut``).
    data : pandas.DataFrame
        Cross-application results with "ReferenceBenchmark",
        "ApplyToBenchmark", "Energy" and "Time" columns.

    Returns
    -------
    tuple of numpy.ndarray
        ``(energy_averages, time_averages)``, each reshaped to a single column
        with one row per entry of the module-level `benchmarks`, in that order.

    Raises
    ------
    IndexError
        If `data` has no row for a (predicted, held-out) benchmark pair.
    KeyError
        If an entry of `benchmarks` never occurs as a held-out label.
    """
    splitter = LeavePOut(p)

    energy_by_benchmark = {}
    time_by_benchmark = {}

    for train_index, test_index in splitter.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        nn = KNeighborsClassifier(n_neighbors=1)
        nn.fit(X_train, y_train)
        predictions = nn.predict(X_test)

        for apply_to_benchmark, predicted_benchmark in zip(y_test, predictions):
            # Filter the frame once and read both measurements from the same
            # rows, instead of rebuilding the identical mask for each column.
            matching = data.loc[
                (data["ReferenceBenchmark"] == predicted_benchmark)
                & (data["ApplyToBenchmark"] == apply_to_benchmark),
                ["Energy", "Time"],
            ]

            add_value(energy_by_benchmark, apply_to_benchmark, matching["Energy"].iloc[0])
            add_value(time_by_benchmark, apply_to_benchmark, matching["Time"].iloc[0])

    energy_averages = [np.mean(energy_by_benchmark[benchmark]) for benchmark in benchmarks]
    time_averages = [np.mean(time_by_benchmark[benchmark]) for benchmark in benchmarks]

    return np.array(energy_averages).reshape(-1, 1), np.array(time_averages).reshape(-1, 1)
            
    
def compare_p(data):
    """Tabulate the leave-p-out Energy and Time averages per benchmark.

    Runs `leave_p_out` for each configured value of p (currently only p = 1)
    and places the resulting column vectors side by side: all Energy columns
    first, then all Time columns.

    Returns a DataFrame indexed by the module-level `benchmarks` with the
    columns 'LeaveOneOutEnergy' and 'LeaveOneOutTime'.
    """
    p_values = [1]
    column_names = ['LeaveOneOutEnergy', 'LeaveOneOutTime']

    per_p_results = [leave_p_out(p, data) for p in p_values]
    energy_columns = [energy for energy, _ in per_p_results]
    time_columns = [time for _, time in per_p_results]

    combined = np.concatenate(energy_columns + time_columns, axis=1)

    return pd.DataFrame(combined, index=benchmarks, columns=column_names)


def cross_o3_relative_data(leave_out_data, average_data, benchmarks):
    """Express the leave-one-out averages relative to each benchmark's -O3 run.

    Parameters
    ----------
    leave_out_data : pandas.DataFrame
        Indexed by benchmark name, with "LeaveOneOutEnergy" and
        "LeaveOneOutTime" columns.
    average_data : pandas.DataFrame
        Measurements with "Benchmark", "Flags", "Energy" and "Time" columns;
        the first row with Flags == "-O3" per benchmark is the baseline.
    benchmarks : iterable
        Benchmark names to rescale.

    Returns
    -------
    pandas.DataFrame
        A deep copy of `leave_out_data` in which, for every listed benchmark,
        the Energy and Time values are divided by the -O3 Energy and Time.
        `leave_out_data` itself is not modified.

    Raises
    ------
    IndexError
        If a listed benchmark has no "-O3" row in `average_data`.
    """
    o3_data = average_data.loc[average_data["Flags"] == "-O3"]

    relative_data = leave_out_data.copy(deep=True)

    for benchmark in benchmarks:
        # The mask must be built from o3_data itself: a mask taken from the
        # full average_data has a different index and only works through
        # implicit realignment, which fails when index labels repeat.
        o3 = o3_data.loc[o3_data["Benchmark"] == benchmark]
        o3_energy = o3.iloc[0]["Energy"]
        o3_time = o3.iloc[0]["Time"]

        relative_data.loc[benchmark, "LeaveOneOutEnergy"] /= o3_energy
        relative_data.loc[benchmark, "LeaveOneOutTime"] /= o3_time

    return relative_data


def ce_o3_relative_data(average_data, benchmarks):
    """Return a copy of `average_data` with Energy and Time scaled by the -O3 values.

    For every name in `benchmarks`, the "Energy" and "Time" of all rows of that
    benchmark are divided by the "Energy" and "Time" of its first row whose
    "Flags" equal "-O3". Rows of benchmarks not listed are left as they are,
    and `average_data` itself is not modified.

    Raises IndexError if a listed benchmark has no "-O3" row.
    """
    baseline = average_data.loc[average_data["Flags"] == "-O3"]
    scaled = average_data.copy(deep=True)

    for name in benchmarks:
        reference_row = baseline.loc[baseline["Benchmark"] == name].iloc[0]
        o3_energy, o3_time = reference_row["Energy"], reference_row["Time"]

        rows = scaled["Benchmark"] == name
        scaled.loc[rows, "Energy"] /= o3_energy
        scaled.loc[rows, "Time"] /= o3_time

    return scaled

def multilabel_o3_relative_data(ce_data, multilabel_data, benchmarks):
    """Return a copy of `multilabel_data` scaled by the -O3 values from `ce_data`.

    For every name in `benchmarks`, the "Energy" and "Time" of that benchmark's
    rows in the copy are divided by the "Energy" and "Time" of the first
    `ce_data` row with that benchmark and "Flags" equal to "-O3". Both columns
    are then rounded to two decimals. `multilabel_data` is not modified.

    Raises IndexError if a listed benchmark has no "-O3" row in `ce_data`.
    """
    baseline = ce_data.loc[ce_data["Flags"] == "-O3"]
    scaled = multilabel_data.copy(deep=True)

    for name in benchmarks:
        reference_row = baseline.loc[baseline["Benchmark"] == name].iloc[0]
        o3_energy, o3_time = reference_row["Energy"], reference_row["Time"]

        rows = scaled["Benchmark"] == name
        scaled.loc[rows, "Energy"] /= o3_energy
        scaled.loc[rows, "Time"] /= o3_time

    for column in ("Energy", "Time"):
        scaled.loc[:, column] = np.round(scaled[column], 2)
    return scaled

In [None]:
# Divide every Energy/Time measurement in ce_data by the -O3 measurement of the
# same benchmark, for the benchmarks selected above.
ce_relative_data = ce_o3_relative_data(ce_data, benchmarks)
ce_relative_data.head()

In [None]:
# Run the leave-one-out 1-NN evaluation on the cross-application results and
# express its per-benchmark Energy/Time averages relative to the -O3 rows of ce_data.
cross_relative_data = cross_o3_relative_data(compare_p(cross_apply_data), ce_data, benchmarks)
cross_relative_data

In [None]:
# Express the multi-label results relative to the -O3 rows of ce_data,
# then drop the rows whose Benchmark is 'lbm'.
multilabel_relative_data = multilabel_o3_relative_data(ce_data, multi_label_data, benchmarks)
multilabel_relative_data = multilabel_relative_data[multilabel_relative_data["Benchmark"] != 'lbm']
multilabel_relative_data

In [None]:
def report_improvement(relative_data, variable):
    """Print how often, and by how much, `variable` improved over -O3.

    `relative_data[variable]` holds ratios to the -O3 baseline, so a ratio
    below 1 is an improvement and ``(1 - ratio) * 100`` is the percentage saved.

    Parameters
    ----------
    relative_data : pandas.DataFrame
        Frame with one ratio per row in the column named by `variable`.
    variable : str
        Column to summarise ('Energy' or 'Time'); its lower-cased form is
        used in the printed text.
    """
    ratios = relative_data[variable]
    label = variable.lower()

    # Ratios equal to 1 (no change) are counted as improved here, while the
    # "good configs" average only covers strict improvements.
    improved_count = len(relative_data[ratios <= 1])

    # The subtraction must happen before scaling to percent:
    # `1 - mean * 100` would compute something else entirely.
    good_reduction = (1 - np.mean(ratios[ratios < 1])) * 100
    overall_reduction = (1 - np.mean(ratios)) * 100

    print(f"Improved {improved_count} / {len(relative_data)} on {label}")
    print(f"Average {label} reduction for good configs {np.round(good_reduction)}")
    print(f"Average {label} reduction for all configs {np.round(overall_reduction)}")


report_improvement(multilabel_relative_data, 'Energy')

report_improvement(multilabel_relative_data, 'Time')


np.mean(multilabel_relative_data['Energy'])

In [None]:
def plot_relative_to_o3(cross_relative_data, ce_relative_data, ml_relative_data, variable):
    """Scatter-plot three configuration choices per benchmark, relative to -O3.

    For every entry of the module-level `benchmarks` the plot shows:

    * 'Best Known' — the minimum of `variable` over that benchmark's rows in
      `ce_relative_data`;
    * '1NN (Leave One Out)' — the 'LeaveOneOut' + `variable` value stored for
      that benchmark in `cross_relative_data` (indexed by benchmark name);
    * 'CC (Leave One Out)' — `variable` of the benchmark's first row in
      `ml_relative_data`.

    A horizontal line at 1 marks the -O3 baseline.

    Parameters
    ----------
    cross_relative_data, ce_relative_data, ml_relative_data : pandas.DataFrame
        Frames holding values already divided by the -O3 measurement.
    variable : str
        Column to plot, 'Energy' or 'Time'.

    Raises
    ------
    IndexError
        If a benchmark has no row in `ml_relative_data`.
    KeyError
        If a benchmark is missing from the index of `cross_relative_data`.
    """
    names = list(benchmarks)

    best_known = [ce_relative_data.loc[ce_relative_data["Benchmark"] == name, variable].min() for name in names]
    nn_leave_one_out = [cross_relative_data.loc[name, 'LeaveOneOut' + variable] for name in names]
    ml_leave_one_out = [ml_relative_data.loc[ml_relative_data["Benchmark"] == name, variable].iloc[0] for name in names]

    plt.figure(figsize=(16, 8))
    plt.scatter(names, best_known, label='Best Known')
    plt.scatter(names, nn_leave_one_out, label='1NN (Leave One Out)')
    plt.scatter(names, ml_leave_one_out, label='CC (Leave One Out)')

    plt.axhline(1, label='O3')

    plt.title('Effect of best known config, 1NN predicted config and CC predicted config on ' + variable + ' relative to -O3.')
    # The label names the plotted quantity and has balanced parentheses.
    plt.ylabel(variable + ' (relative to -O3)')
    plt.xticks(rotation=90)
    plt.yticks(np.arange(0.4, 1.6, 0.1))
    plt.legend()
    plt.grid()


plot_relative_to_o3(cross_relative_data, ce_relative_data, multilabel_relative_data, 'Energy')

In [None]:
# Same comparison as the Energy plot, for the Time measurements.
plot_relative_to_o3(cross_relative_data, ce_relative_data, multilabel_relative_data, 'Time')