In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textwrap import wrap

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import sys
sys.path.insert(0,'../')

import common.datautils as datautils
import common.featuresutils as featuresutils
import common.flagutils as flagutils

In [None]:
# Load the measurement results from the results archive.
average_data = datautils.load_ce_results('CE.results.zip')

# Keep only the part of each benchmark name that precedes its first '.'.
average_data.loc[:, "Benchmark"] = average_data["Benchmark"].apply(
    lambda name: name.partition('.')[0]
)

# One feature entry is loaded per distinct benchmark name.
benchmarks = average_data["Benchmark"].unique()
features = datautils.load_features(benchmarks)

# Reference list of flags; config_as_labels() compares configurations against it.
all_flags = flagutils.load_flag_list()

average_data.head()

In [None]:
# Benchmark/flag pairs of every run that is not the plain -O3 baseline.
test = average_data.loc[average_data["Flags"] != '-O3', ["Benchmark", "Flags"]]
test["Benchmark"].unique()

In [None]:
def get_features(benchmark):
    """Return the entry of ``features`` stored at the position of ``benchmark``.

    ``benchmarks`` and ``features`` are treated as parallel sequences: the
    first index (below ``len(features)``) at which ``benchmarks`` equals
    ``benchmark`` selects the entry that is returned.

    Raises:
        ValueError: if no such index exists.
    """
    matching_positions = (
        position
        for position in range(len(features))
        if benchmarks[position] == benchmark
    )
    first_match = next(matching_positions, None)

    if first_match is None:
        raise ValueError("Features not found for " + benchmark)

    return features[first_match]
    
def config_as_labels(config_str):
    """Encode a flag configuration string as a numpy array of 0/1 labels.

    The first four characters of ``config_str`` are dropped and the rest is
    split on single spaces. The token at position ``i`` is compared with
    ``all_flags[i]``: an exact match yields 1, the ``-fno-`` spelling of that
    flag yields 0.

    Raises:
        ValueError: if a token is neither form of the flag at its position.
    """
    tokens = config_str[4:].split(' ')
    encoded = []

    for position, token in enumerate(tokens):
        enabled_form = all_flags[position]

        if token == enabled_form:
            encoded.append(1)  # flag is switched on
            continue

        # The disabled spelling replaces the leading '-f' with '-fno-'.
        if token == '-fno-' + enabled_form[2:]:
            encoded.append(0)  # flag is switched off
            continue

        raise ValueError("ERROR:" + token)

    return np.array(encoded)

def combine_data():
    """Stack the 0/1 flag labels of every non -O3 row of ``average_data``.

    Rows are visited in the order they appear in ``average_data``. The
    benchmark's features are looked up for each row (so a failing lookup
    still raises) but only the labels are stacked; joining the features onto
    the labels is currently disabled.
    """
    tuned_rows = average_data[average_data["Flags"] != '-O3']

    label_rows = []
    for name, flag_string in tuned_rows[["Benchmark", "Flags"]].values:
        benchmark_features = get_features(name)  # looked up, not used below
        label_rows.append(config_as_labels(flag_string))
        # label_rows.append(np.concatenate((benchmark_features, labels)))

    return np.array(label_rows)

def get_target():
    """Classify every non -O3 row as 'Better' or 'Worse' than its -O3 baseline.

    A row is 'Better' when its Energy is strictly below the Energy of the
    first -O3 row of the same benchmark; otherwise (including ties) it is
    'Worse'.

    Returns:
        A single-column DataFrame ('Target') with one row per non -O3 row of
        ``average_data``, in the same order.
    """
    baseline_rows = average_data[average_data["Flags"] == '-O3']
    tuned_rows = average_data[average_data["Flags"] != '-O3']

    def baseline_energy(name):
        # Energy of the first -O3 measurement recorded for this benchmark.
        same_benchmark = baseline_rows[baseline_rows["Benchmark"] == name]
        return same_benchmark["Energy"].values[0]

    outcomes = [
        'Better' if energy < baseline_energy(name) else 'Worse'
        for name, energy in tuned_rows[["Benchmark", "Energy"]].values
    ]

    return pd.DataFrame(outcomes, columns=['Target'])

In [None]:
# Build the Better/Worse outcome table for inspection. Note that this rebinds
# `test`, which an earlier cell used for the benchmark/flag frame.
test = get_target()

In [None]:
# Preview the first rows of `test`.
test.head()

In [None]:
# Sort descending by outcome and renumber the rows from 0, so the index
# ranges printed below describe the sorted positions of each outcome.
test = test.sort_values('Target', ascending=False).reset_index(drop=True)

better_rows = test[test["Target"] == 'Better']
worse_rows = test[test["Target"] == 'Worse']

# First and last sorted position of the 'Better' rows ...
print(better_rows.index.min())
print(better_rows.index.max())
print("")
# ... and of the 'Worse' rows.
print(worse_rows.index.min())
print(worse_rows.index.max())

In [None]:
# Standardise the 0/1 flag-label matrix and project it onto its first five
# principal components.
X = StandardScaler().fit_transform(combine_data())

pca = PCA(n_components=5)
principal_components = pca.fit_transform(X)
component_labels = ['PC1', 'PC2', 'PC3', 'PC4', 'PC5']
principal_frame = pd.DataFrame(data=principal_components, columns=component_labels)

# combine_data() and get_target() both walk the non -O3 rows of average_data
# in the same order, so row i of `target` describes row i of `principal_frame`.
# The target must therefore be joined as-is: sorting it and resetting its index
# before the concat would attach outcomes to the wrong PCA rows.
target = get_target()
assert len(target) == len(principal_frame), "target / PCA row count mismatch"

# Both frames carry a default 0..n-1 index, so axis=1 concat pairs rows by position.
principal_frame = pd.concat([principal_frame, target], axis=1)

In [None]:
# Left panel: scree plot of the variance ratio explained by each component.
fig = plt.figure(figsize=(16,6))
ax = fig.add_subplot(1, 2, 1)
ax.bar(component_labels, pca.explained_variance_ratio_)
ax.set_title('Explained Variance of Principal Components')
ax.set_xlabel('Principal Component')
ax.set_ylabel('Explained Variance Ratio')


# Right panel: every configuration projected onto the first two components.
# NOTE(review): the title mentions Milepost features, but combine_data()
# currently stacks only the flag labels - confirm the intended wording.
ax = fig.add_subplot(1, 2, 2)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('2 Component PCA plot of stable benchmarks using Milepost features')
ax.grid()

# Per-outcome subsets; used only by the disabled per-class scatter below.
better_configs = principal_frame[principal_frame["Target"] == 'Better']
worse_configs = principal_frame[principal_frame["Target"] == 'Worse']


# Alternative rendering, one series per outcome (currently disabled):
# ax.scatter(better_configs['PC1'], 
#            better_configs['PC2'], 
#            label='Better',
#            marker='x')

# ax.scatter(worse_configs['PC1'], 
#            worse_configs['PC2'],
#           label='Worse')


# The scatter needs a label: ax.legend() only lists labelled artists and
# otherwise warns and draws nothing useful.
ax.scatter(principal_frame['PC1'], 
           principal_frame['PC2'],
           label='Configurations')


# Trailing ';' keeps the Legend repr out of the cell output.
ax.legend();