In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(0,'../')

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from skmultilearn.problem_transform import BinaryRelevance, ClassifierChain
    
from common.featuresutils import load_features
from common.flagutils import load_flag_list
from common.flagutils import load_o3_flags
from common.flagutils import build_config
from common.flagutils import get_cmd_string_from_config

In [2]:
# Load the raw per-run measurements.
data = pd.read_csv('CE.results.csv')

# Convert energy to Joules. Plain column assignment (rather than an in-place
# `.loc[:, col] *= ...`) lets pandas pick the result dtype, so this also works
# if the raw column was parsed as integers.
data['Energy'] = data['Energy'] * 1e-6

# Strip the literal '.x' from benchmark names. `regex=False` is explicit because
# the default differs between pandas versions, and as a regular expression '.x'
# would mean "any character followed by x" instead of the literal text.
data['Benchmark'] = data['Benchmark'].str.replace('.x', '', regex=False)

# Average energy and time over rows sharing the same benchmark, flag string,
# run type and run id.
average_data = data.groupby(['Benchmark','Flags', 'Type', 'RunId'], as_index=False).agg({'Energy':'mean', 'Time':'mean'})
average_data.head()

Unnamed: 0,Benchmark,Flags,Type,RunId,Energy,Time
0,BT.C,-O3,O3,-1,8685.049255,44.226147
1,BT.C,-O3 -faggressive-loop-optimizations -falign-fu...,test,80,8776.150006,43.287949
2,BT.C,-O3 -faggressive-loop-optimizations -falign-fu...,test,82,8918.038919,43.911705
3,BT.C,-O3 -faggressive-loop-optimizations -falign-fu...,test,83,8861.715663,43.573321
4,BT.C,-O3 -faggressive-loop-optimizations -falign-fu...,test,84,8867.013709,43.667829


In [3]:
# Load the list of compiler flags from the project helper. Later cells compare
# each configuration against this list position by position, so its order
# determines the order of the label columns.
all_flags = load_flag_list()
print(len(all_flags))

187


In [4]:
def best_configuration(variable, benchmark, average_data):
    """Return the flag string of the row that minimises `variable` for a benchmark.

    Parameters
    ----------
    variable : str
        Name of the column to minimise (e.g. 'Energy' or 'Time').
    benchmark : str
        Value to match in the "Benchmark" column.
    average_data : pandas.DataFrame
        Frame with at least the columns "Benchmark", "Flags" and `variable`.

    Returns
    -------
    The "Flags" value of the row with the smallest `variable` among the rows
    belonging to `benchmark`.

    Raises
    ------
    ValueError
        If `average_data` contains no rows for `benchmark`.
    """
    benchmark_data = average_data.loc[average_data["Benchmark"] == benchmark]
    if benchmark_data.empty:
        # Without this guard idxmin() fails with an unhelpful
        # "argmin of an empty sequence" message.
        raise ValueError(f"No measurements found for benchmark {benchmark!r}")

    min_index = benchmark_data[variable].idxmin()
    # Single row/column lookup instead of chained indexing.
    return benchmark_data.loc[min_index, "Flags"]


def get_o3_config():
    """Return the command string for the configuration built from the -O3 flags.

    The configuration is built by `build_config` from the notebook-level
    `all_flags`, the flags returned by `load_o3_flags()` and the '-O3' level,
    then rendered with `get_cmd_string_from_config`.
    NOTE(review): presumably this spells out every flag explicitly
    (-f... / -fno-...) for plain -O3 -- confirm against common.flagutils.
    """
    o3_config = build_config(all_flags, load_o3_flags(), '-O3')
    return get_cmd_string_from_config(o3_config)

In [5]:
# Distinct benchmark names found in the results; this array drives both the
# feature loading and the label construction in the following cells.
benchmarks = data["Benchmark"].unique()
print(benchmarks)

['nab' 'imagick' 'botsalgn' 'botsspar' 'kdtree' 'smithwa' 'BT.C' 'UA.C'
 'swim' 'EP.D']


In [6]:
def _labels_from_config(config_str, flag_names):
    """Turn a full flag string into a 0/1 label list aligned with `flag_names`.

    `config_str` is expected to be '-O3 ' followed by one space-separated entry
    per flag in `flag_names`, in the same order. Each entry is either the flag
    itself (label 1, flag enabled) or its '-fno-' form (label 0, flag disabled).

    Raises
    ------
    ValueError
        If the number of entries differs from `len(flag_names)` or an entry is
        neither the expected flag nor its '-fno-' form. Failing loudly avoids
        silently producing a short or misaligned label row.
    """
    # Drop the leading four characters ('-O3 ') and split the remainder.
    config = config_str[4:].split(' ')
    if len(config) != len(flag_names):
        raise ValueError(
            f"Expected {len(flag_names)} flags but configuration has {len(config)}"
        )

    labels = []
    for flag, name in zip(config, flag_names):
        if flag == name:
            labels.append(1)  # Flag is turned on
        elif flag == '-fno-' + name[2:]:
            labels.append(0)  # Flag is turned off (-fno- form)
        else:
            raise ValueError(f"Unexpected flag {flag!r}; expected {name!r} or its -fno- form")
    return labels


# Features are looked up by the benchmark name up to the first '.' (e.g. 'BT.C' -> 'BT').
X = load_features([b.split('.')[0] for b in benchmarks])
y = []

for benchmark in benchmarks:
    # Target for each benchmark: the flag setting with the lowest mean energy.
    config_str = best_configuration('Energy', benchmark, average_data)
    if config_str == '-O3':
        # The plain -O3 baseline is stored as the bare string '-O3'; replace it
        # with the string produced by get_o3_config() so it can be parsed below.
        config_str = get_o3_config()

    y.append(_labels_from_config(config_str, all_flags))

y = np.array(y)
print(X.shape)
print(y.shape)

(10, 65)
(10, 187)


In [7]:
# Standardise the feature matrix with scikit-learn's StandardScaler, fitted on
# all rows of X at once.
# NOTE(review): the scaler is fitted before cross-validation, so the rows later
# held out by grid_search() also contribute to the scaling statistics --
# consider fitting the scaler inside each CV split instead.
normalised_X = StandardScaler().fit_transform(X)

In [8]:
# Drop label columns that are uninformative for training: columns holding a
# single distinct value, and columns in which some value occurs exactly once.
cols_to_remove = []
for col_idx, column in enumerate(y.T):
    values, occurrences = np.unique(column, return_counts=True)

    is_constant = values.size == 1
    has_singleton_value = (occurrences == 1).any()
    if is_constant or has_singleton_value:
        cols_to_remove.append(col_idx)

small_y = np.delete(y, cols_to_remove, axis=1)
print(small_y.shape)

(10, 85)


In [9]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by the libraries
# used below (e.g. about convergence or undefined metrics) -- consider
# narrowing it to specific warning categories.
import warnings
warnings.filterwarnings('ignore')

In [10]:
def grid_search(classifier, parameters):
    """Run a 10-fold, macro-F1 grid search for `classifier` and return it fitted.

    The search is fitted on the notebook-level `normalised_X` (features) and
    `small_y` (label matrix), using all available cores (`n_jobs=-1`).

    Parameters
    ----------
    classifier : estimator
        Estimator handed to GridSearchCV.
    parameters : dict
        Parameter grid to explore.

    Returns
    -------
    GridSearchCV
        The fitted search object.
    """
    search = GridSearchCV(
        estimator=classifier,
        param_grid=parameters,
        scoring='f1_macro',
        cv=10,
        n_jobs=-1,
    )
    # fit() returns the search object itself.
    return search.fit(normalised_X, small_y)

In [11]:
# Grid searches using the BinaryRelevance problem transformation with an SVM
# base classifier.

# Linear kernel: only C is tuned, over 13 log-spaced values from 1e-2 to 1e10.
parameters = {
    'classifier': [SVC(kernel='linear')],
    'classifier__C': np.logspace(start=-2, stop=10, num=13),
}
linear_gs = grid_search(classifier=BinaryRelevance(), parameters=parameters)

print(f"Linear Best F1 Score: {linear_gs.best_score_}")
print(linear_gs.best_params_)

# RBF kernel: C as above, plus gamma over 13 log-spaced values from 1e-9 to 1e3.
parameters = {
    'classifier': [SVC(kernel='rbf')],
    'classifier__C': np.logspace(start=-2, stop=10, num=13),
    'classifier__gamma': np.logspace(start=-9, stop=3, num=13),
}
gaussian_gs = grid_search(classifier=BinaryRelevance(), parameters=parameters)

print(f"Gaussian Best F1 Score: {gaussian_gs.best_score_}")
print(gaussian_gs.best_params_)

Linear Best F1 Score: 0.6376470588235293
{'classifier': SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False), 'classifier__C': 0.01}
Gaussian Best F1 Score: 0.6517647058823529
{'classifier': SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False), 'classifier__C': 0.01, 'classifier__gamma': 1e-09}


In [12]:
# Grid searches using the ClassifierChain problem transformation with an SVM
# base classifier. Note that `linear_gs` and `gaussian_gs` are rebound here.

# Linear kernel: only C is tuned, over 13 log-spaced values from 1e-2 to 1e10.
parameters = {
    'classifier': [SVC(kernel='linear')],
    'classifier__C': np.logspace(start=-2, stop=10, num=13),
}
linear_gs = grid_search(classifier=ClassifierChain(), parameters=parameters)

print(f"Linear Best F1 Score: {linear_gs.best_score_}")
print(linear_gs.best_params_)

# RBF kernel: C as above, plus gamma over 13 log-spaced values from 1e-9 to 1e3.
parameters = {
    'classifier': [SVC(kernel='rbf')],
    'classifier__C': np.logspace(start=-2, stop=10, num=13),
    'classifier__gamma': np.logspace(start=-9, stop=3, num=13),
}
gaussian_gs = grid_search(classifier=ClassifierChain(), parameters=parameters)

print(f"Gaussian Best F1 Score: {gaussian_gs.best_score_}")
print(gaussian_gs.best_params_)

Linear Best F1 Score: 0.6341176470588235
{'classifier': SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False), 'classifier__C': 0.01}
Gaussian Best F1 Score: 0.6529411764705882
{'classifier': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1.0, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False), 'classifier__C': 1.0, 'classifier__gamma': 1.0}
