In [None]:
import zipfile as zip

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV

from skmultilearn.problem_transform import BinaryRelevance, ClassifierChain, LabelPowerset

import sys
sys.path.insert(0,'../')
import common.flagutils as flagutils
import common.datautils as datautils
import common.featuresutils as featuresutils

In [None]:
# Load the CE results from 'CE.results.zip' through the project helper and
# preview the first rows (assumes the helper returns a pandas DataFrame;
# later cells index it by the "Benchmark", "Flags", "Energy" and "Time" columns).
ce_data = datautils.load_ce_results('CE.results.zip')
ce_data.head()

In [None]:
# Flag collections provided by the project helpers. Presumably o3_flags holds
# the flags enabled by -O3 and all_flags the full ordered list of tunable
# flags -- TODO confirm against common.flagutils.
o3_flags = flagutils.load_o3_flags()
all_flags = flagutils.load_flag_list()
print(len(all_flags))  # size of the full flag list

In [None]:
# Distinct values of the "Benchmark" column in the CE results; the functions
# below iterate over this array to build one sample per benchmark.
benchmarks = ce_data["Benchmark"].unique()
print(benchmarks)

### Compare multi-label classifiers and find optimal parameters

In [None]:
def load_data(with_dwarf):
    """Build the standardised feature matrix and the per-benchmark flag labels.

    Reads the module-level ``benchmarks``, ``ce_data`` and ``all_flags``.

    Args:
        with_dwarf: forwarded unchanged to ``featuresutils.load_features``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is the output of
        ``StandardScaler().fit_transform`` on the loaded features and ``y`` is
        ``np.array`` of one label list per benchmark (1 = flag on, 0 = ``-fno-``
        form of the flag).
    """
    raw_features = featuresutils.load_features(benchmarks, with_dwarf=with_dwarf)
    X = StandardScaler().fit_transform(raw_features)

    label_rows = []
    for benchmark in benchmarks:
        best = datautils.best_configuration('Energy', benchmark, ce_data)
        # A bare '-O3' result is expanded to the explicit -O3 configuration string.
        config_str = flagutils.get_o3_config() if best == '-O3' else best

        # The first four characters are skipped before splitting
        # (presumably a leading optimisation level such as "-O1 " -- TODO confirm).
        tokens = config_str[4:].split(' ')

        row = []
        for position, token in enumerate(tokens):
            enabled_form = all_flags[position]
            if token == enabled_form:
                row.append(1)  # flag is turned on
                continue
            if token == '-fno-' + enabled_form[2:]:
                row.append(0)  # flag is turned off via its -fno- form
                continue
            # Token matches neither form of the flag expected at this position;
            # it is reported and no label is recorded for it.
            print("ERROR:" + token)

        label_rows.append(row)

    return X, np.array(label_rows)


def grid_search(classifier, parameters, X, y, n_jobs, cv=20, scoring='f1_macro'):
    """Fit a cross-validated grid search and return the fitted search object.

    Args:
        classifier: estimator handed to ``GridSearchCV``.
        parameters: parameter grid (dict or list of dicts) to search.
        X: feature matrix passed to ``fit``.
        y: label matrix passed to ``fit``.
        n_jobs: number of parallel jobs for the search.
        cv: cross-validation setting forwarded to ``GridSearchCV``. The default
            of 20 preserves the previous hard-coded value; callers with a
            different number of samples can now override it.
        scoring: scoring name forwarded to ``GridSearchCV``; defaults to the
            previously hard-coded ``'f1_macro'``.

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    gs = GridSearchCV(classifier, parameters, cv=cv, scoring=scoring, n_jobs=n_jobs)
    gs.fit(X, y)

    return gs


def test_multilabel_classifier(classifier, X, y, n_jobs):
    """Grid-search SVC base classifiers for one multi-label wrapper and print the outcome.

    Args:
        classifier: multi-label wrapper whose ``classifier`` parameter is searched.
        X: feature matrix.
        y: label matrix.
        n_jobs: forwarded to ``grid_search``.

    Prints the wrapper's class name, the best score and the best parameters;
    returns nothing.
    """
    name = type(classifier).__name__
    print(f"Testing {name}")

    c_values = np.logspace(-2, 10, 13)
    gamma_values = np.logspace(-9, 3, 13)

    linear_space = {
        'classifier': [SVC(kernel='linear')],
        'classifier__C': c_values,
    }
    rbf_space = {
        'classifier': [SVC(kernel='rbf')],
        'classifier__C': c_values,
        'classifier__gamma': gamma_values,
    }
    # Disabled alternative search space, kept for reference:
    #     {
    #         'classifier': [GaussianNB(), DecisionTreeClassifier(max_depth=5), AdaBoostClassifier(), DecisionTreeClassifier(max_depth=5)]
    #     }
    search = grid_search(classifier, [linear_space, rbf_space], X, y, n_jobs)

    print(f"{name} Best F1 Score: {search.best_score_}")
    print(search.best_params_)
    print("---------")

In [None]:
def run_comparison(multilabel_classifiers, n_jobs, disable_warnings=False, with_dwarf=False):
    """Load the data set once and evaluate each multi-label classifier on it.

    Args:
        multilabel_classifiers: iterable of classifiers passed one by one to
            ``test_multilabel_classifier``.
        n_jobs: forwarded to ``test_multilabel_classifier``.
        disable_warnings: when true, installs an 'ignore' warnings filter
            before the evaluations. The filter is not removed afterwards.
        with_dwarf: forwarded to ``load_data``.
    """
    features, labels = load_data(with_dwarf)
    labels = flagutils.remove_static_labels(labels)

    sample_count = features.shape[0]
    feature_count = features.shape[1]
    label_count = labels.shape[1]
    print(f"Loaded {sample_count} samples with {feature_count} features and {label_count} labels.")

    # Grid search can emit a large number of warnings; muting them is sometimes useful.
    if disable_warnings:
        import warnings
        warnings.filterwarnings('ignore')

    for multilabel_clf in multilabel_classifiers:
        test_multilabel_classifier(multilabel_clf, features, labels, n_jobs)

In [None]:
# run_comparison([BinaryRelevance(), ClassifierChain()], -1, disable_warnings=True, with_dwarf=False)

# Loaded 20 samples with 65 features and 94 labels.
# Testing BinaryRelevance
# BinaryRelevance Best F1 Score: 0.5882978723404256
# {'classifier': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
#   decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
#   max_iter=-1, probability=False, random_state=None, shrinking=True,
#   tol=0.001, verbose=False), 'classifier__C': 1.0, 'classifier__gamma': 0.1}
# ---------
# Testing ClassifierChain
# ClassifierChain Best F1 Score: 0.625531914893617
# {'classifier': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
#   decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
#   max_iter=-1, probability=False, random_state=None, shrinking=True,
#   tol=0.001, verbose=False), 'classifier__C': 1.0, 'classifier__gamma': 0.01}
# ---------

In [None]:
# run_comparison([BinaryRelevance(), ClassifierChain(), LabelPowerset()], -1, disable_warnings=True, with_dwarf=True)

In [None]:
def min_o3_config():
    """Return the -O3 command string without the flags dropped by ``remove_static_labels``.

    Labels are loaded without DWARF features; every flag of ``all_flags`` that
    is not among the remaining labels is filtered out of the -O3 configuration
    before it is turned back into a command string.
    """
    _, y_all = load_data(False)
    _, remaining_labels = flagutils.remove_static_labels(y_all, with_labels=True)

    o3_tokens = flagutils.get_o3_config().split(' ')
    removed_labels = np.setdiff1d(all_flags, remaining_labels)

    kept_tokens = []
    for token in o3_tokens:
        if token not in removed_labels:
            kept_tokens.append(token)

    return flagutils.get_cmd_string_from_config(kept_tokens)

min_o3_config()

In [None]:
def not_in_o3(with_dwarf=False):
    """List the flags dropped by ``remove_static_labels`` that are not in ``o3_flags``.

    Args:
        with_dwarf: forwarded to ``load_data``.

    Returns:
        A list of the dropped flags that do not appear in ``o3_flags``.
        The shape of the full label matrix is printed as a side effect.
    """
    _, y_all = load_data(with_dwarf)
    _, remaining_labels = flagutils.remove_static_labels(y_all, with_labels=True)

    print(y_all.shape)

    dropped_flags = np.setdiff1d(all_flags, remaining_labels)

    return list(filter(lambda flag: flag not in o3_flags, dropped_flags))

not_in_o3()
    

### Output predicted configs

In [None]:
def print_predicted_configs():
    """Print a predicted flag configuration for every held-out sample.

    Uses ``KFold`` with as many splits as there are samples, so each fold
    tests on a single sample. For each fold a ``ClassifierChain`` over an RBF
    ``SVC`` is trained on the remaining samples, and the first predicted row is
    rendered as a space-separated flag string (``-fno-`` form for disabled flags).
    """
    X, y = datautils.format_data_for_multilabel(ce_data, False, benchmarks)
    y, remaining_labels = flagutils.remove_static_labels(y, with_labels=True)

    splitter = KFold(n_splits=len(y))
    for train_index, test_index in splitter.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train = y[train_index]

        chain = ClassifierChain(SVC(kernel='rbf', C=0.01, gamma=0.01))
        chain.fit(X_train, y_train)
        predicted = chain.predict(X_test).toarray()

        rendered_flags = []
        for index, flag_enabled in enumerate(predicted[0]):
            flag_name = remaining_labels[0][index]
            rendered_flags.append(flag_name if flag_enabled else "-fno-" + flag_name[2:])

        # Every flag is followed by a single space, including the last one.
        config = "".join(flag + " " for flag in rendered_flags)

        print(benchmarks[test_index])
        print(config)
        print("-----")

# print_predicted_configs()

### Classifier chain results

In [None]:
# Load the recorded multi-label results from the CSV file and preview the
# first rows. The second argument is a list of column names whose exact role
# is defined by datautils.load_csv_results (not visible here) -- TODO confirm.
multi_label_data = datautils.load_csv_results('ML.20180904-134133.csv', ['Benchmark', 'Flags', 'Success'])
multi_label_data.head()

### Significance

In [None]:
# Load the RIC results from the zip archive and preview the first rows.
# The second argument is a list of column names whose exact role is defined by
# datautils.load_zip_results (not visible here) -- TODO confirm.
# compute_probability below reads this frame as a module-level global.
ric_data = datautils.load_zip_results('RIC.results.zip', ['Benchmark','Flags', 'RunId', 'Success'])
ric_data.head()

In [None]:
def o3_relative_data(ce_data, ml_data):
    """Express Energy and Time of ``ml_data`` relative to each benchmark's -O3 run.

    Args:
        ce_data: frame with "Benchmark", "Flags", "Energy" and "Time" columns;
            its rows with ``Flags == "-O3"`` supply the baselines. When a
            benchmark has several such rows, the first one is used.
        ml_data: frame with the same four columns. It is not modified.

    Returns:
        A copy of ``ml_data`` whose "Energy" and "Time" values are divided by
        the matching -O3 baseline, with rows whose ``Flags`` equal '-O3' removed.

    Raises:
        IndexError: if a benchmark in ``ml_data`` has no -O3 row in ``ce_data``.
    """
    o3_rows = ce_data.loc[ce_data["Flags"] == "-O3"]
    # One baseline row per benchmark, keeping the first occurrence.
    baseline = o3_rows.drop_duplicates(subset="Benchmark", keep="first").set_index("Benchmark")

    missing = [b for b in ml_data["Benchmark"].unique() if b not in baseline.index]
    if missing:
        raise IndexError(f"No -O3 baseline found in ce_data for benchmark(s): {missing}")

    relative_data = ml_data.copy(deep=True)
    # Whole-column division replaces the column outright, so integer-valued
    # inputs become floats without writing floats into an integer column.
    for column in ("Energy", "Time"):
        divisor = relative_data["Benchmark"].map(baseline[column])
        relative_data[column] = relative_data[column] / divisor

    return relative_data[relative_data["Flags"] != '-O3']

def compute_probability(reduction):
    """Fraction of configurations in ``ric_data`` that beat -O3 by the given factor.

    For every benchmark in the module-level ``ric_data`` the energy of its
    first '-O3' row is taken as the baseline. A row counts as good when its
    energy is below ``baseline * reduction``.

    Args:
        reduction: multiplicative factor applied to the -O3 energy to obtain
            the threshold.

    Returns:
        Number of good rows divided by the number of rows, where one row per
        benchmark (the -O3 run) is excluded from the denominator.
    """
    considered = 0
    qualifying = 0

    for name in ric_data["Benchmark"].unique():
        rows = ric_data.loc[ric_data["Benchmark"] == name]
        baseline_energy = rows.loc[rows["Flags"] == '-O3', "Energy"].iloc[0]

        considered += len(rows) - 1  # the -O3 row itself is not counted
        qualifying += int((rows["Energy"] < baseline_energy * reduction).sum())

    return qualifying / considered

def improvement_significance(significance_level):
    """Tabulate, per improved benchmark, the improvement over -O3 and its significance.

    Uses the module-level ``ce_data`` and ``multi_label_data``. A benchmark is
    included when the first row of its -O3-relative energy is below 1. Its
    p-value is ``compute_probability`` of that relative energy.

    Args:
        significance_level: threshold below which a p-value counts as significant.

    Returns:
        A DataFrame indexed by benchmark with columns "Improvement" (percent,
        rounded to a whole number), "P-Value" (rounded to 2 decimals) and
        "Significant" ('Yes'/'No'), sorted by descending improvement.
    """
    sig_relative_data = o3_relative_data(ce_data, multi_label_data)

    columns = ["Improvement", "P-Value", "Significant"]
    benchmarks_with_improvement = []
    results = []

    for benchmark in sig_relative_data["Benchmark"].unique():
        benchmark_data = sig_relative_data[sig_relative_data["Benchmark"] == benchmark]
        relative_energy = benchmark_data["Energy"].iloc[0]

        # Written as a negated "<" so that NaN is skipped too.
        if not (relative_energy < 1):
            continue

        probability = compute_probability(relative_energy)

        # Decide on the exact probability. Rounding first would report values
        # such as 0.046 as 0.05 and wrongly mark them as not significant.
        significant = 'Yes' if probability < significance_level else 'No'

        # Scale before rounding to avoid artefacts like 28.999999999999996.
        improvement = np.round((1 - relative_energy) * 100)

        benchmarks_with_improvement.append(benchmark)
        results.append([improvement, np.round(probability, 2), significant])

    return pd.DataFrame(results, columns=columns, index=benchmarks_with_improvement).sort_values('Improvement', ascending=False)

len(improvement_significance(0.05))