In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textwrap import wrap

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import sys
sys.path.insert(0,'../')

import common.datautils as datautils
import common.featuresutils as featuresutils

In [None]:
# Results loaded from the 'CE.results.zip' archive through the project's datautils helper.
average_data = datautils.load_ce_results('CE.results.zip')

# Benchmarks analysed in this notebook, listed suite by suite so that the
# i-th name lines up with the i-th label in `suites` below.
stable_benchmarks = [
    # NPB (7)
    'BT', 'CG', 'EP', 'FT', 'IS', 'MG', 'UA',
    # SPEC (9)
    'botsalgn', 'botsspar', 'bwaves', 'fma3d', 'kdtree', 'nab', 'imagick', 'smithwa', 'swim',
    # Parboil (1)
    'lbm',
    # Rodinia (3)
    'hotspot', 'backprop', 'kmeans',
]

# One suite label per entry of `stable_benchmarks`, in the same order.
suites = ['NPB'] * 7 + ['SPEC'] * 9 + ['Parboil'] * 1 + ['Rodinia'] * 3

In [None]:
def run_pca(benchmarks, with_dwarf=False, random_dwarfs=False, visualise=True):
    """Reduce the feature vectors of ``benchmarks`` to two principal components.

    The features are loaded with ``featuresutils.load_features``, standardised
    with ``StandardScaler`` and projected with a 2-component ``PCA``.

    Parameters
    ----------
    benchmarks : sequence of str
        Benchmark names. Passed to ``featuresutils.load_features`` and used as
        the index of the returned frame.
    with_dwarf : bool, default False
        Forwarded unchanged to ``featuresutils.load_features``.
    random_dwarfs : bool, default False
        Accepted for call compatibility; this function does not use it.
    visualise : bool, default True
        When true, draw a two-panel figure: a bar chart of the explained
        variance ratio of each component and a scatter of PC1 against PC2.

    Returns
    -------
    pandas.DataFrame
        One row per benchmark with columns ``PC1``, ``PC2`` and ``Suite``.
        ``Suite`` is looked up by benchmark name in the notebook-level
        ``stable_benchmarks`` / ``suites`` lists; names not found there are
        labelled ``'Unknown'``.
    """
    data = featuresutils.load_features(benchmarks, with_dwarf=with_dwarf)

    # Standardise first so that no single feature dominates the components
    # purely because of its scale.
    X = StandardScaler().fit_transform(data)
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X)

    component_labels = ['PC1', 'PC2']
    # NOTE(review): using `benchmarks` as the index assumes load_features
    # returns its rows in the same order as `benchmarks` - confirm.
    principal_frame = pd.DataFrame(data=principal_components,
                                   columns=component_labels,
                                   index=benchmarks)

    # Label each row by benchmark *name* rather than by position, so subsets
    # or reorderings of the stable benchmarks still get the correct suite
    # instead of raising a length mismatch or being silently mislabelled.
    suite_of = dict(zip(stable_benchmarks, suites))
    principal_frame['Suite'] = [suite_of.get(name, 'Unknown') for name in benchmarks]

    if visualise:
        fig, (scree_ax, scatter_ax) = plt.subplots(1, 2, figsize=(16, 6))

        # Scree plot: share of variance captured by each component.
        scree_ax.bar(component_labels, pca.explained_variance_ratio_)
        scree_ax.set_title('Explained Variance of Principal Components')
        scree_ax.set_xlabel('Principal Component')
        scree_ax.set_ylabel('Explained Variance Ratio')

        # Benchmarks in the plane spanned by the two components.
        scatter_ax.set_xlabel('PC1')
        scatter_ax.set_ylabel('PC2')
        scatter_ax.set_title('2 Component PCA plot of stable benchmarks using Milepost features')
        scatter_ax.grid()
        scatter_ax.scatter(principal_frame['PC1'],
                           principal_frame['PC2'])

    return principal_frame


def visualise_all():
    """Scatter the stable benchmarks in PCA space, one marker style per suite.

    Runs ``run_pca`` on ``stable_benchmarks`` (without its built-in plots),
    then draws PC1 against PC2 with a separate series, marker and legend
    entry for each of NPB, SPEC, Parboil and Rodinia. Displays the figure
    and returns nothing.
    """
    principal_frame = run_pca(stable_benchmarks, visualise=False)

    # (suite label, marker) pairs; the order fixes both legend order and the
    # colour each suite receives from matplotlib's colour cycle.
    suite_markers = (
        ('NPB', 'o'),
        ('SPEC', 'x'),
        ('Parboil', 'v'),
        ('Rodinia', 'd'),
    )

    plt.figure(figsize=(8, 6))
    for suite_name, marker in suite_markers:
        members = principal_frame[principal_frame['Suite'] == suite_name]
        # Plot every member of the suite. Selecting only the first row (as was
        # previously done for Parboil) would drop additional benchmarks and
        # raise IndexError if the suite had no rows.
        plt.scatter(members['PC1'],
                    members['PC2'],
                    marker=marker,
                    label=suite_name)

    plt.grid()
    plt.legend()
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.title('\n'.join(wrap('Comparison of all stable benchmarks by suite using PCA', 55)))
    plt.show()
    

def compare_with_dwarf_feature():
    """Overlay the PCA projections obtained with and without the dwarf feature.

    ``run_pca`` is called twice on ``stable_benchmarks`` - first with
    ``with_dwarf=False``, then with ``with_dwarf=True`` - and both results are
    drawn on one PC1/PC2 scatter plot with distinct markers. Displays the
    figure and returns nothing.
    """
    # (legend label, marker, projected frame) for each variant, computed in
    # the order they are drawn.
    variants = [
        ('No Dwarf', 'o', run_pca(stable_benchmarks, with_dwarf=False, visualise=False)),
        ('With Dwarf', 'x', run_pca(stable_benchmarks, with_dwarf=True, visualise=False)),
    ]

    fig, ax = plt.subplots(figsize=(8, 6))
    for legend_label, marker_style, projection in variants:
        ax.scatter(projection['PC1'],
                   projection['PC2'],
                   marker=marker_style,
                   label=legend_label)

    ax.grid()
    ax.legend()
    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    heading = wrap('Comparison of dwarf feature on separation of stable benchmarks using PCA', 55)
    ax.set_title('\n'.join(heading))
    plt.show()


### Compare Benchmark Suites

In [None]:
# Plot the stable benchmarks in PCA space, one marker per benchmark suite.
visualise_all()

In [None]:
# Overlay the PCA projections computed with and without the dwarf feature.
compare_with_dwarf_feature()