In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textwrap import wrap

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import sys
sys.path.insert(0,'../')

from common.featuresutils import load_features

In [None]:
# Names of the benchmarks whose features are passed to the PCA cells below.
stable_benchmarks = [
    'botsalgn', 'botsspar', 'smithwa', 'imagick',
    'kdtree', 'UA', 'nab', 'BT',
    'MG', 'IS', 'FT', 'EP',
    'CG', 'swim', 'fma3d', 'bwaves',
]

In [None]:
def visualise_pca_subset(benchmarks, with_dwarf=False, n_components=5):
    """Run PCA on the features of ``benchmarks`` and plot the result.

    The features are loaded with ``load_features``, standardised with
    ``StandardScaler`` and projected with ``PCA``. A single figure with two
    panels is drawn: a scree plot (explained variance ratio per component)
    on the left and a PC1-vs-PC2 scatter plot on the right.

    Parameters
    ----------
    benchmarks : sequence
        Benchmark identifiers, forwarded unchanged to ``load_features``.
    with_dwarf : bool, default False
        Forwarded unchanged to ``load_features``.
    n_components : int, default 5
        Maximum number of principal components to compute. It is clamped to
        ``min(n_samples, n_features)`` because PCA cannot extract more
        components than that.

    Returns
    -------
    None
        The function only draws a matplotlib figure.

    Raises
    ------
    ValueError
        If fewer than two principal components can be computed, since the
        scatter plot needs both PC1 and PC2.
    """
    data = load_features(benchmarks, with_dwarf=with_dwarf)

    # Standardise so that every feature contributes on the same scale.
    X = StandardScaler().fit_transform(data)

    # PCA raises if asked for more components than min(n_samples, n_features),
    # so clamp the request instead of failing on small benchmark subsets.
    n_samples, n_features = X.shape
    n_components = min(n_components, n_samples, n_features)
    if n_components < 2:
        raise ValueError(
            'At least 2 principal components are required for the PC1/PC2 '
            'scatter plot, but only {} can be computed from data of shape '
            '{}.'.format(n_components, X.shape)
        )

    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(X)
    component_labels = ['PC{}'.format(i) for i in range(1, n_components + 1)]
    principal_frame = pd.DataFrame(data=principal_components,
                                   columns=component_labels)

    fig, (scree_ax, scatter_ax) = plt.subplots(1, 2, figsize=(16, 6))

    # Left panel: scree plot of the variance explained by each component.
    scree_ax.bar(component_labels, pca.explained_variance_ratio_)
    scree_ax.set_title('Explained Variance of Principal Components')
    scree_ax.set_xlabel('Principal Component')
    scree_ax.set_ylabel('Explained Variance Ratio')

    # Right panel: samples projected onto the first two components.
    # NOTE(review): the title is fixed and does not reflect `with_dwarf` or
    # the benchmarks actually passed in -- confirm it is still accurate.
    scatter_ax.set_xlabel('PC1')
    scatter_ax.set_ylabel('PC2')
    scatter_ax.set_title('2 Component PCA for NPB and SPEC using Milepost features')
    scatter_ax.scatter(principal_frame['PC1'],
                       principal_frame['PC2'])

In [None]:
# PCA over the stable benchmarks with the default feature set (with_dwarf=False).
visualise_pca_subset(benchmarks=stable_benchmarks)

In [None]:
# Same analysis with with_dwarf=True passed through to load_features
# (presumably adds DWARF-derived features -- confirm in common.featuresutils).
visualise_pca_subset(benchmarks=stable_benchmarks, with_dwarf=True)