In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
# Read the feature matrix. header=None tells pandas the CSV has no header
# row, so the first line is kept as data and columns get integer labels.
data = pd.read_csv('features.csv', header=None)
# Show the first rows as a quick sanity check of the parsed values.
data.head()

In [None]:
# Standardise every feature column (zero mean, unit variance) before PCA so
# that features with a large raw scale do not dominate the components.
X = StandardScaler().fit_transform(data.values)

pca = PCA(n_components=5)
principal_components = pca.fit_transform(X)
# Build the labels from the fitted model instead of a hand-typed list, so
# they cannot drift out of sync if n_components above is ever changed.
component_labels = ['PC{}'.format(k) for k in range(1, pca.n_components_ + 1)]

# Bar chart of the fraction of total variance captured by each component.
plt.bar(component_labels, pca.explained_variance_ratio_)
plt.title('Explained Variance of Principal Components')
plt.xlabel('Principal Component')
# Trailing semicolon keeps the Text(...) repr out of the cell output.
plt.ylabel('Explained Variance Ratio');

In [None]:
# Row labels for the projected samples, one per row of the PCA output.
# NOTE(review): assumes features.csv lists the benchmarks in exactly this
# order - confirm against the file that produced it.
benchmark_names = ['BT', 'CG', 'EP', 'FT', 'IS', 'LU', 'MG', 'SP', 'UA']

# Wrap the projected coordinates in a labelled frame: rows are benchmarks,
# columns are principal components.
principal_frame = pd.DataFrame(
    principal_components,
    index=benchmark_names,
    columns=component_labels,
)
# Display every labelled row.
principal_frame.head(len(benchmark_names))

In [None]:
def get_cmap(n, name='hsv'):
    '''Return a Matplotlib colormap resampled to ``n`` discrete entries.

    Calling the returned colormap with an integer index ``i`` in
    ``0, 1, ..., n-1`` yields the RGBA tuple of the i-th entry.

    Parameters
    ----------
    n : int
        Number of discrete colours to sample from the colormap.
    name : str, optional
        Name of a registered Matplotlib colormap (default ``'hsv'``).

    Returns
    -------
    matplotlib.colors.Colormap
        The named colormap with a lookup table of ``n`` entries.

    Notes
    -----
    With a cyclic colormap such as the default ``'hsv'`` the first and last
    entries are nearly the same hue; request ``n + 1`` entries when every
    index must be visually distinct.
    '''
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    # pyplot.get_cmap accepts the same (name, lut) arguments and is available
    # on both old and current releases.
    return plt.get_cmap(name, n)

In [None]:
# Scatter every benchmark in the plane of the first two principal
# components, one colour and one legend entry per benchmark.
fig, ax = plt.subplots()
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('2 Component PCA for NPB')

# Request one more colour than there are benchmarks: the default 'hsv' map
# is cyclic, so sampling exactly len(principal_frame) entries would give the
# first and last benchmark almost the same red.
cmap = get_cmap(len(principal_frame) + 1)

for i, benchmark in enumerate(principal_frame.index):
    ax.scatter(
        principal_frame.loc[benchmark, 'PC1'],
        principal_frame.loc[benchmark, 'PC2'],
        # `color=` rather than `c=`: a bare RGBA tuple passed through `c`
        # triggers Matplotlib's "single numeric RGB or RGBA sequence" warning.
        color=cmap(i),
        label=benchmark,
    )

# Trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();