# Importing libraries...
  * #### Cell #1 imports the essential matplotlib modules for displaying figures outside the Jupyter cell output
  * #### Cell #2 imports the essential pandas and numpy modules for our computations, as well as the sklearn.decomposition module for performing the PCA decomposition (for dimensionality reduction)

In [1]:
import PyQt5
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib import style

# Apply the ggplot look to every figure drawn in this notebook.
style.use('ggplot')

# Equivalent of the `%matplotlib qt` line magic: figures open in external Qt
# windows instead of inline. `run_line_magic` replaces the deprecated
# `get_ipython().magic(...)` entry point.
get_ipython().run_line_magic('matplotlib', 'qt')

In [2]:
import numpy as np
import pandas as pd
# The plotting helpers moved from `pandas.tools.plotting` to `pandas.plotting`
# in pandas 0.20; try the current location first and fall back to the legacy
# one so the notebook imports on both old and new pandas installations.
try:
    from pandas.plotting import parallel_coordinates
except ImportError:
    from pandas.tools.plotting import parallel_coordinates
from sklearn.decomposition import PCA as sklearnPCA

# Loading the Samples and their Respective Cluster Labels...
  * The **X** contains the **samples normalized** with the **StandardScaler** method 
  * The **y_bsas** contains the **cluster labels** for each feature vector **according to the BSAS Sequential Algorithm**
  * The **y_kmeans** contains the **cluster labels** for each feature vector **according to the K-Means Strict Clustering Algorithm**
  * The **y_hiercl** contains the **cluster labels** for each feature vector **according to the (Agglomerative) Hierarchical Clustering Algorithm**

In [3]:
# Feature matrix saved by the preprocessing step.
X = np.load('comp-data/1-preprocessing-comp-data/user-feature-set-stdscl.npy')

# Cluster-label arrays, loaded in the order: BSAS, K-Means, hierarchical.
y_bsas, y_kmeans, y_hiercl = (
    np.load(labels_file)
    for labels_file in (
        'comp-data/2-bsas-comp-data/clusters-stdscl.npy',
        'comp-data/3a-k-means-comp-data/clusters.npy',
        'comp-data/3b-hierarchical-clustering-comp-data/clusters_.npy',
    )
)

# Initializing the PCA Decomposition Algorithm to 2 Dimensions...
# ...and Transforming our Dataset...

In [4]:
# Reduce X to two PCA components and wrap the result in a DataFrame; with no
# column names given, the two components end up in columns 0 and 1.
pca = sklearnPCA(n_components=2)
X_pca = pd.DataFrame(
    data=pca.fit_transform(X),
)

# Plot the Dataset

In [5]:
# Figure 0: the whole dataset in the 2-D PCA space, without cluster colouring.
plt.figure(num=0, figsize=(25, 10))

# Column 0 goes on the horizontal axis, column 1 on the vertical axis.
plt.scatter(X_pca.loc[:, 0], X_pca.loc[:, 1], color='black')

plt.title('Movielens Users -- PCA Dimension-Reduced Plot')
plt.xlabel('X')
plt.ylabel('Y')

plt.show()

![movielens-users-pca][fig-0]

[fig-0]: figures/movielens-users-pca.png "movielens-users-pca"

# Plot the Dataset in conjunction with the cluster labels that BSAS Computed

In [6]:
# Work on an explicit copy: `pd.DataFrame(X_pca)` does not copy DataFrame
# input by default, so adding the label column to it could also add the
# column to X_pca itself.
tmp = X_pca.copy()
tmp[2] = y_bsas  # column 2 holds the BSAS cluster label of each row

# Mean of the two PCA columns per cluster label, one row per label in sorted
# label order, converted to a plain ndarray.
c_bsas_pca = tmp.groupby(2).mean().values
c_bsas_pca

array([[-0.8287241 , -0.02829115],
       [ 0.91022868, -1.84367285],
       [ 1.65505086,  0.25015431],
       [ 0.56904286,  2.16495304]])

In [7]:
# Figure 1: the PCA projection coloured by BSAS cluster label, with each
# cluster's centroid (rows of c_bsas_pca) drawn on top.
plt.figure(1, figsize=(25, 10))
plt.title('Movielens Users Clustered with BSAS -- PCA Dimension-Reduced Plot')
plt.xlabel('X')
plt.ylabel('Y')

# Cluster members first (labels 0..3), in the same order as the legend entries.
for cluster_id, (legend_name, colour) in enumerate((
        ('Class 1', 'red'),
        ('Class 2', 'blue'),
        ('Class 3', 'lightgreen'),
        ('Class 4', 'magenta'))):
    members = X_pca[y_bsas == cluster_id]
    plt.scatter(members[0], members[1], label=legend_name, c=colour)

# Centroids afterwards so they are drawn over the member points.
for cluster_id, (legend_name, colour) in enumerate((
        ('Class 1 Centroid', 'darkred'),
        ('Class 2 Centroid', 'darkblue'),
        ('Class 3 Centroid', 'darkgreen'),
        ('Class 4 Centroid', 'darkmagenta'))):
    centre = c_bsas_pca[cluster_id]
    plt.scatter(centre[0], centre[1], label=legend_name, c=colour, marker='X', s=200)

plt.legend()
plt.show()

![movielens-users-pca-bsas][fig-1]

[fig-1]: figures/movielens-users-pca-bsas.png "movielens-users-pca-bsas"

# Plot the Dataset in conjunction with the cluster labels that K-Means Computed

In [8]:
# Work on an explicit copy: `pd.DataFrame(X_pca)` does not copy DataFrame
# input by default, so adding the label column to it could also add the
# column to X_pca itself.
tmp = X_pca.copy()
tmp[2] = y_kmeans  # column 2 holds the K-Means cluster label of each row

# Mean of the two PCA columns per cluster label, one row per label in sorted
# label order, converted to a plain ndarray.
c_kmeans_pca = tmp.groupby(2).mean().values
c_kmeans_pca

array([[ 0.33071029, -1.37244137],
       [ 0.11282037,  1.02241536],
       [ 1.91994436,  0.17044288],
       [-1.38612609, -0.09342405]])

In [9]:
# Figure 2: the PCA projection coloured by K-Means cluster label, with each
# cluster's centroid (rows of c_kmeans_pca) drawn on top.
plt.figure(2, figsize=(25, 10))
plt.title('Movielens Users Clustered with K-Means (K=4) -- PCA Dimension Reduced Plot')
plt.xlabel('X')
plt.ylabel('Y')

# Cluster members first (labels 0..3), in the same order as the legend entries.
for cluster_id, (legend_name, colour) in enumerate((
        ('Class 1', 'red'),
        ('Class 2', 'blue'),
        ('Class 3', 'lightgreen'),
        ('Class 4', 'magenta'))):
    members = X_pca[y_kmeans == cluster_id]
    plt.scatter(members[0], members[1], label=legend_name, c=colour)

# Centroids afterwards so they are drawn over the member points.
for cluster_id, (legend_name, colour) in enumerate((
        ('Class 1 Centroid', 'darkred'),
        ('Class 2 Centroid', 'darkblue'),
        ('Class 3 Centroid', 'darkgreen'),
        ('Class 4 Centroid', 'darkmagenta'))):
    centre = c_kmeans_pca[cluster_id]
    plt.scatter(centre[0], centre[1], label=legend_name, c=colour, marker='X', s=200)

plt.legend()
plt.show()

![movielens-users-pca-k-means][fig-2]

[fig-2]: figures/movielens-users-pca-k-means.png "movielens-users-pca-k-means"

# Plot the Dataset in conjunction with the cluster labels that the (Agglomerative) Hierarchical Clustering Computed

In [10]:
# Work on an explicit copy: `pd.DataFrame(X_pca)` does not copy DataFrame
# input by default, so adding the label column to it could also add the
# column to X_pca itself.
tmp = X_pca.copy()
tmp[2] = y_hiercl  # column 2 holds the hierarchical-clustering label of each row

# Mean of the two PCA columns per cluster label, one row per label in sorted
# label order, converted to a plain ndarray.
c_hiercl_pca = tmp.groupby(2).mean().values
c_hiercl_pca

array([[-0.46872831,  1.58249278],
       [-0.46728111, -0.09794226],
       [ 1.80895447,  1.05439454],
       [ 1.59344914, -1.32082243]])

In [11]:
# Figure 3: the PCA projection coloured by hierarchical-clustering label, with
# each cluster's centroid (rows of c_hiercl_pca) drawn on top.
plt.figure(3, figsize=(25, 10))
plt.title('Movielens Users Clustered with (Agglomerative) Hierarchical Clustering -- PCA Dimension Reduced Plot')
plt.xlabel('X')
plt.ylabel('Y')

# Cluster members first. The labels selected here run 1..4 (not 0..3), hence
# the enumeration starts at 1.
for cluster_id, (legend_name, colour) in enumerate((
        ('Class 1', 'red'),
        ('Class 2', 'blue'),
        ('Class 3', 'lightgreen'),
        ('Class 4', 'magenta')), start=1):
    members = X_pca[y_hiercl == cluster_id]
    plt.scatter(members[0], members[1], label=legend_name, c=colour)

# Centroids afterwards so they are drawn over the member points; the rows of
# c_hiercl_pca are indexed from 0.
for row, (legend_name, colour) in enumerate((
        ('Class 1 Centroid', 'darkred'),
        ('Class 2 Centroid', 'darkblue'),
        ('Class 3 Centroid', 'darkgreen'),
        ('Class 4 Centroid', 'darkmagenta'))):
    centre = c_hiercl_pca[row]
    plt.scatter(centre[0], centre[1], label=legend_name, c=colour, marker='X', s=200)

plt.legend()
plt.show()

![movielens-users-pca-agglomerative][fig-3]

[fig-3]: figures/movielens-users-pca-agglomerative.png "movielens-users-pca-agglomerative"

### Note: The Brief Clustering Performance Comparison is in Chapter 5 of the Assignment's Documentation

# ~ END OF CLUSTERING PERFORMANCE COMPARISON ~