# Imports

In [1]:
# Import the Qt binding before choosing the backend.
# NOTE(review): presumably done so the 'qt' backend binds to PyQt5 — confirm.
import PyQt5
# Switch matplotlib to the interactive Qt backend (figures open in separate windows).
# `InteractiveShell.magic()` is deprecated in IPython; `run_line_magic(name, line)`
# is the supported equivalent of writing `%matplotlib qt` in a cell.
get_ipython().run_line_magic('matplotlib', 'qt')

In [2]:
import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt
from matplotlib import style;  style.use('ggplot')

In [3]:
from scipy.cluster.hierarchy import fcluster

# Loading Processed Data Matrix

In [4]:
# Read the preprocessed data matrix from disk; it is the input to every linkage() call below.
# NOTE(review): assumes a 2-D (n_samples, n_features) array — confirm against the preprocessing step.
X = np.load(file='FOOKIN_df.npy')

# Generating The Hierarchical Clustering Dendrogram
  #### Part 1: Complete Linkage Method

In [5]:
# Build the merge tree for X using complete linkage.
ZC = linkage(X, method='complete')

In [6]:
# Draw the full (untruncated) dendrogram of the complete-linkage tree in figure 1.
plt.figure(num=1, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Complete-Linkage',
    # Use the same x-axis label as every other dendrogram in this notebook
    # (was 'X[i]'); the default leaf labels are the observation indices.
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZC,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [8]:
# Condensed view of the complete-linkage tree in figure 2: only the top of the hierarchy is drawn.
plt.figure(num=2, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Complete-Linkage (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZC,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

  #### Part 2: Ward Variance Minimization Method

In [9]:
# Build the merge tree for X using Ward's variance-minimization criterion.
ZW = linkage(X, method='ward')

In [10]:
# Draw the full (untruncated) dendrogram of the Ward tree in figure 3.
plt.figure(num=3, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Ward Var. Minimization',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZW,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [11]:
# Condensed view of the Ward tree in figure 4: only the top of the hierarchy is drawn.
plt.figure(num=4, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Ward Var. Minimization (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZW,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

  #### Part 3: Weighted Method

In [13]:
# Build the merge tree for X using weighted linkage.
# Note the trailing underscore: ZW_ is the weighted tree, ZW is the Ward tree.
ZW_ = linkage(X, method='weighted')

In [14]:
# Draw the full (untruncated) dendrogram of the weighted-linkage tree in figure 5.
plt.figure(num=5, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Weighted',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZW_,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [15]:
# Condensed view of the weighted-linkage tree in figure 6: only the top of the hierarchy is drawn.
plt.figure(num=6, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Weighted (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZW_,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

  #### Part 4: Single Linkage Method

In [16]:
# Build the merge tree for X using single linkage.
ZSL = linkage(X, method='single')

In [17]:
# Draw the full (untruncated) dendrogram of the single-linkage tree in figure 7.
plt.figure(num=7, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Single-Linkage',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZSL,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [18]:
# Condensed view of the single-linkage tree in figure 8: only the top of the hierarchy is drawn.
plt.figure(num=8, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Single-Linkage (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZSL,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

  #### Part 5: Centroid Method

In [19]:
# Build the merge tree for X using centroid linkage.
# Note the trailing underscore: ZC_ is the centroid tree, ZC is the complete-linkage tree.
ZC_ = linkage(X, method='centroid')

In [20]:
# Draw the full (untruncated) dendrogram of the centroid-linkage tree in figure 9.
plt.figure(num=9, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Centroid',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZC_,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [None]:
# Condensed view of the centroid-linkage tree in figure 10: only the top of the hierarchy is drawn.
plt.figure(num=10, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Centroid (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZC_,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

  #### Part 6: Median Method

In [21]:
# Build the merge tree for X using median linkage.
ZM = linkage(X, method='median')

In [22]:
# Draw the full (untruncated) dendrogram of the median-linkage tree in figure 11.
plt.figure(num=11, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Median',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZM,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [None]:
# Condensed view of the median-linkage tree in figure 12: only the top of the hierarchy is drawn.
plt.figure(num=12, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Median (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZM,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

  #### Part 7: Average Method

In [23]:
# Build the merge tree for X using average linkage.
ZA = linkage(X, method='average')

In [24]:
# Draw the full (untruncated) dendrogram of the average-linkage tree in figure 13.
plt.figure(num=13, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Average',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZA,
    leaf_font_size=8.,  # small font: one tick label per leaf
    leaf_rotation=90.,  # vertical tick labels
)
plt.show()

In [None]:
# Condensed view of the average-linkage tree in figure 14: only the top of the hierarchy is drawn.
plt.figure(num=14, figsize=(25, 10))
plt.gca().set(
    title='Hierarchical Clustering Dendrogram -- Average (truncated)',
    xlabel='sample index',
    ylabel='distance',
)
dendrogram(
    ZA,
    p=20,                    # how many merged clusters 'lastp' keeps
    truncate_mode='lastp',   # draw only the last p merges; earlier ones are collapsed into leaves
    show_contracted=True,    # mark the heights of collapsed merges along each branch
    show_leaf_counts=False,  # do not label collapsed leaves with "(count)"
    leaf_font_size=12.,
    leaf_rotation=90.,
)
plt.show()

# Getting the number of Clusters
  #### Part 1: Complete Linkage Method

In [None]:
# Cut the complete-linkage tree at a fixed height to get one flat cluster label per sample.
max_d = 2  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZC, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed

  #### Part 2: Ward Variance Minimization Method

In [None]:
# Cut the Ward tree at a fixed height to get one flat cluster label per sample.
# This is the only cut in the notebook that uses 5 instead of 2.
max_d = 5  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZW, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed

  #### Part 3: Weighted Method

In [None]:
# Cut the weighted-linkage tree at a fixed height to get one flat cluster label per sample.
max_d = 2  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZW_, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed

  #### Part 4: Single Linkage Method

In [None]:
# Cut the single-linkage tree at a fixed height to get one flat cluster label per sample.
max_d = 2  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZSL, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed

  #### Part 5: Centroid Method

In [None]:
# Cut the centroid-linkage tree at a fixed height to get one flat cluster label per sample.
# NOTE(review): centroid linkage can produce non-monotonic merge heights, so a plain
# distance cut may be hard to interpret — confirm against the dendrogram.
max_d = 2  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZC_, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed

  #### Part 6: Median Method

In [None]:
# Cut the median-linkage tree at a fixed height to get one flat cluster label per sample.
# NOTE(review): median linkage can produce non-monotonic merge heights, so a plain
# distance cut may be hard to interpret — confirm against the dendrogram.
max_d = 2  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZM, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed

  #### Part 7: Average Method

In [None]:
# Cut the average-linkage tree at a fixed height to get one flat cluster label per sample.
max_d = 2  # distance threshold; NOTE(review): presumably read off the dendrogram — confirm
clusters_ = fcluster(ZA, t=max_d, criterion='distance')
clusters_  # last expression of the cell, so the label array is displayed