# DATA MINING PROJECT: Analysis of a Supermarket’s Customers
## 2.3) Clustering Analysis: Hierarchical
### *Antonio Strippoli, Valerio Mariani*

In [None]:
%matplotlib inline
import os

import matplotlib.pyplot as plt
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist
from sklearn.preprocessing import MinMaxScaler

pd.set_option('mode.chained_assignment', None)

In [None]:
def plot(ax, folder="clustering_hierarchical", filename="", figsize=(6.4, 4.8)):
    """Resize, optionally save, then show and close the current figure.

    Args:
        ax: Not used by this function; kept so that existing calls which pass
            the result of a plotting call (e.g. ``plot(dendrogram(...))``)
            keep working.
        folder: Sub-directory of ``../report/imgs`` in which the image is
            stored.
        filename: Name of the image file. When empty, nothing is saved.
        figsize: ``(width, height)`` of the figure, in inches.
    """
    fig = plt.gcf()
    fig.set_size_inches(*figsize)
    plt.tight_layout()
    if filename:
        path = os.path.join("..", "report", "imgs", folder)
        # makedirs also creates missing parent directories and, with
        # exist_ok=True, does not fail if the directory is already there.
        os.makedirs(path, exist_ok=True)
        plt.savefig(os.path.join(path, filename))
    plt.show()
    plt.close()

In [None]:
# Read the customer profile table used as clustering input;
# the first CSV column is used as the DataFrame index
cdf = pd.read_csv(
    "customer_profilation.csv",
    index_col=0,
)

### Definition of attributes employed for clustering

In [None]:
# We chose these 3 attributes, since they usually classify customers very well
attr_cluster = ['Recency', 'Frequency', 'Monetary']
cdf_cluster = cdf[attr_cluster]

# Min-max normalize the selected attributes: fit the scaler on the raw
# values, then transform those same values
scaler = MinMaxScaler()
scaler.fit(cdf_cluster.values)
X = scaler.transform(cdf_cluster.values)

### Clustering using different linkage methods

In [None]:
# Single linkage (MIN), also known as the Nearest Point Algorithm.
# Pairwise Euclidean distances are computed first, then linked.
data_dist = pdist(X, metric='euclidean')
data_link = linkage(data_dist, method='single', metric='euclidean')

# Draw the truncated dendrogram and save it under the report images
plot(
    dendrogram(
        data_link,
        color_threshold=0.2,
        truncate_mode='lastp',
    ),
    filename="min_algorithm",
)

In [None]:
# Complete linkage (MAX), also known as the Farthest Point or Voor Hees Algorithm.
# Pairwise Euclidean distances are computed first, then linked.
data_dist = pdist(X, metric='euclidean')
data_link = linkage(data_dist, method='complete', metric='euclidean')

# Draw the truncated dendrogram and save it under the report images
plot(
    dendrogram(
        data_link,
        color_threshold=1.0,
        truncate_mode='lastp',
    ),
    filename="max_algorithm",
)

In [None]:
# Average linkage (Group Average), also known as the UPGMA Algorithm.
# Pairwise Euclidean distances are computed first, then linked.
data_dist = pdist(X, metric='euclidean')
data_link = linkage(data_dist, method='average', metric='euclidean')

# Draw the truncated dendrogram and save it under the report images
plot(
    dendrogram(
        data_link,
        color_threshold=0.4,
        truncate_mode='lastp',
    ),
    filename="g_average_algorithm",
)