# In [None]:
# Title: Hierarchical Clustering


# Agglomerative vs Divisive Clustering

# Task 1: Agglomerative Clustering on Simple Numerical Data

# 1. Load the following dataset: [1, 2, 5, 10, 15, 30].
# 2. Apply Agglomerative Clustering, where each point starts as its own cluster and the closest pair of clusters is successively merged.
# 3. Observe and note down the order in which points are merged.

import numpy as np
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt

# One row per observation, one column per feature: a 6x1 column of sample values.
data_1d = np.array([[1], [2], [5], [10], [15], [30]])

# Build the bottom-up merge tree with Ward linkage.
Z = linkage(data_1d, method='ward')

# Plot the tree, labelling every leaf with the value it stands for.
plt.figure(figsize=(8, 4))
dendrogram(Z, labels=data_1d.ravel().tolist())
plt.title("Task 1: Agglomerative Clustering (1D Data)")
plt.xlabel("Data Points")
plt.ylabel("Distance")
plt.show()

# Each row of Z is one merge; print the header and the matrix on separate lines.
print("Merge Order (Z):", Z, sep="\n")



# Task 2: Divisive Clustering on Simple Numerical Data

# 1. Use the same dataset: [1, 2, 5, 10, 15, 30].
# 2. Start with all points in a single cluster and successively divide until each point is its own cluster.
# 3. Record the divisions made at each step.
from sklearn.cluster import KMeans

def divisive_clustering(data):
    """Split 1-D data top-down with 2-means until every cluster is one point.

    All values start in a single cluster. On each pass, every cluster that
    still holds more than one value is bisected with
    ``KMeans(n_clusters=2, random_state=0)``; passes repeat until only
    single-value clusters remain.

    Args:
        data: Sequence of scalar values to cluster.

    Returns:
        A list of ``(cluster1, cluster2)`` tuples, one per split, in the
        order the splits were performed. ``cluster1`` holds the values that
        KMeans labelled 0 and ``cluster2`` those labelled 1. The list is
        empty when ``data`` has fewer than two values.
    """
    clusters = [list(data)]
    divisions = []

    while any(len(cluster) > 1 for cluster in clusters):
        new_clusters = []
        for cluster in clusters:
            if len(cluster) <= 1:
                # Nothing left to split; carry the cluster over unchanged.
                new_clusters.append(cluster)
                continue

            # KMeans expects a 2-D (n_samples, n_features) array.
            points = np.array(cluster).reshape(-1, 1)
            labels = KMeans(n_clusters=2, random_state=0).fit(points).labels_
            cluster1 = [value for value, label in zip(cluster, labels) if label == 0]
            cluster2 = [value for value, label in zip(cluster, labels) if label == 1]

            if not cluster1 or not cluster2:
                # All points received the same label (e.g. duplicate values),
                # so the cluster did not shrink. Peel off the first value to
                # guarantee progress; otherwise the outer loop never ends.
                cluster1, cluster2 = cluster[:1], cluster[1:]

            divisions.append((cluster1, cluster2))
            new_clusters.extend([cluster1, cluster2])
        clusters = new_clusters

    return divisions

# Same six values as Task 1, this time split top-down.
data_list = [1, 2, 5, 10, 15, 30]
divisions = divisive_clustering(data_list)

# Report the header followed by one numbered line per recorded split.
step_lines = [f"Step {i}: {div}" for i, div in enumerate(divisions, start=1)]
print("\n".join(["Task 2: Divisive Clustering Steps:", *step_lines]))

# Task 3: Comparing Agglomerative and Divisive Methods on a 2D Dataset

# 1. Create a small 2D dataset: [(1, 2), (2, 3), (10, 10), (25, 25), (26, 27)].
# 2. Apply both Agglomerative and Divisive Clustering on this dataset.
# 3. Compare the results and discuss the similarities and differences.

# Five 2-D observations, one (x, y) pair per row.
data_2d = np.array([
    [1, 2],
    [2, 3],
    [10, 10],
    [25, 25],
    [26, 27],
])

# Bottom-up merge tree with Ward linkage.
Z_2d = linkage(data_2d, method='ward')

# Plot the tree; leaves are labelled 1..5 by their position in data_2d.
plt.figure(figsize=(8, 4))
dendrogram(Z_2d, labels=list(range(1, len(data_2d) + 1)))
plt.title("Task 3: Agglomerative Clustering (2D Data)")
plt.xlabel("Data Points")
plt.ylabel("Distance")
plt.show()

def divisive_clustering_2d(data):
    """Split multi-feature data top-down with 2-means until every cluster is one point.

    All points start in a single cluster. On each pass, every cluster that
    still holds more than one point is bisected with
    ``KMeans(n_clusters=2, random_state=0)``; passes repeat until only
    single-point clusters remain.

    Args:
        data: Sequence of points, each itself a sequence of coordinates
            (for example ``[[1, 2], [2, 3]]``).

    Returns:
        A list of ``(cluster1, cluster2)`` tuples, one per split, in the
        order the splits were performed. ``cluster1`` holds the points that
        KMeans labelled 0 and ``cluster2`` those labelled 1. The list is
        empty when ``data`` has fewer than two points.
    """
    clusters = [list(data)]
    divisions = []
    while any(len(cluster) > 1 for cluster in clusters):
        new_clusters = []
        for cluster in clusters:
            if len(cluster) <= 1:
                # Nothing left to split; carry the cluster over unchanged.
                new_clusters.append(cluster)
                continue

            labels = KMeans(n_clusters=2, random_state=0).fit(np.array(cluster)).labels_
            cluster1 = [point for point, label in zip(cluster, labels) if label == 0]
            cluster2 = [point for point, label in zip(cluster, labels) if label == 1]

            if not cluster1 or not cluster2:
                # All points received the same label (e.g. duplicate points),
                # so the cluster did not shrink. Peel off the first point to
                # guarantee progress; otherwise the outer loop never ends.
                cluster1, cluster2 = cluster[:1], cluster[1:]

            divisions.append((cluster1, cluster2))
            new_clusters.extend([cluster1, cluster2])
        clusters = new_clusters
    return divisions

# The splitter works on plain Python lists, so convert the array first.
divs_2d = divisive_clustering_2d(data_2d.tolist())

# Report the header followed by one numbered line per recorded split.
step_lines_2d = [f"Step {i}: {div}" for i, div in enumerate(divs_2d, start=1)]
print("\n".join(["Task 3: Divisive Clustering (2D) Steps:", *step_lines_2d]))

