In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

# Iris measurements: one row per flower, one column per feature.
iris = datasets.load_iris()
X = iris.data

# Menu number -> linkage name, numbered from 1 in display order.
linkage_methods = dict(
    enumerate(('ward', 'complete', 'average', 'single'), start=1)
)

In [None]:
# Interactive cell: ask for a linkage method, draw the dendrogram for it, then
# cut the hierarchy into as many clusters as there are Iris species and report
# each sample's cluster next to its true species.
print("Choose a linkage method:")
for key, method in linkage_methods.items():
    print(f"Press {key} for {method} linkage")

# Non-numeric input is routed to the "Invalid choice" branch below instead of
# aborting the cell with a ValueError.
try:
    choice = int(input("Enter your choice: "))
except ValueError:
    choice = None

# Perform hierarchical clustering based on user's choice
if choice in linkage_methods:
    method = linkage_methods[choice]

    # Calculate the linkage matrix
    Z = linkage(X, method)

    # Create dendrogram; leaves are labelled with each sample's true species
    plt.figure(figsize=(12, 6))
    plt.title(f'Dendrogram for {method} linkage')
    dendrogram(Z, labels=iris.target_names[iris.target], orientation='top')
    plt.show()

    # Perform clustering using AgglomerativeClustering
    n_clusters = len(iris.target_names)
    clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage=method)
    clustering.fit(X)

    # Print cluster assignments for each data point.
    # Cluster ids are arbitrary indices and do NOT correspond to species
    # indices, so the species shown is looked up from the ground-truth target
    # of the sample rather than from the cluster id.
    cluster_labels = clustering.labels_
    for i, (label, target) in enumerate(zip(cluster_labels, iris.target)):
        print(f"Data point {i} is assigned to cluster {label} (true species: {iris.target_names[target]})")
else:
    print("Invalid choice. Please choose a valid linkage method.")

Choose a linkage method:
Press 1 for ward linkage
Press 2 for complete linkage
Press 3 for average linkage
Press 4 for single linkage


## Agglomerative Clustering:

Agglomerative Clustering is a hierarchical clustering algorithm used to group data points into clusters.
It starts with each data point as a single cluster and iteratively merges the closest clusters until a single cluster or a predefined number of clusters is reached.

## Linkage and Dendrogram:

Linkage in hierarchical clustering defines how the distance between clusters is calculated. It determines which clusters to merge in each step.

A Dendrogram is a tree-like diagram that represents the hierarchical clustering process, showing the merging of clusters at different levels. It helps to visualize the relationships between data points and clusters.

## Types of Linkage and Differences:

There are several types of linkage methods, including:

- **Ward Linkage:** Merges the pair of clusters that gives the smallest increase in total within-cluster variance. It often results in similarly sized, compact clusters.
- **Complete Linkage:** Computes the maximum pairwise distance between data points in different clusters. It tends to produce compact clusters of similar diameter, but it is sensitive to outliers.
- **Average Linkage:** Computes the average pairwise distance between data points in different clusters. It is less sensitive to outliers.
- **Single Linkage:** Computes the minimum pairwise distance between data points in different clusters. It can create long, trailing clusters (the "chaining" effect).

The main difference between these linkage methods is how they measure the distance between clusters, which affects the shape and structure of the resulting clusters. Ward is often preferred for balanced, well-separated clusters, while single linkage can create elongated clusters. The choice depends on the data and the specific problem.