## **Hierarchical Clustering**

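Hierarchical clustering builds a hierarchy of nested clusters, either by repeatedly merging the closest clusters (agglomerative, bottom-up) or by repeatedly splitting them (divisive, top-down). This section uses the agglomerative approach, where a linkage criterion defines how the distance between two clusters is measured.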



**Imports**

In [3]:
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage


**Data Loading**

In [None]:
# Read the raw table from disk; replace the placeholder path with your own CSV file.
data = pd.read_csv(filepath_or_buffer='your_dataset.csv')

# Clustering distances are computed on numbers, so keep only the numeric columns.
X = data.select_dtypes(include=np.number)

# Sanity check: preview the top five rows of the feature matrix.
print(X.head(n=5))


**Dendrogram**

In [None]:
# Build the merge history of the samples using Ward's variance-minimization linkage.
linked = linkage(X, method='ward')

# Draw the tree on an explicit Axes instead of relying on pyplot's implicit
# "current figure" state.
fig, ax = plt.subplots(figsize=(10, 7))
dendrogram(
    linked,
    orientation='top',
    distance_sort='descending',
    show_leaf_counts=True,
    ax=ax,
)
ax.set_title('Dendrogram')
ax.set_xlabel('Samples')
ax.set_ylabel('Euclidean Distance')
plt.show()


**Model Building**

In [None]:
# Fit Ward-linkage agglomerative clustering and get one cluster label per row of X.
# The distance metric is intentionally not passed: the `affinity` keyword was
# deprecated in scikit-learn 1.2 and removed in 1.4 (renamed to `metric`), and
# Ward linkage only supports Euclidean distance, which is already the default.
# n_clusters=3 is a placeholder -- adjust it after inspecting the dendrogram.
hierarchical = AgglomerativeClustering(n_clusters=3, linkage='ward')
clusters = hierarchical.fit_predict(X)


**Visualizations**

In [None]:
# Visualize the clusters using the first two numeric columns of X
# (a faithful picture only when the data is 2-D).
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=clusters, cmap='viridis', s=50)

# Agglomerative clustering has no `cluster_centers_` attribute (that belongs to
# k-means), so mark each cluster by the mean of its member points instead.
cluster_means = X.groupby(clusters).mean()
plt.scatter(cluster_means.iloc[:, 0], cluster_means.iloc[:, 1], color='red', marker='X', s=200, label='Cluster means')
plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
