In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, MeanShift
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score


In [12]:
# Load dataset
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
columns = ["Class", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
           "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins",
           "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline"]
data = pd.read_csv(data_url, header=None, names=columns)

# Features only
X = data.iloc[:, 1:]

# Normalize data
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)


In [13]:
# Define clustering methods and results
clustering_algorithms = {
    "K-Means": KMeans,
    "Hierarchical": AgglomerativeClustering,
    "Mean-Shift": MeanShift
}

results = []
for technique_name, technique in clustering_algorithms.items():
    for c in range(3, 6):  # 3, 4, 5 clusters
        try:
            if technique_name == "Mean-Shift":
                model = technique()
            else:
                model = technique(n_clusters=c)

            model.fit(X_normalized)
            labels = model.labels_

            # Calculate evaluation metrics
            silhouette = silhouette_score(X_normalized, labels)
            calinski = calinski_harabasz_score(X_normalized, labels)
            davies = davies_bouldin_score(X_normalized, labels)
        except Exception:
            silhouette, calinski, davies = np.nan, np.nan, np.nan

        results.append({
            "Technique": technique_name,
            "Clusters": c,
            "Silhouette": silhouette,
            "Calinski-Harabasz": calinski,
            "Davies-Bouldin": davies
        })

results_df = pd.DataFrame(results)


In [14]:
# Table
def format_table(technique):
    subset = results_df[results_df['Technique'] == technique]
    pivot_table = subset.pivot_table(
        index='Clusters',
        columns='Technique',
        values=['Silhouette', 'Calinski-Harabasz', 'Davies-Bouldin']
    )
    return pivot_table

# Results
for technique in results_df['Technique'].unique():
    print(f"Performance using {technique} Clustering")
    display(format_table(technique))


Performance using K-Means Clustering


Unnamed: 0_level_0,Calinski-Harabasz,Davies-Bouldin,Silhouette
Technique,K-Means,K-Means,K-Means
Clusters,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
3,70.836887,1.391794,0.285942
4,53.265619,1.667414,0.213948
5,46.388997,1.989524,0.199552


Performance using Hierarchical Clustering


Unnamed: 0_level_0,Calinski-Harabasz,Davies-Bouldin,Silhouette
Technique,Hierarchical,Hierarchical,Hierarchical
Clusters,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
3,67.647468,1.418592,0.277444
4,51.464146,1.788651,0.225837
5,43.679272,1.922855,0.186742


Performance using Mean-Shift Clustering


Unnamed: 0_level_0,Calinski-Harabasz,Davies-Bouldin,Silhouette
Technique,Mean-Shift,Mean-Shift,Mean-Shift
Clusters,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
3,6.435434,1.320059,0.224476
4,6.435434,1.320059,0.224476
5,6.435434,1.320059,0.224476
