In [29]:
from sklearn.cluster import MeanShift
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.metrics import accuracy_score,silhouette_score
import numpy as np

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
iris = load_iris()

In [3]:
X = iris.data

In [4]:
y = iris.target

In [16]:
ms = MeanShift(bandwidth=0.8)

In [17]:
ms.fit(X)

In [18]:
labels = ms.labels_

In [19]:
print(labels)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 3 0 3 2 3 2
 2 2 2 2 2 2 3 3 0 2 0 3 2 2 3 0 2 2 2 3 3 2 2 2 3 2 2 0 2 2 2 2 2 2 2 2 2
 2 2]


In [20]:
# Calculate accuracy
accuracy = accuracy_score(y, labels)

In [21]:
# Create a Pandas DataFrame to organize the data
data = {
    'Ground Truth Labels': y,
    'Mean Shift Predicted Labels': labels
}

In [22]:
df = pd.DataFrame(data)

In [23]:
print("Accuracy:", accuracy)
print(df)

Accuracy: 0.23333333333333334
     Ground Truth Labels  Mean Shift Predicted Labels
0                      0                            1
1                      0                            1
2                      0                            1
3                      0                            1
4                      0                            1
..                   ...                          ...
145                    2                            2
146                    2                            2
147                    2                            2
148                    2                            2
149                    2                            2

[150 rows x 2 columns]


### HyperParameter Tuning

In [30]:
# Define a range of bandwidth values to try
bandwidths = np.arange(0.1, 1.0, 0.1)

In [31]:
best_score = -1
best_bandwidth = None

In [32]:
# Iterate over bandwidth values
for bandwidth in bandwidths:
    ms = MeanShift(bandwidth=bandwidth)
    labels = ms.fit_predict(X)
    score = silhouette_score(X, labels)
    if score > best_score:
        best_score = score
        best_bandwidth = bandwidth

In [33]:
# Apply Mean Shift clustering with the best bandwidth
ms = MeanShift(bandwidth=best_bandwidth)
labels = ms.fit_predict(X)

In [34]:
# Create a Pandas DataFrame to organize the data
data = {
    'Ground Truth Labels': iris.target,
    'Mean Shift Predicted Labels': labels
}

In [35]:
df = pd.DataFrame(data)

In [36]:
print("Best Bandwidth:", best_bandwidth)
print("Best Silhouette Score:", best_score)
print(df)

Best Bandwidth: 0.9
Best Silhouette Score: 0.6857881712617192
     Ground Truth Labels  Mean Shift Predicted Labels
0                      0                            1
1                      0                            1
2                      0                            1
3                      0                            1
4                      0                            1
..                   ...                          ...
145                    2                            0
146                    2                            0
147                    2                            0
148                    2                            0
149                    2                            0

[150 rows x 2 columns]


Yes, Mean Shift clustering with a bandwidth of 0.9 can be considered as a viable clustering method for the given Iris dataset, achieving a silhouette score of approximately 0.686.