### Clustering the Iris dataset after dropping the species label

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import KElbowVisualizer


: 

In [None]:
# Load the iris CSV and discard the 'species' label column so that the data
# can be treated as an unsupervised (unlabelled) clustering problem.
df = pd.read_csv('./datasets/iris.csv').drop(columns='species')
# NumPy array of the remaining columns, used as input to the clustering code.
X = df.to_numpy()

In [None]:
# Visualizing the dataset
# pairplot is a figure-level function that builds its own figure, so no
# plt.figure() call is made beforehand (it would only produce an extra blank
# figure); use pairplot's height/aspect arguments to change the plot size.
sns.set_style('darkgrid')
sns.pairplot(df)
plt.show()

In [None]:
# Elbow method: let the visualizer fit KMeans for a range of candidate k
# values and plot the result, in order to choose the optimum number of clusters.
# NOTE(review): per the yellowbrick docs a 2-tuple is expanded like a range
# with an exclusive upper bound (k = 2..7 here) — confirm for the installed version.
k_bounds = (2, 8)
model = KMeans(random_state=42)
visualizer = KElbowVisualizer(model, k=k_bounds)

visualizer.fit(X)
visualizer.show()


In [None]:
# Report the chosen K together with the clustering quality metrics for that K.
K = visualizer.elbow_value_
if K is None:
    # No elbow was detected, so there is no K to score or report.
    raise ValueError("KElbowVisualizer did not find an elbow; choose K manually")

# The elbow search refits `model` once per candidate k, so its labels_ and
# inertia_ are not guaranteed to belong to K. Refit at K so that the metrics
# printed below describe the clustering that is actually reported.
model.set_params(n_clusters=K)
model.fit(X)

sil_score = silhouette_score(X, model.labels_)
print("The best value of K is =", K)
print("Silhouette Score:", sil_score)
print("Inertia:", model.inertia_)

In [None]:
# Fitting the model with best value of K
# random_state is fixed (matching the elbow search above) so the cluster
# assignments and their numbering are reproducible from run to run.
model = KMeans(n_clusters=K, random_state=42)
model.fit(X)

In [None]:
# Attach a human-readable cluster label to every row so the clustered data
# can be inspected; labels are 1-based ("Species 1", "Species 2", ...).
cluster = model.labels_
cluster_names = [f"Species {label + 1}" for label in cluster]
df["cluster"] = cluster_names

In [None]:
# Pairplot of the data, colouring the points by their assigned cluster
sns.pairplot(data=df, hue='cluster')
plt.show()