In [None]:
#K-means Clustering on Iris Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("iris.csv")

# Split features (only X, no y for clustering)
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values   # For evaluation only

# Encode string labels into numbers for comparison
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Apply K-Means with 3 clusters (since Iris has 3 species)
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
y_kmeans = kmeans.fit_predict(X)

print("Cluster Centers:\n", kmeans.cluster_centers_)
print("\nCluster Labels (assigned by KMeans):\n", np.unique(y_kmeans))

# Evaluate clustering by comparing with true labels
# (Note: Clusters are arbitrary, so we use accuracy after remapping)
print("\nConfusion Matrix:\n", confusion_matrix(y_encoded, y_kmeans))
print("\nClassification Report:\n", classification_report(y_encoded, y_kmeans))


Cluster Centers:
 [[6.85       3.07368421 5.74210526 2.07105263]
 [5.00408163 3.41632653 1.46530612 0.24489796]
 [5.9016129  2.7483871  4.39354839 1.43387097]]

Cluster Labels (assigned by KMeans):
 [0 1 2]

Confusion Matrix:
 [[ 0 49  0]
 [ 2  0 48]
 [36  0 14]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        49
           1       0.00      0.00      0.00        50
           2       0.23      0.28      0.25        50

    accuracy                           0.09       149
   macro avg       0.08      0.09      0.08       149
weighted avg       0.08      0.09      0.08       149

