<a href="https://colab.research.google.com/github/DikshaNadiga123/ML-LAB/blob/main/K_Means.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import silhouette_score

# K-Means Clustering Implementation
def kmeans(X, n_clusters, max_iters=300, tol=1e-4, seed=42):
    """Cluster the rows of ``X`` into ``n_clusters`` groups with Lloyd's algorithm.

    Initial centroids are ``n_clusters`` distinct rows of ``X`` drawn without
    replacement. Each iteration assigns every sample to its nearest centroid
    (Euclidean distance) and moves each centroid to the mean of its members.

    Args:
        X: Array-like of shape (n_samples, n_features).
        n_clusters: Number of clusters; must satisfy 1 <= n_clusters <= n_samples.
        max_iters: Upper bound on the number of update iterations.
        tol: Iteration stops once the Frobenius norm of the centroid shift
            falls below this value.
        seed: Seed for the random initial centroid choice. The default of 42
            reproduces the previously hard-coded behaviour.

    Returns:
        A ``(centroids, cluster_assignments)`` tuple: ``centroids`` has shape
        (n_clusters, n_features) and ``cluster_assignments`` has shape
        (n_samples,) holding, for each sample, the index of the nearest
        returned centroid.

    Raises:
        ValueError: If ``X`` is not 2-D or ``n_clusters`` is out of range.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples = X.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and n_samples={n_samples}, "
            f"got {n_clusters}"
        )

    # Randomly pick distinct samples as the starting centroids
    rng = np.random.default_rng(seed=seed)
    centroids = X[rng.choice(n_samples, n_clusters, replace=False)]

    for _ in range(max_iters):
        cluster_assignments = _nearest_centroid(X, centroids)

        # Start from the current centroids so that a cluster that lost all of
        # its members keeps its position instead of becoming NaN (the mean of
        # an empty slice), which would corrupt every later distance.
        new_centroids = centroids.copy()
        for k in range(n_clusters):
            members = X[cluster_assignments == k]
            if len(members) > 0:
                new_centroids[k] = members.mean(axis=0)

        # Converged: keep the centroids the current assignments were made for
        if np.linalg.norm(new_centroids - centroids) < tol:
            break

        centroids = new_centroids

    # Recompute the labels against the centroids actually returned. This keeps
    # both outputs consistent when the loop ran out of iterations and also
    # covers max_iters == 0, where the loop body never executed.
    return centroids, _nearest_centroid(X, centroids)


def _nearest_centroid(X, centroids):
    """Return, for each row of ``X``, the index of the closest centroid."""
    distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
    return np.argmin(distances, axis=1)

# Read the Iris CSV (path points at the Colab session's /content directory)
iris = pd.read_csv('/content/Iris.csv')

# Feature matrix: the four measurement columns, in this fixed order
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = iris[feature_columns].to_numpy()

# Partition the samples into three clusters
n_clusters = 3
centroids, cluster_assignments = kmeans(X, n_clusters)

# Score the partition with the silhouette coefficient
silhouette_avg = silhouette_score(X, cluster_assignments)

# Report centroids, per-sample labels and the score
print("Centroids:", centroids, sep="\n")
print("\nCluster Assignments:", cluster_assignments, sep="\n")
print("\nSilhouette Score:", silhouette_avg)

# Attach each sample's cluster label to the original table and preview it
iris['Cluster'] = cluster_assignments
print("\nDataset with Clusters:", iris.head(), sep="\n")

Centroids:
[[5.88360656 2.74098361 4.38852459 1.43442623]
 [5.006      3.418      1.464      0.244     ]
 [6.85384615 3.07692308 5.71538462 2.05384615]]

Cluster Assignments:
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 0 2 2 2 2
 2 2 0 0 2 2 2 2 0 2 0 2 0 2 2 0 0 2 2 2 2 2 0 2 2 2 2 0 2 2 2 0 2 2 2 0 2
 2 0]

Silhouette Score: 0.5509643746707443

Dataset with Clusters:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species  \
0   1            5.1           3.5            1.4           0.2  Iris-setosa   
1   2            4.9           3.0            1.4           0.2  Iris-setosa   
2   3            4.7           3.2            1.3           0.2  Iris-setosa   
3   4            4.6           3.1            1.5           0.2  Iris-setosa   
4   5            5.0           3.6            1.4        