<a href="https://colab.research.google.com/github/SyedT1/Data-Mining-CSC417/blob/main/Week%203/KMeans.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
# Read the Iris CSV straight from the UCI archive. The column names are
# supplied by hand, so pandas is told explicitly not to look for a header row.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
dataset = pd.read_csv(url, header=None, names=names)
# Only the first three measurements are clustered (and later plotted in 3-D);
# 'petal_width' and 'class' stay in `dataset` but are left out of `data`.
data = dataset[names[:3]].values

In [12]:
# Number of clusters to fit.
K = 3
# Seed the global NumPy RNG so the initial centroids are the same on every run.
np.random.seed(42)
# Pick K distinct row indices of `data`; those rows become the starting centroids.
initial_centroids_indices = np.random.choice(data.shape[0], size=K, replace=False)
centroids = data[initial_centroids_indices]

In [13]:

def assign_to_clusters(data, centroids):
    """Label every point with the index of its closest centroid.

    Parameters
    ----------
    data : array of points, one row per point (N rows, D columns).
    centroids : array of centroids, one row per centroid (K rows, D columns).

    Returns
    -------
    Integer array of length N; entry i is the row index in `centroids`
    with the smallest Euclidean distance to ``data[i]``. Ties go to the
    lowest centroid index (``argmin`` semantics).
    """
    # Insert an axis after the centroid axis so the subtraction broadcasts
    # to one offset vector per (centroid, point) pair.
    offsets = data - np.expand_dims(centroids, axis=1)
    # Collapse the feature axis to get a (centroid, point) distance table.
    distances = np.sqrt(np.square(offsets).sum(axis=2))
    # For each point (column), pick the centroid (row) with the smallest distance.
    return distances.argmin(axis=0)


def update_centroids(data, clusters, K):
    """Recompute each centroid as the mean of the points assigned to it.

    Parameters
    ----------
    data : array of points, one row per point.
    clusters : array with one cluster label per row of `data`.
    K : number of clusters; labels 0 .. K-1 are processed.

    Returns
    -------
    Float array with K rows and ``data.shape[1]`` columns. Row k is the
    mean of the rows of `data` labelled k. A label with no points gets a
    randomly chosen row of `data` instead (drawn from the global NumPy RNG).
    """
    n_features = data.shape[1]
    new_centroids = np.zeros((K, n_features))
    for k in range(K):
        members = data[clusters == k]
        if len(members) == 0:
            # Empty cluster: a mean is undefined, so re-seed this centroid
            # on a random data point rather than leaving it at the origin.
            new_centroids[k] = data[np.random.choice(len(data), 1)]
            continue
        new_centroids[k] = np.mean(members, axis=0)
    return new_centroids


In [14]:
# Upper bound on assignment/update rounds, in case the centroids never settle.
max_iterations = 100
# K-means iteration: assign every point to its nearest centroid, recompute the
# centroids from those assignments, and stop once the centroids no longer move.
for iteration in range(max_iterations):
    clusters = assign_to_clusters(data, centroids)
    new_centroids = update_centroids(data, clusters, K)
    # Exact equality is intentional: once the assignments stop changing, the
    # recomputed means are bit-for-bit the same as the previous ones.
    if np.array_equal(centroids, new_centroids):
        print(f"Converged after {iteration+1} iterations.")
        break

    centroids = new_centroids
else:
    # The iteration budget ran out without convergence. `clusters` (if set at
    # all) was computed from the previous centroids, so refresh it to match the
    # final `centroids`, and say so instead of finishing silently.
    clusters = assign_to_clusters(data, centroids)
    print(f"Did not converge within {max_iterations} iterations.")


Converged after 7 iterations.


In [15]:
# Cluster ids are categories, not magnitudes. Passing the raw integer array as
# `color` makes Plotly Express draw a continuous colour bar, so the labels are
# attached as a string column instead, which yields discrete colours and a
# per-cluster legend. `dataset` itself is not modified (`assign` returns a copy).
fig = px.scatter_3d(
    dataset.assign(cluster=clusters.astype(str)),
    x='sepal_length', y='sepal_width', z='petal_length',
    color='cluster', symbol='cluster',
    # Keep the legend in numeric cluster order rather than order of appearance.
    category_orders={'cluster': [str(label) for label in np.unique(clusters)]},
    size_max=8, opacity=0.8, title='K-Means Clustering of Iris Dataset'
)

In [16]:
# Put the final centroids in a frame whose columns match the plotted features.
centroid_df = pd.DataFrame(centroids, columns=['sepal_length', 'sepal_width', 'petal_length'])

# Overlay the centroids on the existing cluster figure as black circles.
# Note: this adds a trace to `fig` in place, so re-running this cell without
# re-creating `fig` first stacks another 'Centroids' trace on top.
fig.add_scatter3d(
    x=centroid_df['sepal_length'],
    y=centroid_df['sepal_width'],
    z=centroid_df['petal_length'],
    mode='markers',
    marker={'color': 'black', 'symbol': 'circle', 'size': 6},
    name='Centroids',
)

# Human-readable axis titles for the 3-D scene, then render the figure.
fig.update_layout(
    scene={
        'xaxis_title': 'Sepal Length',
        'yaxis_title': 'Sepal Width',
        'zaxis_title': 'Petal Length',
    }
)
fig.show()