In [2]:
import numpy as np
import plotly.graph_objects as go
from sklearn.datasets import make_blobs

# Build a reproducible toy dataset: 150 points scattered around 3 blob centers,
# each blob with a standard deviation of 0.8. make_blobs returns a
# (samples, true_labels) pair; the labels are bound to `_` and never used,
# because the clustering below works without them.
X, _ = make_blobs(n_samples=150, centers=3, cluster_std=0.8, random_state=42)

# X is a NumPy array with 150 rows; columns 0 and 1 are the two features
# plotted below.

# Scatter plot of the raw points, all in one neutral gray since no cluster
# assignment exists yet.
raw_marker = dict(color='#495057', size=7, opacity=0.8)

fig_raw = go.Figure()
fig_raw.add_trace(
    go.Scatter(x=X[:, 0], y=X[:, 1], mode='markers', marker=raw_marker)
)

# update_layout returns the figure, so as the last expression of the cell it
# also makes the notebook render the plot.
fig_raw.update_layout(
    title='Synthetic Data Points (Before Clustering)',
    xaxis_title='Feature 1',
    yaxis_title='Feature 2',
    width=600,
    height=450,
    plot_bgcolor='#f8f9fa',  # light background
)

# Explicit rendering for non-notebook environments:
# fig_raw.show()

In [3]:
from sklearn.cluster import KMeans

# Configure K-Means for K=3 and fit it to X in one chained call; fit() returns
# the fitted estimator itself. Fitting is where the algorithm iterates,
# alternately assigning points to clusters and updating the centroids.
kmeans = KMeans(n_clusters=3, n_init='auto', random_state=42).fit(X)

# Pull the results off the fitted model:
#   labels_          -> cluster assignment for each data point
#   cluster_centers_ -> coordinates of the final cluster centers (centroids)
cluster_labels, centroids = kmeans.labels_, kmeans.cluster_centers_

# Uncomment to inspect the raw results:
# print("Cluster labels assigned to each point:", cluster_labels)
# print("Coordinates of final centroids:\n", centroids)

In [4]:
# Colors for the clusters (indigo, teal, orange) and for the centroid markers.
cluster_colors = ['#4263eb', '#12b886', '#fd7e14']
centroid_color = '#f03e3e'  # red for centroids

# Take the cluster count from the fitted centroids (one row per cluster)
# instead of repeating a literal, so the loop and the title always agree with
# the model that was actually fitted.
n_clusters = len(centroids)

# Create the plot
fig_clustered = go.Figure()

# One trace per cluster so each gets its own color and legend entry.
for i in range(n_clusters):
    points_in_cluster = X[cluster_labels == i]  # boolean mask on the labels
    fig_clustered.add_trace(go.Scatter(
        x=points_in_cluster[:, 0],
        y=points_in_cluster[:, 1],
        mode='markers',
        marker=dict(
            # Cycle through the palette so more clusters than colors reuses
            # colors instead of raising IndexError.
            color=cluster_colors[i % len(cluster_colors)],
            size=7,
            opacity=0.8,
        ),
        name=f'Cluster {i}'
    ))

# Overlay the centroids as large red crosses.
fig_clustered.add_trace(go.Scatter(
    x=centroids[:, 0],
    y=centroids[:, 1],
    mode='markers',
    marker=dict(color=centroid_color, size=14, symbol='x', line=dict(width=3)),
    name='Centroids'
))

# update_layout returns the figure, so as the last expression of the cell it
# also makes the notebook render the plot.
fig_clustered.update_layout(
    title=f'K-Means Clustering Results (K={n_clusters})',
    xaxis_title='Feature 1',
    yaxis_title='Feature 2',
    width=600,
    height=450,
    plot_bgcolor='#f8f9fa',
    legend_title_text='Legend'
)

# Explicit rendering for non-notebook environments:
# fig_clustered.show()