In [12]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Circle
from sklearn.datasets import make_blobs, make_moons, make_circles
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from scipy.spatial import Voronoi, voronoi_plot_2d
from scipy.spatial.distance import cdist
import warnings
# NOTE(review): this suppresses every warning category for the whole kernel
# session (including deprecation notices from the imported libraries);
# consider narrowing the filter to the specific warnings that are noisy.
warnings.filterwarnings('ignore')

# Notebook-wide plotting defaults: seaborn grid style and default matplotlib figure size.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)

# Interactive K-Means Clustering Visualization

This notebook provides a comprehensive, interactive visualization of the K-Means clustering algorithm with step-by-step mathematical operations.

## Features:

1. **Step-by-Step Visualization:**
   - Initial random centroid placement
   - Assignment step with distance calculations
   - Update step showing centroid recalculation
   - Convergence detection

2. **Mathematical Animations:**
   - Euclidean distance calculations: $d(x, c) = \sqrt{\sum_i (x_i - c_i)^2}$
   - Distance vectors from points to centroids
   - Centroid movement trajectories
   - Within-cluster sum of squares (WCSS) optimization

3. **Interactive Controls:**
   - Number of clusters (k) slider: 2-10
   - Random seed control
   - Step-through buttons
   - Animation speed control
   - Manual centroid placement

4. **Visual Elements:**
   - Color-coded clusters with Voronoi regions
   - Decision boundaries
   - Iteration counter and convergence metrics
   - Real-time WCSS graph

In [13]:
# ============================================================================
# Dataset Generation Module
# ============================================================================

class DataGenerator:
    """Generate various datasets for K-Means visualization.

    All generators are static methods that return a ``(X, y)`` tuple where
    ``X`` holds the sample coordinates and ``y`` the generator's own labels.
    None of them touches NumPy's global random state, so calling a generator
    does not change the random numbers produced elsewhere in the notebook.
    """

    @staticmethod
    def generate_blobs(n_samples=300, n_centers=3, random_state=42, noise=1.0, separation=1.0):
        """Generate blob dataset with adjustable separation and noise.

        Parameters
        ----------
        n_samples : int
            Number of points, forwarded to ``make_blobs``.
        n_centers : int
            Number of blob centers, forwarded as ``centers``.
        random_state : int
            Seed forwarded to ``make_blobs``.
        noise : float
            Forwarded as ``cluster_std``.
        separation : float
            Scales the box the centers are drawn from:
            ``center_box=(-10*separation, 10*separation)``.

        Returns
        -------
        (X, y) as returned by ``make_blobs``.
        """
        X, y = make_blobs(n_samples=n_samples, centers=n_centers,
                          cluster_std=noise, random_state=random_state,
                          center_box=(-10 * separation, 10 * separation))
        return X, y

    @staticmethod
    def generate_moons(n_samples=300, random_state=42, noise=0.1):
        """Generate moon-shaped clusters.

        Returns the ``make_moons`` output with the coordinates multiplied by 5
        so they span a range similar to the blob datasets.
        """
        X, y = make_moons(n_samples=n_samples, noise=noise, random_state=random_state)
        # Scale to similar range as blobs
        X = X * 5
        return X, y

    @staticmethod
    def generate_circles(n_samples=300, random_state=42, noise=0.1, factor=0.5):
        """Generate circular clusters.

        Returns the ``make_circles`` output (``factor`` is forwarded unchanged)
        with the coordinates multiplied by 5, matching ``generate_moons``.
        """
        X, y = make_circles(n_samples=n_samples, noise=noise,
                            random_state=random_state, factor=factor)
        X = X * 5
        return X, y

    @staticmethod
    def generate_random(n_samples=300, random_state=42, bounds=(-10, 10)):
        """Generate random uniform distribution.

        Parameters
        ----------
        n_samples : int
            Number of points to draw.
        random_state : int or None
            Seed for a private ``numpy.random.RandomState``.
        bounds : tuple (low, high)
            Range used for both coordinates.

        Returns
        -------
        X : ndarray of shape (n_samples, 2)
            Points drawn uniformly from ``[low, high)`` on each axis.
        y : ndarray of shape (n_samples,)
            All zeros (there is no cluster structure).
        """
        # A private RandomState yields the same stream as np.random.seed(seed)
        # followed by np.random.uniform, but leaves the global generator untouched.
        rng = np.random.RandomState(random_state)
        X = rng.uniform(bounds[0], bounds[1], (n_samples, 2))
        y = np.zeros(n_samples)
        return X, y

def generate_data(data_type='blobs', n_samples=300, n_centers=3, random_state=42, **kwargs):
    """Single entry point that dispatches to the matching ``DataGenerator`` method.

    ``data_type`` selects the generator ('blobs', 'moons', 'circles' or
    'random'). Optional tuning values are read from ``kwargs``:
    ``noise``/``separation`` for blobs, ``noise`` for moons,
    ``noise``/``factor`` for circles and ``bounds`` for random.
    Any other ``data_type`` yields blobs built with the generator's default
    noise and separation (``kwargs`` are not consulted in that case).

    Returns the ``(X, y)`` tuple produced by the selected generator.
    """
    if data_type == 'blobs':
        blob_noise = kwargs.get('noise', 1.0)
        blob_separation = kwargs.get('separation', 1.0)
        return DataGenerator.generate_blobs(n_samples, n_centers, random_state,
                                            blob_noise, blob_separation)

    if data_type == 'moons':
        moon_noise = kwargs.get('noise', 0.1)
        return DataGenerator.generate_moons(n_samples, random_state, moon_noise)

    if data_type == 'circles':
        ring_noise = kwargs.get('noise', 0.1)
        ring_factor = kwargs.get('factor', 0.5)
        return DataGenerator.generate_circles(n_samples, random_state,
                                              ring_noise, ring_factor)

    if data_type == 'random':
        box = kwargs.get('bounds', (-10, 10))
        return DataGenerator.generate_random(n_samples, random_state, box)

    # Unrecognised type: fall back to default blobs.
    return DataGenerator.generate_blobs(n_samples, n_centers, random_state)

In [14]:
# ============================================================================
# Enhanced K-Means Class with Step-by-Step Tracking
# ============================================================================

class KMeans:
    """
    K-means clustering (Lloyd's algorithm) with detailed step-by-step tracking
    for visualization.

    Parameters:
    -----------
    n_clusters : int
        Number of clusters to form
    max_iters : int, optional (default=100)
        Maximum number of iterations to perform
    random_state : int, optional
        Random seed for reproducibility
    tolerance : float, optional (default=1e-4)
        Convergence tolerance: iteration stops once the largest centroid
        displacement of a step is below this value

    Attributes set by ``fit``:
    --------------------------
    X : the data passed to ``fit``
    centroids_ : ndarray (n_clusters, n_features), final centroids
    labels_ : labels stored in the last history entry
    inertia_ : last recorded WCSS value
    inertia_history_ : list of WCSS values, one per history entry
    history : list of dicts, entry 0 is the initial state and entry i the
        result of iteration i. Keys: 'iteration', 'step_type',
        'old_centroids', 'new_centroids', 'labels', 'distances',
        'cluster_sizes', 'inertia', 'movement', 'converged'.
    """
    def __init__(self, n_clusters, max_iters=100, random_state=None, tolerance=1e-4):
        self.n_clusters = n_clusters
        self.max_iters = max_iters
        self.random_state = random_state
        self.tolerance = tolerance
        self.inertia_ = None
        self.history = []  # Store complete history of each step

    def initialize_centroids(self, X, method='random'):
        """
        Initialize centroids.

        Parameters:
        -----------
        X : ndarray of shape (n_samples, n_features)
        method : 'random' or array-like
            'random' picks ``n_clusters`` distinct rows of ``X`` using a
            private generator seeded with ``random_state``. Anything that is
            not a string is treated as an explicit array of centroids.

        Returns: ndarray of centroids.
        Raises: ValueError for an unknown method name.
        """
        # Test the type first: comparing an ndarray with == 'random' is an
        # elementwise comparison and cannot be used as an ``if`` condition.
        if isinstance(method, str):
            if method != 'random':
                raise ValueError(f"Unknown initialization method: {method!r}")
            # Private RandomState: same stream as np.random.seed(seed) followed by
            # np.random.choice, without reseeding the global generator.
            rng = np.random.RandomState(self.random_state)
            idx = rng.choice(X.shape[0], self.n_clusters, replace=False)
            return X[idx].copy()

        # Manual initialization: caller supplied the centroid positions.
        return np.asarray(method, dtype=float)

    def compute_distances(self, X, centroids):
        """
        Compute distances from each point to each centroid.
        Returns: (n_clusters, n_samples) array of distances
        """
        centroids = np.asarray(centroids)
        # Euclidean distance: d(x,c) = √Σ(xi - ci)²
        # Broadcasting (n_samples, d) against (n_clusters, 1, d) gives (n_clusters, n_samples, d).
        distances = np.sqrt(((X - centroids[:, np.newaxis])**2).sum(axis=2))
        return distances

    def assign_clusters(self, X, centroids):
        """
        Assign each point to nearest centroid based on Euclidean distance.
        Returns: (labels, distances) where distances has shape (n_clusters, n_samples).
        """
        distances = self.compute_distances(X, centroids)
        labels = np.argmin(distances, axis=0)
        return labels, distances

    def update_centroids(self, X, labels, previous_centroids=None):
        """
        Update centroids as mean of assigned points.
        Formula: c_new = (1/n) * Σxi for all points xi in cluster

        Parameters:
        -----------
        X : ndarray (n_samples, n_features)
        labels : ndarray (n_samples,) of cluster indices
        previous_centroids : ndarray (n_clusters, n_features), optional
            Positions to keep for clusters that received no points. When
            omitted, an empty cluster's centroid is left at the origin
            (the behaviour before this parameter existed).

        Returns: (centroids, cluster_sizes)
        """
        centroids = np.zeros((self.n_clusters, X.shape[1]))
        cluster_sizes = np.zeros(self.n_clusters)

        for k in range(self.n_clusters):
            cluster_points = X[labels == k]
            if len(cluster_points) > 0:
                centroids[k] = np.mean(cluster_points, axis=0)
                cluster_sizes[k] = len(cluster_points)
            elif previous_centroids is not None:
                # Empty cluster: stay put instead of jumping to (0, ..., 0).
                centroids[k] = previous_centroids[k]

        return centroids, cluster_sizes

    def compute_inertia(self, X, labels, centroids):
        """
        Compute within-cluster sum of squares (WCSS/Inertia).
        Formula: J = Σ_j Σ_{xi in cluster j} ||xi - μj||²
        """
        # Squared distance of every point to its own centroid; no sqrt round-trip needed.
        squared_distances = ((X - centroids[labels])**2).sum(axis=1)
        return np.sum(squared_distances)

    def compute_centroid_movement(self, old_centroids, new_centroids):
        """Compute how much centroids moved (largest per-centroid displacement)."""
        if old_centroids is None:
            return np.inf
        return np.max(np.linalg.norm(new_centroids - old_centroids, axis=1))

    def step(self, X, centroids):
        """
        Perform one iteration of K-Means algorithm.
        Returns ``(new_centroids, step_info)``.

        Note: 'labels' and 'distances' in ``step_info`` are measured against
        the incoming (old) centroids, while 'inertia' is evaluated with those
        labels and the updated centroids.
        """
        old_centroids = centroids.copy()

        # Assignment step
        labels, distances = self.assign_clusters(X, centroids)

        # Update step (empty clusters keep their previous position)
        new_centroids, cluster_sizes = self.update_centroids(X, labels, old_centroids)

        # Calculate metrics
        inertia = self.compute_inertia(X, labels, new_centroids)
        movement = self.compute_centroid_movement(old_centroids, new_centroids)
        converged = movement < self.tolerance

        step_info = {
            'old_centroids': old_centroids,
            'new_centroids': new_centroids,
            'labels': labels,
            'distances': distances,
            'cluster_sizes': cluster_sizes,
            'inertia': inertia,
            'movement': movement,
            'converged': converged
        }

        return new_centroids, step_info

    def fit(self, X, initial_centroids=None):
        """
        Fit K-means clustering to the data with step-by-step tracking.

        Parameters:
        -----------
        X : array-like of shape (n_samples, n_features)
            Training data
        initial_centroids : array-like, optional
            Initial centroid positions (for manual placement); must have
            shape (n_clusters, n_features)

        Returns: self
        Raises: ValueError if ``initial_centroids`` has the wrong shape.
        """
        X = np.asarray(X)
        self.X = X
        self.history = []
        self.inertia_history_ = []

        # Initialize centroids
        if initial_centroids is None:
            self.centroids_ = self.initialize_centroids(X)
        else:
            manual = np.array(initial_centroids, dtype=float)
            expected_shape = (self.n_clusters, X.shape[1])
            if manual.shape != expected_shape:
                # A mismatch would otherwise silently change the number of
                # centroids after the first update step.
                raise ValueError(
                    f"initial_centroids must have shape {expected_shape}, got {manual.shape}"
                )
            self.centroids_ = manual

        # Store initial state
        initial_labels, initial_distances = self.assign_clusters(X, self.centroids_)
        initial_inertia = self.compute_inertia(X, initial_labels, self.centroids_)

        self.history.append({
            'iteration': 0,
            'old_centroids': None,
            'new_centroids': self.centroids_.copy(),
            'labels': initial_labels,
            'distances': initial_distances,
            'cluster_sizes': np.bincount(initial_labels, minlength=self.n_clusters),
            'inertia': initial_inertia,
            'movement': np.inf,
            'converged': False,
            'step_type': 'initialization'
        })
        self.inertia_history_.append(initial_inertia)

        # Main loop
        for i in range(self.max_iters):
            self.centroids_, step_info = self.step(X, self.centroids_)
            step_info['iteration'] = i + 1
            step_info['step_type'] = 'iteration'
            self.history.append(step_info)
            self.inertia_history_.append(step_info['inertia'])

            if step_info['converged']:
                break

        self.inertia_ = self.inertia_history_[-1]
        self.labels_ = self.history[-1]['labels']

        return self

    def get_step(self, iteration):
        """Get the state at a specific iteration, or None if it is out of range."""
        if 0 <= iteration < len(self.history):
            return self.history[iteration]
        return None

In [15]:
# ============================================================================
# Visualization Components
# ============================================================================

def compute_voronoi_regions(centroids, bounds):
    """Approximate Voronoi cells by labelling a regular grid with its nearest centroid.

    Parameters
    ----------
    centroids : ndarray of shape (n_clusters, 2)
    bounds : sequence ``[x_min, x_max, y_min, y_max]``; each side is padded
        by 5 units before the 100 x 100 grid is laid out.

    Returns
    -------
    grid_points : ndarray (10000, 2), flattened grid coordinates
    grid_labels : ndarray (10000,), index of the closest centroid per grid point
    xx, yy : the two (100, 100) meshgrid arrays
    """
    pad = 5
    resolution = 100

    # Padded extents so the regions reach beyond the outermost data points.
    left, right = bounds[0] - pad, bounds[1] + pad
    bottom, top = bounds[2] - pad, bounds[3] + pad

    xx, yy = np.meshgrid(
        np.linspace(left, right, resolution),
        np.linspace(bottom, top, resolution),
    )
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))

    # (n_clusters, n_grid, 2) offsets -> Euclidean distance per centroid/grid pair.
    offsets = grid_points - centroids[:, np.newaxis]
    dist_to_centroid = np.sqrt((offsets ** 2).sum(axis=2))
    grid_labels = dist_to_centroid.argmin(axis=0)

    return grid_points, grid_labels, xx, yy

def get_cluster_colors(n_clusters):
    """Get colors for clusters.

    Colors are taken from Plotly's qualitative ``Set3`` palette first and then
    from ``Pastel``. If more clusters are requested than both palettes hold,
    the combined palette is cycled so the result always has exactly
    ``n_clusters`` entries (callers index it with the cluster number).

    Returns a new list; the Plotly palettes themselves are never modified.
    """
    palette = list(px.colors.qualitative.Set3) + list(px.colors.qualitative.Pastel)
    # Wrap around rather than returning a short list once the palette is exhausted.
    return [palette[i % len(palette)] for i in range(n_clusters)]

def create_main_visualization(X, kmeans, iteration=None, show_distances=False, 
                             show_voronoi=True, show_trajectories=True):
    """
    Create main K-Means visualization with Plotly.
    
    Parameters:
    -----------
    X : array, data points (first two columns are plotted)
    kmeans : KMeans object with history
    iteration : int, which iteration to show (None = latest)
    show_distances : bool, show distance lines to centroids
    show_voronoi : bool, show Voronoi regions
    show_trajectories : bool, show centroid movement trails
    
    Returns:
    --------
    plotly.graph_objects.Figure
    
    Traces are added back-to-front: Voronoi background, centroid trails,
    data points, optional distance lines, previous centroids with movement
    vectors, and finally the current centroids. The Voronoi, trail, distance
    and previous-centroid layers are only drawn for iteration > 0.
    """
    if iteration is None:
        iteration = len(kmeans.history) - 1
    
    step = kmeans.history[iteration]
    centroids = step['new_centroids']
    labels = step['labels']
    n_clusters = len(centroids)
    colors = get_cluster_colors(n_clusters)
    
    fig = go.Figure()
    
    # Plot extents: data range plus a margin of 2 on every side
    x_min, x_max = X[:, 0].min() - 2, X[:, 0].max() + 2
    y_min, y_max = X[:, 1].min() - 2, X[:, 1].max() + 2
    
    # Add Voronoi regions (decision boundaries) as a faint dot grid
    if show_voronoi and iteration > 0:
        grid_points, grid_labels, xx, yy = compute_voronoi_regions(
            centroids, [x_min, x_max, y_min, y_max]
        )
        
        for k in range(n_clusters):
            mask = grid_labels == k
            if np.any(mask):
                fig.add_trace(go.Scatter(
                    x=grid_points[mask, 0],
                    y=grid_points[mask, 1],
                    mode='markers',
                    marker=dict(
                        size=3,
                        color=colors[k],
                        opacity=0.1
                    ),
                    showlegend=False,
                    hoverinfo='skip'
                ))
    
    # Add centroid trajectories
    if show_trajectories and iteration > 0:
        for k in range(n_clusters):
            # Positions in chronological order (history[0] .. history[iteration])
            # so the dashed line follows the path the centroid actually took.
            trail = [kmeans.history[i]['new_centroids'][k]
                     for i in range(iteration + 1)]
            
            fig.add_trace(go.Scatter(
                x=[position[0] for position in trail],
                y=[position[1] for position in trail],
                mode='lines',
                line=dict(color=colors[k], width=2, dash='dash'),
                name=f'Centroid {k+1} Trail',
                showlegend=False,
                hoverinfo='skip'
            ))
    
    # Add data points, one trace per non-empty cluster
    for k in range(n_clusters):
        mask = labels == k
        if np.any(mask):
            fig.add_trace(go.Scatter(
                x=X[mask, 0],
                y=X[mask, 1],
                mode='markers',
                marker=dict(
                    size=8,
                    color=colors[k],
                    opacity=0.7,
                    line=dict(width=1, color='white')
                ),
                name=f'Cluster {k+1}',
                text=[f'Point {i}' for i in np.where(mask)[0]],
                hovertemplate='<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<extra></extra>'
            ))
    
    # Add distance lines to centroids (for selected points)
    if show_distances and iteration > 0:
        # Show distances for a few sample points (deterministic, evenly spaced selection)
        n_samples_to_show = min(10, len(X))
        step_size = max(1, len(X) // n_samples_to_show)
        sample_indices = np.arange(0, len(X), step_size)[:n_samples_to_show]
        for idx in sample_indices:
            point = X[idx]
            assigned_cluster = labels[idx]
            
            for k in range(n_clusters):
                if k == assigned_cluster:
                    # Highlight the line to the assigned centroid
                    line_style = dict(color=colors[k], width=2)
                else:
                    # Show the other candidates lightly
                    line_style = dict(color='gray', width=1, dash='dot')
                
                fig.add_trace(go.Scatter(
                    x=[point[0], centroids[k, 0]],
                    y=[point[1], centroids[k, 1]],
                    mode='lines',
                    line=line_style,
                    showlegend=False,
                    hoverinfo='skip'
                ))
    
    # Add old centroids (ghost) if updating
    if step['old_centroids'] is not None and iteration > 0:
        for k in range(n_clusters):
            old_centroid = step['old_centroids'][k]
            fig.add_trace(go.Scatter(
                x=[old_centroid[0]],
                y=[old_centroid[1]],
                mode='markers',
                marker=dict(
                    size=15,
                    color=colors[k],
                    opacity=0.3,
                    symbol='x',
                    line=dict(width=2, color=colors[k])
                ),
                name=f'Old Centroid {k+1}',
                showlegend=False,
                hoverinfo='skip'
            ))
            
            # Show movement vector, skipping displacements too small to see
            if np.linalg.norm(step['new_centroids'][k] - old_centroid) > 0.01:
                fig.add_trace(go.Scatter(
                    x=[old_centroid[0], step['new_centroids'][k, 0]],
                    y=[old_centroid[1], step['new_centroids'][k, 1]],
                    mode='lines+markers',
                    line=dict(color='red', width=3, dash='dashdot'),
                    marker=dict(size=8, color='red'),
                    showlegend=False,
                    hoverinfo='skip'
                ))
    
    # Add current centroids (drawn last so they sit on top)
    for k in range(n_clusters):
        centroid = centroids[k]
        fig.add_trace(go.Scatter(
            x=[centroid[0]],
            y=[centroid[1]],
            mode='markers',
            marker=dict(
                size=20,
                color=colors[k],
                symbol='star',
                line=dict(width=3, color='black'),
                opacity=1.0
            ),
            name=f'Centroid {k+1}',
            text=f'Centroid {k+1}<br>Cluster Size: {int(step["cluster_sizes"][k])}',
            hovertemplate='<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<extra></extra>'
        ))
    
    # Update layout; the y axis is anchored to x so distances are not distorted
    fig.update_layout(
        title=f'K-Means Clustering - Iteration {iteration}',
        xaxis=dict(title='X', range=[x_min, x_max]),
        yaxis=dict(title='Y', range=[y_min, y_max], scaleanchor='x', scaleratio=1),
        width=800,
        height=600,
        hovermode='closest',
        showlegend=True,
        legend=dict(x=1.02, y=1)
    )
    
    return fig

In [16]:
# ============================================================================
# WCSS/Inertia Graph and Mathematical Explanation Panel
# ============================================================================

def create_wcss_graph(kmeans):
    """Plot the recorded WCSS/inertia values against the iteration index.

    Reads ``kmeans.inertia_history_`` and returns a Plotly figure with a
    single line+marker trace on a logarithmic y axis, or ``None`` when no
    values have been recorded.
    """
    wcss_values = kmeans.inertia_history_
    if not wcss_values:
        return None

    steps = list(range(len(wcss_values)))

    wcss_curve = go.Scatter(
        x=steps,
        y=wcss_values,
        mode='lines+markers',
        name='WCSS',
        line=dict(color='blue', width=3),
        marker=dict(size=8),
    )

    layout_options = dict(
        title='Within-Cluster Sum of Squares (WCSS) Optimization',
        # One extra unit on the right so the last marker is not clipped.
        xaxis=dict(title='Iteration', range=[0, max(steps) + 1]),
        yaxis=dict(title='WCSS / Inertia', type='log'),
        width=600,
        height=400,
        hovermode='x unified',
    )

    fig = go.Figure()
    fig.add_trace(wcss_curve)
    fig.update_layout(**layout_options)
    return fig

def create_math_explanation_panel(kmeans, iteration):
    """Create mathematical explanation panel for current step.

    Parameters
    ----------
    kmeans : object exposing ``history``, a list of step dictionaries with
        the keys 'inertia', 'movement', 'converged' and 'cluster_sizes'.
    iteration : int, index into ``kmeans.history``.

    Returns
    -------
    IPython ``HTML`` object with the formulas and the metrics of the
    requested step, or ``None`` when ``iteration`` is outside the recorded
    history (negative indices are rejected as well, so the panel never shows
    a step that does not match its heading).
    """
    if not 0 <= iteration < len(kmeans.history):
        return None
    
    step = kmeans.history[iteration]
    
    # The objective sums, for every cluster j, only over the points assigned
    # to that cluster (C_j) - not over all point/centroid pairs.
    html_content = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; background-color: #f5f5f5; border-radius: 10px; margin: 10px;">
        <h3 style="color: #2c3e50;">Mathematical Explanation - Iteration {iteration}</h3>
        
        <div style="background-color: white; padding: 15px; border-radius: 5px; margin: 10px 0;">
            <h4>Objective Function (WCSS):</h4>
            <p style="font-size: 18px; font-family: 'Courier New', monospace;">
                J = Σ<sub>j=1</sub><sup>k</sup> Σ<sub>x<sub>i</sub> ∈ C<sub>j</sub></sub> ||x<sub>i</sub> - μ<sub>j</sub>||²
            </p>
            <p><strong>Current WCSS:</strong> {step['inertia']:.2f}</p>
        </div>
        
        <div style="background-color: white; padding: 15px; border-radius: 5px; margin: 10px 0;">
            <h4>Euclidean Distance Formula:</h4>
            <p style="font-size: 16px; font-family: 'Courier New', monospace;">
                d(x, c) = √[(x₁ - c₁)² + (x₂ - c₂)²]
            </p>
            <p>This distance is computed for each point to all centroids to determine cluster assignment.</p>
        </div>
        
        <div style="background-color: white; padding: 15px; border-radius: 5px; margin: 10px 0;">
            <h4>Centroid Update Formula:</h4>
            <p style="font-size: 16px; font-family: 'Courier New', monospace;">
                c<sub>new</sub> = (1/n) × Σ<sub>i=1</sub><sup>n</sup> x<sub>i</sub>
            </p>
            <p>Each centroid is updated as the mean of all points assigned to its cluster.</p>
        </div>
        
        <div style="background-color: white; padding: 15px; border-radius: 5px; margin: 10px 0;">
            <h4>Current Step Metrics:</h4>
            <ul>
                <li><strong>Iteration:</strong> {iteration}</li>
                <li><strong>Centroid Movement:</strong> {step['movement']:.4f}</li>
                <li><strong>Converged:</strong> {'Yes' if step['converged'] else 'No'}</li>
                <li><strong>Cluster Sizes:</strong> {', '.join([f'{int(s)}' for s in step['cluster_sizes']])}</li>
            </ul>
        </div>
        
        <div style="background-color: white; padding: 15px; border-radius: 5px; margin: 10px 0;">
            <h4>Lloyd's Algorithm Steps:</h4>
            <ol>
                <li><strong>Initialization:</strong> Randomly place k centroids</li>
                <li><strong>Assignment:</strong> Assign each point to nearest centroid</li>
                <li><strong>Update:</strong> Recalculate centroids as cluster means</li>
                <li><strong>Convergence Check:</strong> Stop if centroids don't move significantly</li>
            </ol>
        </div>
    </div>
    """
    
    return HTML(html_content)


In [17]:
# ============================================================================
# Interactive Dashboard
# ============================================================================

class InteractiveKMeansDashboard:
    """Interactive K-Means visualization dashboard.

    Workflow: adjust the parameter widgets, click *Initialize* (generates the
    dataset from the current widget values and runs K-Means to completion),
    then browse the recorded iterations with *Next Step*, *Run to
    Convergence*, *Reset* or the iteration slider.

    State:
        X                  - current dataset (None until first Initialize)
        kmeans             - fitted KMeans instance (None until first Initialize)
        current_iteration  - index of the history entry being displayed
        animation_running  - True while on_run_to_convergence is looping
    """
    
    def __init__(self):
        self.X = None
        self.kmeans = None
        self.current_iteration = 0
        self.animation_running = False
        # True while the code itself moves the iteration slider, so the slider
        # observer does not trigger a second, redundant render.
        self._syncing_slider = False
        
        # Widgets
        self.k_slider = widgets.IntSlider(
            value=3, min=2, max=10, step=1,
            description='K (Clusters):',
            style={'description_width': 'initial'}
        )
        
        self.seed_slider = widgets.IntSlider(
            value=42, min=0, max=100, step=1,
            description='Random Seed:',
            style={'description_width': 'initial'}
        )
        
        self.n_samples_slider = widgets.IntSlider(
            value=300, min=50, max=500, step=50,
            description='Sample Size:',
            style={'description_width': 'initial'}
        )
        
        self.data_type_dropdown = widgets.Dropdown(
            options=['blobs', 'moons', 'circles', 'random'],
            value='blobs',
            description='Data Type:',
            style={'description_width': 'initial'}
        )
        
        self.iter_slider = widgets.IntSlider(
            value=0, min=0, max=0, step=1,
            description='Iteration:',
            style={'description_width': 'initial'}
        )
        
        self.show_distances = widgets.Checkbox(
            value=False,
            description='Show Distance Lines',
            style={'description_width': 'initial'}
        )
        
        self.show_voronoi = widgets.Checkbox(
            value=True,
            description='Show Voronoi Regions',
            style={'description_width': 'initial'}
        )
        
        self.show_trajectories = widgets.Checkbox(
            value=True,
            description='Show Centroid Trails',
            style={'description_width': 'initial'}
        )
        
        self.animation_speed = widgets.FloatSlider(
            value=1.0, min=0.1, max=3.0, step=0.1,
            description='Animation Speed:',
            style={'description_width': 'initial'}
        )
        
        # Buttons
        self.init_button = widgets.Button(
            description='Initialize',
            button_style='info',
            icon='play'
        )
        
        self.next_button = widgets.Button(
            description='Next Step',
            button_style='success',
            icon='step-forward'
        )
        
        self.run_button = widgets.Button(
            description='Run to Convergence',
            button_style='warning',
            icon='fast-forward'
        )
        
        self.reset_button = widgets.Button(
            description='Reset',
            button_style='danger',
            icon='refresh'
        )
        
        # Output areas
        self.main_output = widgets.Output()
        self.wcss_output = widgets.Output()
        self.math_output = widgets.Output()
        
        # Bind events
        self.init_button.on_click(self.on_initialize)
        self.next_button.on_click(self.on_next_step)
        self.run_button.on_click(self.on_run_to_convergence)
        self.reset_button.on_click(self.on_reset)
        
        self.k_slider.observe(self.on_parameter_change, names='value')
        self.seed_slider.observe(self.on_parameter_change, names='value')
        self.n_samples_slider.observe(self.on_parameter_change, names='value')
        self.data_type_dropdown.observe(self.on_parameter_change, names='value')
        self.iter_slider.observe(self.on_iteration_change, names='value')
        self.show_distances.observe(self.on_visual_update, names='value')
        self.show_voronoi.observe(self.on_visual_update, names='value')
        self.show_trajectories.observe(self.on_visual_update, names='value')
        
    def on_parameter_change(self, change):
        """Handle parameter changes."""
        # Don't auto-update - the new values are picked up when the user
        # clicks Initialize (which regenerates the data and refits).
        pass
    
    def on_iteration_change(self, change):
        """Handle iteration slider change made by the user."""
        if self._syncing_slider:
            # The slider was moved by our own code, which renders explicitly.
            return
        if self.kmeans is not None:
            self.update_visualizations()
    
    def on_visual_update(self, change):
        """Handle visualization option changes."""
        if self.kmeans is not None:
            self.update_visualizations()
    
    def generate_data(self):
        """Generate new dataset based on current parameters.
        
        K is forwarded as the number of blob centers; the module-level
        ``generate_data`` only uses it for blob-type data.
        """
        self.X, _ = generate_data(
            data_type=self.data_type_dropdown.value,
            n_samples=self.n_samples_slider.value,
            n_centers=self.k_slider.value,
            random_state=self.seed_slider.value
        )
    
    def _show_iteration(self, iteration):
        """Move the slider to ``iteration`` and render exactly once."""
        self.current_iteration = iteration
        self._syncing_slider = True
        try:
            self.iter_slider.value = iteration
        finally:
            self._syncing_slider = False
        self.update_visualizations()
    
    def on_initialize(self, button):
        """Regenerate the data from the current widgets and run K-Means on it."""
        # Always regenerate: generation is deterministic for a given seed, and
        # this is the only point where data type / size / seed / K changes
        # can take effect.
        self.generate_data()
        
        self.kmeans = KMeans(
            n_clusters=self.k_slider.value,
            random_state=self.seed_slider.value
        )
        self.kmeans.fit(self.X)
        
        self.current_iteration = 0
        self._syncing_slider = True
        try:
            # Reset the value first so shrinking ``max`` never has to clamp it.
            self.iter_slider.value = 0
            self.iter_slider.max = len(self.kmeans.history) - 1
        finally:
            self._syncing_slider = False
        
        self.update_visualizations()
    
    def on_next_step(self, button):
        """Go to next iteration (no-op at the last recorded iteration)."""
        if self.kmeans is None:
            return
        
        if self.current_iteration < len(self.kmeans.history) - 1:
            self._show_iteration(self.current_iteration + 1)
    
    def on_run_to_convergence(self, button):
        """Animate through all remaining iterations, pausing between frames."""
        if self.kmeans is None:
            return
        
        import time
        
        self.animation_running = True
        try:
            for i in range(self.current_iteration, len(self.kmeans.history)):
                # NOTE(review): this loop blocks the callback, so whether a Reset
                # click can clear the flag mid-animation depends on the frontend/
                # kernel event handling - verify before relying on it.
                if not self.animation_running:
                    break
                self._show_iteration(i)
                time.sleep(1.0 / self.animation_speed.value)
        finally:
            self.animation_running = False
    
    def on_reset(self, button):
        """Reset to initial state (iteration 0 of the current fit)."""
        self.animation_running = False
        self.current_iteration = 0
        if self.kmeans is not None:
            self._show_iteration(0)
    
    def update_visualizations(self):
        """Update all visualizations for the iteration selected on the slider."""
        if self.kmeans is None:
            return
        
        self.current_iteration = self.iter_slider.value
        
        # Main visualization
        with self.main_output:
            clear_output(wait=True)
            fig = create_main_visualization(
                self.X,
                self.kmeans,
                iteration=self.current_iteration,
                show_distances=self.show_distances.value,
                show_voronoi=self.show_voronoi.value,
                show_trajectories=self.show_trajectories.value
            )
            fig.show()
        
        # WCSS graph
        with self.wcss_output:
            clear_output(wait=True)
            wcss_fig = create_wcss_graph(self.kmeans)
            if wcss_fig:
                wcss_fig.show()
        
        # Math explanation
        with self.math_output:
            clear_output(wait=True)
            math_html = create_math_explanation_panel(self.kmeans, self.current_iteration)
            if math_html:
                display(math_html)
    
    def display(self):
        """Display the dashboard."""
        # Create layout
        controls = widgets.VBox([
            widgets.HTML("<h2>Interactive K-Means Visualization</h2>"),
            widgets.HBox([
                self.k_slider,
                self.seed_slider,
                self.n_samples_slider
            ]),
            self.data_type_dropdown,
            widgets.HBox([
                self.show_distances,
                self.show_voronoi,
                self.show_trajectories
            ]),
            widgets.HBox([
                self.init_button,
                self.next_button,
                self.run_button,
                self.reset_button
            ]),
            self.iter_slider,
            self.animation_speed
        ])
        
        visualizations = widgets.VBox([
            widgets.HBox([
                self.main_output,
                widgets.VBox([
                    self.wcss_output,
                    self.math_output
                ])
            ])
        ])
        
        dashboard = widgets.VBox([
            controls,
            visualizations
        ])
        
        # Inside the method body ``display`` resolves to the module-level
        # IPython function, not to this method.
        display(dashboard)


In [18]:
# ============================================================================
# Distance Calculation Visualization Component
# ============================================================================

def create_distance_visualization(X, kmeans, point_idx, iteration=None):
    """
    Create detailed visualization of distance calculations for a specific point.
    Shows lines connecting the point to all centroids with distance values.

    Parameters
    ----------
    X : array-like
        Data points; columns 0 and 1 are used as the plot's x and y.
    kmeans : object
        Must expose ``history``, a sequence of per-step dicts holding
        ``'new_centroids'``, ``'distances'`` (indexed ``[cluster, point]``)
        and ``'labels'`` (indexed ``[point]``).
    point_idx : int
        Index of the point in ``X`` whose distances are drawn.
    iteration : int, optional
        Index into ``kmeans.history``. Defaults to the last recorded step.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with the data cloud, the highlighted point, one labelled line
        per centroid and the centroid markers.

    Raises
    ------
    IndexError
        If ``kmeans.history`` is empty, or if ``iteration`` / ``point_idx``
        is out of range for the underlying containers.
    """
    n_steps = len(kmeans.history)
    if n_steps == 0:
        # Without this guard the default iteration becomes -1 and the lookup
        # fails with an uninformative "list index out of range".
        raise IndexError(
            "kmeans.history is empty - fit the model (or run at least one "
            "step) before visualizing distances"
        )
    if iteration is None:
        iteration = n_steps - 1

    step = kmeans.history[iteration]
    # NOTE(review): lines are drawn to 'new_centroids' while the labels come
    # from the stored 'distances'. If those distances were measured against
    # the centroids from *before* this step's update, the printed values will
    # not match the drawn line lengths - confirm against KMeans' history.
    centroids = step['new_centroids']
    distances = step['distances'][:, point_idx]
    assigned_cluster = step['labels'][point_idx]
    point = X[point_idx]
    n_clusters = len(centroids)
    colors = get_cluster_colors(n_clusters)

    fig = go.Figure()

    # Axis ranges: data extent padded by 2 units on every side
    x_min, x_max = X[:, 0].min() - 2, X[:, 0].max() + 2
    y_min, y_max = X[:, 1].min() - 2, X[:, 1].max() + 2

    # Plot all data points (lightly) as background context
    fig.add_trace(go.Scatter(
        x=X[:, 0],
        y=X[:, 1],
        mode='markers',
        marker=dict(size=5, color='lightgray', opacity=0.3),
        name='Other Points',
        showlegend=False,
        hoverinfo='skip'
    ))

    # Highlight the selected point
    fig.add_trace(go.Scatter(
        x=[point[0]],
        y=[point[1]],
        mode='markers',
        marker=dict(size=15, color='red', symbol='circle', line=dict(width=2, color='darkred')),
        name='Selected Point',
        text=f'Point {point_idx}',
        hovertemplate='<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<extra></extra>'
    ))

    # Draw distance lines to all centroids; the assigned cluster's line is
    # emphasised (cluster colour, thick, solid), the others are thin gray dots
    for k in range(n_clusters):
        distance = distances[k]
        is_assigned = (k == assigned_cluster)

        line_color = colors[k] if is_assigned else 'gray'
        line_width = 3 if is_assigned else 1
        line_dash = 'solid' if is_assigned else 'dot'

        fig.add_trace(go.Scatter(
            x=[point[0], centroids[k, 0]],
            y=[point[1], centroids[k, 1]],
            mode='lines',
            line=dict(color=line_color, width=line_width, dash=line_dash),
            name=f'Distance to Centroid {k+1}',
            showlegend=False,
            hoverinfo='skip'
        ))

        # Midpoint of the segment, where the distance label is placed
        mid_x = (point[0] + centroids[k, 0]) / 2
        mid_y = (point[1] + centroids[k, 1]) / 2

        label_text = f'd = {distance:.2f}'
        if is_assigned:
            label_text = f'<b>{label_text} (min)</b>'

        # Zero-size marker: the trace exists only to carry the text label and
        # its hover tooltip
        fig.add_trace(go.Scatter(
            x=[mid_x],
            y=[mid_y],
            mode='markers+text',
            marker=dict(size=0),
            text=label_text,
            textposition='middle center',
            textfont=dict(
                color=line_color,
                size=12 if is_assigned else 10,
                family='Arial Black' if is_assigned else 'Arial'
            ),
            name=f'Distance {k+1}',
            showlegend=False,
            hovertemplate=f'Distance to Centroid {k+1}: {distance:.4f}<extra></extra>'
        ))

    # Plot centroids in a separate pass so they are added after every line
    # and label trace
    for k in range(n_clusters):
        centroid = centroids[k]

        fig.add_trace(go.Scatter(
            x=[centroid[0]],
            y=[centroid[1]],
            mode='markers',
            marker=dict(
                size=20,
                color=colors[k],
                symbol='star',
                line=dict(width=3, color='black'),
                opacity=1.0
            ),
            name=f'Centroid {k+1}',
            text=f'Centroid {k+1}',
            hovertemplate='<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<extra></extra>'
        ))

    # Formula annotation (cluster numbers are shown 1-based)
    formula_text = (
        "Distance Formula: d(x, c) = √[(x₁ - c₁)² + (x₂ - c₂)²]<br>"
        f"Point: ({point[0]:.2f}, {point[1]:.2f})<br>"
        f"Assigned to Cluster {assigned_cluster + 1} (minimum distance)"
    )

    fig.update_layout(
        title=f'Distance Calculation Visualization - Point {point_idx}',
        xaxis=dict(title='X', range=[x_min, x_max]),
        # scaleanchor/scaleratio tie the y scale to the x scale (1:1)
        yaxis=dict(title='Y', range=[y_min, y_max], scaleanchor='x', scaleratio=1),
        width=800,
        height=600,
        hovermode='closest',
        annotations=[
            dict(
                x=0.5,
                y=0.02,
                xref='paper',
                yref='paper',
                text=formula_text,
                showarrow=False,
                bgcolor='rgba(255, 255, 255, 0.8)',
                bordercolor='black',
                borderwidth=1,
                font=dict(size=12, family='Courier New')
            )
        ]
    )

    return fig


## Usage Examples

### Quick Start: Run the Interactive Dashboard

**To launch the dashboard, simply run Cell 9 — the "LAUNCH INTERACTIVE DASHBOARD" code cell directly below!**

The dashboard provides:
- Interactive controls (K, seed, sample size, data type dropdown)
- Step-through buttons (Initialize, Next Step, Run to Convergence, Reset)
- Visualization toggles (Show Distance Lines, Voronoi Regions, Centroid Trails)
- Real-time WCSS optimization graph
- Mathematical explanation panel with formulas

**Steps to use:**
1. Run all cells above (Cells 0-7, counting from the top of the notebook) to load the modules
2. Run Cell 9 (the "LAUNCH INTERACTIVE DASHBOARD" cell directly below) to launch the interactive dashboard
3. Click "Initialize" to start
4. Use "Next Step" to go through iterations manually
5. Or click "Run to Convergence" to see the full animation


In [20]:
# ============================================================================
# LAUNCH INTERACTIVE DASHBOARD
# ============================================================================
# Run this cell to launch the interactive K-Means visualization dashboard.
# InteractiveKMeansDashboard is defined in an earlier cell, which must have
# been executed first.

# Keep a reference in `dashboard` so the instance stays reachable after the
# cell finishes; display() assembles the widget layout and renders it.
dashboard = InteractiveKMeansDashboard()
dashboard.display()

# The dashboard includes:
# - Interactive controls (K, seed, sample size, data type)
# - Step-through buttons (Initialize, Next Step, Run to Convergence, Reset)
# - Visualization toggles (distance lines, Voronoi regions, centroid trails)
# - Real-time WCSS graph and mathematical explanations


VBox(children=(VBox(children=(HTML(value='<h2>Interactive K-Means Visualization</h2>'), HBox(children=(IntSlid…

### Example 2: Manual Step-by-Step Visualization

You can also create visualizations manually for more control:


In [21]:
# Generate data (the second return value is not needed here and is discarded).
# `X` and `kmeans` defined in this cell are reused by the distance
# visualization example further down.
X, _ = generate_data(data_type='blobs', n_samples=300, n_centers=3, random_state=42)

# Create and fit K-Means
kmeans = KMeans(n_clusters=3, random_state=42, max_iters=20)
kmeans.fit(X)

# Visualize initial state (first entry of kmeans.history, Voronoi overlay off)
fig_initial = create_main_visualization(X, kmeans, iteration=0, show_voronoi=False)
fig_initial.show()

# Visualize final state (len(kmeans.history)-1 is the last recorded step)
fig_final = create_main_visualization(X, kmeans, iteration=len(kmeans.history)-1)
fig_final.show()

# Show WCSS optimization
wcss_fig = create_wcss_graph(kmeans)
wcss_fig.show()

# Show mathematical explanation for the last recorded step
display(create_math_explanation_panel(kmeans, len(kmeans.history)-1))


### Example 3: Distance Calculation Visualization

Visualize how distances are calculated for a specific point:


In [22]:
# Visualize distance calculations for a specific point.
# Relies on `X` and `kmeans` from the Example 2 cell above.
point_idx = 50  # Any valid row index of X works
# iteration=1 selects kmeans.history[1]; pass iteration=None for the last step
distance_fig = create_distance_visualization(X, kmeans, point_idx, iteration=1)
distance_fig.show()


### Example 4: Different Dataset Types

Test K-Means on different data distributions:


In [23]:
# Test on moon-shaped clusters
X_moons, _ = generate_data(data_type='moons', n_samples=300, random_state=42)
kmeans_moons = KMeans(n_clusters=2, random_state=42)
kmeans_moons.fit(X_moons)

# No `iteration` is passed, so create_main_visualization falls back to its own
# default - presumably the final recorded step; confirm against its definition.
fig_moons = create_main_visualization(X_moons, kmeans_moons, show_voronoi=True)
fig_moons.show()

# Test on circular clusters (same settings as above, different data_type)
X_circles, _ = generate_data(data_type='circles', n_samples=300, random_state=42)
kmeans_circles = KMeans(n_clusters=2, random_state=42)
kmeans_circles.fit(X_circles)

fig_circles = create_main_visualization(X_circles, kmeans_circles, show_voronoi=True)
fig_circles.show()


In [24]:
# Quick smoke test to verify everything is working.
# Each component is exercised once on a small dataset; the assertions make the
# "✓" lines meaningful by failing loudly if a component produced nothing.
print("Testing K-Means visualization components...")

# Test data generation
X_test, _ = generate_data(data_type='blobs', n_samples=100, n_centers=3, random_state=42)
assert X_test.shape[0] > 0, "generate_data returned no points"
print(f"✓ Data generation: {X_test.shape[0]} points generated")

# Test K-Means
kmeans_test = KMeans(n_clusters=3, random_state=42, max_iters=5)
kmeans_test.fit(X_test)
assert len(kmeans_test.history) > 0, "KMeans.fit recorded no steps"
print(f"✓ K-Means: {len(kmeans_test.history)} steps recorded")
print(f"✓ Final WCSS: {kmeans_test.inertia_:.2f}")

# Test visualization
fig_test = create_main_visualization(X_test, kmeans_test, iteration=0, show_voronoi=False)
assert fig_test is not None, "create_main_visualization returned nothing"
print("✓ Main visualization: Created successfully")

# `_test` suffix (matching X_test / kmeans_test / fig_test) keeps this cell
# from overwriting the `wcss_fig` created in the Example 2 cell.
wcss_fig_test = create_wcss_graph(kmeans_test)
assert wcss_fig_test is not None, "create_wcss_graph returned nothing"
print("✓ WCSS graph: Created successfully")

print("\n✅ All components working! Ready to launch dashboard in Cell 9.")


Testing K-Means visualization components...
✓ Data generation: 100 points generated
✓ K-Means: 6 steps recorded
✓ Final WCSS: 1833.13
✓ Main visualization: Created successfully
✓ WCSS graph: Created successfully

✅ All components working! Ready to launch dashboard in Cell 9.
