In [1]:
#Plotting all the points on an animated graph
%matplotlib inline
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Load the point table from disk
df = pd.read_csv('PointsCoordinates.csv')

# The columns at positions 1 and 2 are used as the x and y coordinates;
# both are kept as plain Python lists
x, y = (df.iloc[:, col].tolist() for col in (1, 2))

# Total number of points available to the animation
n = len(x)

# Portrait-shaped canvas with an initially empty scatter artist
# (small blue markers) that the animation fills in frame by frame
fig, ax = plt.subplots(figsize=(10, 14))
sc = ax.scatter([], [], s=5, color='blue')

def init():
    """Pin the axis limits to the bounding box of the data.

    Passed to ``FuncAnimation`` as ``init_func``. Reads the module-level
    ``x`` and ``y`` coordinate lists and applies their extremes to ``ax``.

    Returns:
        A 1-tuple containing the scatter artist ``sc``.
    """
    ax.set_xlim(left=min(x), right=max(x))
    ax.set_ylim(bottom=min(y), top=max(y))
    return (sc,)

# Coordinates revealed so far. update() overwrites these in place on every
# frame, so their content depends only on the frame index.
x_data, y_data = [], []

def update(frame):
    """Show every point up to and including animation frame ``frame``.

    Two further points are revealed per frame. The visible set is rebuilt
    from the frame index rather than appended to, so drawing the same frame
    twice (for example when the animation is rendered again) never
    duplicates points. Slicing also copes with an odd number of points:
    the final frame then simply reveals the single remaining point.

    Args:
        frame: Zero-based frame index supplied by ``FuncAnimation``.

    Returns:
        A 1-tuple containing the scatter artist ``sc`` (blitting needs the
        list of artists that changed).
    """
    shown = 2 * (frame + 1)
    x_data[:] = x[:shown]
    y_data[:] = y[:shown]
    sc.set_offsets(list(zip(x_data, y_data)))
    return sc,

# Two points per frame; round up so the last point still appears when n is odd.
ani = animation.FuncAnimation(
    fig,
    update,
    frames=(n + 1) // 2,
    init_func=init,
    blit=True,
    repeat=False,
    interval=2,
)

# Label through the Axes object rather than the pyplot "current axes" state.
ax.set_title('Points from CSV on a 2D Graph')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
plt.close(fig)  # This will prevent the static plot from displaying
display(HTML(ani.to_jshtml()))


In [2]:
# plotting the progression of 3 K means clustering on the same data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from sklearn.cluster import KMeans
from IPython.display import display, HTML

# Load the point table from disk
df = pd.read_csv('PointsCoordinates.csv')

# Keep the columns at positions 1 and 2 (used as x and y) as one NumPy array
data = df.iloc[:, 1:3].values

# Portrait-shaped canvas for the clustering animation
fig, ax = plt.subplots(figsize=(10, 14))

# One list of visited centre positions per cluster (three clusters)
centroid_paths = [[] for _ in range(3)]

def animate(i):
    """Draw frame ``i`` of the K-means progression on the module-level ``ax``.

    Frame 0 shows the raw points in black. Every later frame refits a
    3-cluster ``KMeans`` on ``data`` with ``max_iter=i`` and a fixed
    ``random_state``, colours the points by the resulting labels and draws
    each cluster centre together with the trail of centres recorded for the
    earlier frames.

    The trail kept in ``centroid_paths`` is truncated to the entries that
    belong to frames before ``i`` prior to appending, so drawing the same
    frame again (for example when the animation is rendered a second time)
    does not duplicate trail points.

    Args:
        i: Frame index; 0 is the initial state, ``i >= 1`` is used as the
            K-means iteration cap.
    """
    ax.clear()

    # For the initial frame, plot all points in black
    if i == 0:
        ax.scatter(data[:, 0], data[:, 1], s=5, c='black')
        ax.set_title('Initial State')
        return

    # Fit KMeans with an increasing number of iterations and 'random' initialization
    kmeans = KMeans(n_clusters=3, init='random', n_init=1, max_iter=i, random_state=42)
    kmeans.fit(data)
    labels = kmeans.labels_

    # Plot points based on their cluster labels (one mask per cluster)
    cluster_styles = (
        ('green', 'Cluster 1'),
        ('red', 'Cluster 2'),
        ('blue', 'Cluster 3'),
    )
    for k, (colour, name) in enumerate(cluster_styles):
        members = data[labels == k]
        ax.scatter(members[:, 0], members[:, 1], s=5, c=colour, label=name)

    # Plot cluster centers and their movement
    for j, center in enumerate(kmeans.cluster_centers_):
        trail = centroid_paths[j]
        # Frame i owns trail slot i - 1; drop anything stored at or after it
        # so re-drawing a frame replaces its entry instead of appending a copy.
        del trail[i - 1:]
        trail.append(center)
        path = np.array(trail)
        # Black dashes: a white line would vanish on the default white axes background.
        ax.plot(path[:, 0], path[:, 1], 'k--', linewidth=1)
        ax.scatter(center[0], center[1], c='black', s=100, marker='X')

    ax.set_title(f'Iteration: {i}')
    ax.legend()

# 11 frames in total: frame 0 is the raw data, frames 1-10 are K-means iterations
ani = animation.FuncAnimation(
    fig,
    animate,
    frames=11,
    repeat=False,
    interval=500,
)

# Close the figure so only the interactive player is shown, not a static plot
plt.close(fig)
player = HTML(ani.to_jshtml())
display(player)


In [None]:
# Test by Prateek