In [13]:
import numpy as np
from PIL import Image as im
import matplotlib.pyplot as plt
import sklearn.datasets as datasets

# Blob centres for the synthetic 2-D demo dataset.
centers = [
    [0, 0],
    [2, 2],
    [-3, 2],
    [2, -4],
]

# 300 samples around those centres with unit spread; random_state pins the
# draw, and the second return value of make_blobs is discarded into `_`.
X, _ = datasets.make_blobs(
    n_samples=300,
    centers=centers,
    cluster_std=1,
    random_state=0,
)

class KMeans():
    """Lloyd's k-means clustering that records one scatter-plot frame per iteration.

    The plotting in ``snap`` relies on the module-level names ``plt``
    (matplotlib.pyplot) and ``im`` (PIL.Image) and on the first two columns of
    the data.

    Attributes:
        data: numpy array of points, one row per sample.
        k: number of clusters.
        assignment: list holding the cluster index of every point
            (``-1`` while a point is unassigned).
        snaps: PIL images, one appended by every call to ``snap``.
        centers: centers from the most recent ``lloyds`` run
            (``None`` before the first run).
    """

    def __init__(self, data, k):
        """Store the data set and the number of clusters; nothing is computed yet."""
        # asarray is a no-op for ndarrays and lets plain nested lists work too.
        self.data = np.asarray(data)
        self.k = k
        self.assignment = [-1 for _ in range(len(data))]
        self.snaps = []
        self.centers = None

    def snap(self, centers):
        """Plot the current assignment together with ``centers`` and store the frame.

        The figure is written to ``temp.png`` in the working directory, read
        back, and appended to ``self.snaps`` as a PIL image.
        """
        TEMPFILE = "temp.png"

        fig, ax = plt.subplots()
        # Plot this instance's own data rather than a notebook global.
        ax.scatter(self.data[:, 0], self.data[:, 1], c=self.assignment)
        ax.scatter(centers[:, 0], centers[:, 1], c='r')
        fig.savefig(TEMPFILE)
        # Close exactly the figure created here, not whichever one is "current".
        plt.close(fig)
        # Copy the pixels into memory while the file is open, then release the
        # handle so the next snapshot can overwrite temp.png.
        with im.open(TEMPFILE) as frame:
            self.snaps.append(im.fromarray(np.asarray(frame)))

    def isunassigned(self, i):
        """Return True when point ``i`` currently has no cluster (marker ``-1``)."""
        return self.assignment[i] == -1

    def initialize(self, typ='random'):
        """Choose ``k`` initial centers from the data.

        Args:
            typ: ``'random'`` (k distinct points drawn uniformly),
                ``'farthest'`` (start at the first point, then repeatedly add
                the not-yet-chosen point with the largest summed distance to
                the centers chosen so far) or ``'kmeans++'`` (each further
                center is drawn with probability proportional to the squared
                distance to its nearest chosen center).

        Returns:
            numpy array with one row per center.

        Raises:
            NotImplementedError: for ``typ == 'Manual'``.
            ValueError: for an unknown ``typ`` or when ``k`` is not between 1
                and the number of data points.
        """
        if typ == 'Manual':
            raise NotImplementedError()
        if typ not in ('random', 'farthest', 'kmeans++'):
            raise ValueError("Invalid initialization type")

        n = len(self.data)
        if not 1 <= self.k <= n:
            raise ValueError("k must be between 1 and the number of data points")

        if typ == 'random':
            # choice(n) draws from 0..n-1, so every point is eligible.
            return self.data[np.random.choice(n, size=self.k, replace=False)]

        if typ == 'farthest':
            chosen = [0]
            for _ in range(self.k - 1):
                best, best_total = None, -1.0
                for j in range(n):
                    # Never pick the same point twice, independent of any
                    # assignment state left over from an earlier run.
                    if j in chosen:
                        continue
                    total = sum(self.dist(self.data[j], self.data[c]) for c in chosen)
                    if total > best_total:
                        best, best_total = j, total
                chosen.append(best)
            return np.array([self.data[j] for j in chosen])

        # typ == 'kmeans++'
        centers = [self.data[np.random.choice(n)]]
        for _ in range(self.k - 1):
            sq_dists = np.array(
                [min(self.dist(center, x) for center in centers) ** 2 for x in self.data]
            )
            total = sq_dists.sum()
            if total > 0:
                probs = sq_dists / total
            else:
                # Every point coincides with a chosen center: fall back to a
                # uniform draw instead of dividing by zero.
                probs = np.full(n, 1.0 / n)
            centers.append(self.data[np.random.choice(n, p=probs)])
        return np.array(centers)

    def make_clusters(self, centers):
        """Assign every point to its nearest center (ties go to the lowest index).

        The result does not depend on the previous content of
        ``self.assignment``.
        """
        for i in range(len(self.assignment)):
            dists = [self.dist(centers[j], self.data[i]) for j in range(self.k)]
            self.assignment[i] = int(np.argmin(dists))

    def compute_centers(self, prev_centers=None):
        """Return the mean of every cluster as a numpy array, one row per cluster.

        Args:
            prev_centers: optional centers from the previous iteration. An
                empty cluster keeps its previous center when this is given;
                without it the row for an empty cluster is filled with NaN.
        """
        labels = np.asarray(self.assignment)
        centers = []
        for i in range(self.k):
            members = self.data[labels == i]
            if len(members) > 0:
                centers.append(members.mean(axis=0))
            elif prev_centers is not None:
                # Keeping the old center avoids NaN centers, which would make
                # are_diff report a change forever.
                centers.append(np.asarray(prev_centers[i], dtype=float))
            else:
                centers.append(np.full(self.data.shape[1:], np.nan))

        return np.array(centers)

    def unassign(self):
        """Reset every point to the unassigned marker ``-1``."""
        self.assignment = [-1 for _ in range(len(self.data))]

    def are_diff(self, centers, new_centers):
        """Return True when any of the ``k`` centers moved between the two sets."""
        return any(
            self.dist(centers[i], new_centers[i]) != 0 for i in range(self.k)
        )

    def dist(self, x, y):
        """Euclidean distance between two points."""
        diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
        return np.sqrt(np.sum(diff ** 2))

    def lloyds(self, typ='random', max_iter=None):
        """Run Lloyd's algorithm until the centers stop moving.

        A frame is recorded through ``snap`` after every update step and the
        final centers are stored in ``self.centers``.

        Args:
            typ: initialization strategy, see ``initialize``.
            max_iter: optional upper bound on the number of update steps;
                ``None`` (the default) iterates until convergence.
        """
        self.unassign()
        centers = self.initialize(typ)
        iteration = 0
        while True:
            self.make_clusters(centers)
            new_centers = self.compute_centers(centers)
            self.snap(new_centers)
            iteration += 1
            if not self.are_diff(centers, new_centers):
                break
            if max_iter is not None and iteration >= max_iter:
                break
            centers = new_centers
        self.centers = new_centers
        return
    



# Seed NumPy's global RNG, which the k-means++ initialization draws from, so
# the run and the exported GIF are reproducible.
SEED = 0
np.random.seed(SEED)

kmeans = KMeans(X, 4)
kmeans.lloyds(typ='kmeans++')

images = kmeans.snaps

# The first frame is the base image; the remaining frames are appended to it,
# each shown for 500 ms, looping forever (loop=0).
images[0].save(
    'kmeanspp.gif',
    optimize=False,
    save_all=True,
    append_images=images[1:],
    loop=0,
    duration=500
)

In [15]:
# Show all frames inline in the notebook. Image.show() would hand every frame
# to an external viewer program instead; display() is provided by the
# IPython kernel without an import and renders the image in the cell output.
for frame in images:
    display(frame)
    

KeyboardInterrupt: 

In [12]:
from kmeans import KMeans  # Your custom KMeans implementation
import numpy as np
# k = int(request.form['k'])
# method = request.form['init_method']  # Random, Farthest, KMeans++ or Manual

# Read the point cloud from disk.
points = np.load('dataset.npy')

# Build the clusterer from the imported kmeans module and fit it.
kmeans = KMeans(
    data=points,
    n_clusters=5,
    init_method='random',
)
kmeans.fit(points)

# Render the fitted model; the result is written out as text below.
plot_html = kmeans.plot()

# Persist the markup; leaving the with-block closes the file.
with open('plot.html', mode='w', encoding='utf-8') as f:
    f.write(plot_html)

In [23]:
# NOTE(review): step() comes from the external kmeans module; presumably it
# advances the fit by one iteration and the value shown below is its return
# value for this run — confirm against that module.
kmeans.step()

False

In [24]:
# Number of entries currently stored in the model's snaps collection.
len(kmeans.snaps)

9

In [26]:
# combine all snaps into one html file
# The snaps are written to a text-mode file below, so they are expected to be
# strings. str.join concatenates them in a single pass and yields an empty
# document for an empty list, where indexing snaps[0] would raise IndexError.
html = ''.join(kmeans.snaps)

# Leaving the with-block closes the file, so no explicit close() is needed.
with open('plot.html', 'w', encoding='utf-8') as f:
    f.write(html)