### MSSV: 22520375
### HỌ VÀ TÊN: VƯƠNG DƯƠNG THÁI HÀ
### LỚP: Nhận dạng - CS338.P23

# IMPORT LIBRARIES

In [5]:
import random
import math

# DEFINE KMEANS

In [6]:
class KMeans:
    """Plain-Python k-means clustering for 2-D points.

    Points and centroids are indexable pairs ``(x, y)``. After ``fit`` the
    results are available in ``centroids`` (one per cluster) and
    ``clusters`` (one list of points per cluster, in the same order).
    """

    def __init__(self, k=3, max_iterations=100):
        """Store the configuration and start with no centroids.

        Args:
            k: Number of clusters to form.
            max_iterations: Upper bound on assign/update rounds in ``fit``.
        """
        self.k = k
        self.max_iterations = max_iterations
        self.centroids = []
        self.clusters = [[] for _ in range(k)]

    def generate_random_points(self, n, min_val=0, max_val=100):
        """Return ``n`` random ``(x, y)`` tuples.

        Each coordinate is drawn independently with ``random.uniform``
        between ``min_val`` and ``max_val``.
        """
        return [
            (random.uniform(min_val, max_val), random.uniform(min_val, max_val))
            for _ in range(n)
        ]

    def euclidean_distance(self, point1, point2):
        """Return the straight-line distance between two 2-D points."""
        dx = point1[0] - point2[0]
        dy = point1[1] - point2[1]
        return math.sqrt(dx**2 + dy**2)

    def initialize_centroids(self, points):
        """Pick ``k`` distinct entries of ``points`` as starting centroids.

        Raises:
            ValueError: If ``points`` holds fewer than ``k`` entries.
        """
        if len(points) < self.k:
            # random.sample would raise ValueError here as well; this message
            # just states the actual cause.
            raise ValueError(
                f"cannot pick {self.k} centroids from {len(points)} point(s)"
            )
        return random.sample(points, self.k)

    def assign_points_to_clusters(self, points):
        """Group ``points`` by their nearest current centroid.

        Returns:
            A list of ``k`` lists; entry ``i`` holds the points whose closest
            centroid is ``self.centroids[i]``. On a distance tie the centroid
            with the lowest index wins.
        """
        clusters = [[] for _ in range(self.k)]

        for point in points:
            distances = [
                self.euclidean_distance(point, centroid)
                for centroid in self.centroids
            ]
            closest_cluster = distances.index(min(distances))
            clusters[closest_cluster].append(point)

        return clusters

    def update_centroids(self, clusters):
        """Return the mean point of every cluster.

        An empty cluster keeps its current centroid. The cluster's position
        comes from ``enumerate`` rather than ``clusters.index(cluster)``:
        ``list.index`` compares by equality, so with several empty clusters it
        would always return the first empty one and copy the wrong centroid.
        """
        new_centroids = []
        for index, cluster in enumerate(clusters):
            if not cluster:
                new_centroids.append(self.centroids[index])
                continue
            size = len(cluster)
            avg_x = sum(point[0] for point in cluster) / size
            avg_y = sum(point[1] for point in cluster) / size
            new_centroids.append((avg_x, avg_y))

        return new_centroids

    def has_converged(self, old_centroids, new_centroids):
        """Return True when every centroid moved by less than 0.001."""
        return all(
            self.euclidean_distance(old, new) < 0.001
            for old, new in zip(old_centroids, new_centroids)
        )

    def fit(self, points):
        """Run k-means on ``points``, updating ``centroids`` and ``clusters``.

        Starts from randomly sampled centroids and alternates assignment and
        centroid update until the centroids stop moving (see
        ``has_converged``) or ``max_iterations`` rounds have run.

        Raises:
            ValueError: If ``points`` holds fewer than ``k`` entries.
        """
        self.centroids = self.initialize_centroids(points)

        for _ in range(self.max_iterations):
            self.clusters = self.assign_points_to_clusters(points)

            old_centroids = self.centroids.copy()
            self.centroids = self.update_centroids(self.clusters)
            if self.has_converged(old_centroids, self.centroids):
                break

    def print_results(self):
        """Print every cluster with its centroid and points, then all centroids."""
        print("Kết quả phân cụm:")
        for i, cluster in enumerate(self.clusters):
            print(f"\nCluster {i + 1} (Centroid: {self.centroids[i]}):")
            for point in cluster:
                print(f"  Điểm: {point}")

        print("\nTọa độ cuối cùng của các tâm cụm:")
        for i, centroid in enumerate(self.centroids):
            print(f"  Centroid {i + 1}: {centroid}")

In [7]:
# Demo run: cluster 50 random points into 3 groups and print the outcome.
kmeans = KMeans(3)
data_points = kmeans.generate_random_points(n=50)
kmeans.fit(points=data_points)
kmeans.print_results()

Kết quả phân cụm:

Cluster 1 (Centroid: (59.37668816815535, 72.44342973942688)):
  Điểm: (69.24870724779856, 95.46826651330873)
  Điểm: (55.526728034585204, 73.33780044167885)
  Điểm: (32.54659944298811, 87.51830811586663)
  Điểm: (55.5504219854058, 51.57136190128932)
  Điểm: (70.5462946097832, 77.36958662506333)
  Điểm: (36.19649661718418, 76.83644700871712)
  Điểm: (38.41133648531836, 80.18684724956793)
  Điểm: (89.24461633974985, 85.70100511864143)
  Điểm: (59.32943096834731, 83.5121212950673)
  Điểm: (75.09571785891357, 63.35179524905128)
  Điểm: (44.941243900609095, 58.63037620602255)
  Điểm: (51.70455911557238, 46.541163326532555)
  Điểm: (49.7953648248698, 51.708186433233195)
  Điểm: (52.93598387874097, 50.12713936270532)
  Điểm: (69.43254342071522, 61.07267776105824)
  Điểm: (64.09264707875528, 90.65513811960373)
  Điểm: (94.80500704930407, 97.95008484284926)

Cluster 2 (Centroid: (75.46271288037634, 22.258452956773134)):
  Điểm: (95.07077549302359, 13.699166229591587)
  Điểm: 