In [43]:
import numpy as np

class KMeans:
    """K-means clustering with a distance cutoff for outliers.

    Each point is assigned to its nearest centroid unless that nearest
    centroid is farther away than ``radius``; such points are collected as
    outliers and do not contribute to the centroid update.

    Attributes:
        n_clusters: Number of clusters (and centroids) to maintain.
        tolerance: Iteration stops once every centroid moved by less than
            this Euclidean distance in a single update.
        max_iterations: Upper bound on the number of assign/update cycles.
        centroids: Final centroid array; set by ``fit``.
    """

    def __init__(self, n_clusters=2, tolerance=0.0001, max_iterations=300):
        self.n_clusters = n_clusters
        self.tolerance = tolerance
        self.max_iterations = max_iterations

    def fit(self, data, initial_seeds=None, radius=0.5, verbose=True):
        """Cluster ``data`` and store the resulting centroids on ``self.centroids``.

        Args:
            data: Array-like of points, one point per row.
            initial_seeds: Optional array-like with exactly ``n_clusters``
                starting centroids. When omitted, ``n_clusters`` distinct rows
                of ``data`` are drawn at random.
            radius: A point whose nearest centroid is farther than this is
                recorded as an outlier instead of joining a cluster.
            verbose: When true (the default), print the initial centroids and
                a per-cycle progress report.

        Returns:
            A ``(clusters, outliers)`` tuple. ``clusters`` maps each cluster
            index ``0..n_clusters-1`` to the list of points assigned to it and
            ``outliers`` is the list of rejected points, both taken from the
            last assignment pass that was executed.

        Raises:
            ValueError: If ``initial_seeds`` does not contain exactly
                ``n_clusters`` points or its point shape differs from the
                rows of ``data``.
        """
        data = np.asarray(data, dtype=float)

        if initial_seeds is not None:
            # Force a float copy: an integer seed array would silently
            # truncate every averaged centroid written back into it.
            centroids = np.array(initial_seeds, dtype=float)
            if centroids.shape[:1] != (self.n_clusters,):
                raise ValueError(
                    f"initial_seeds must contain exactly {self.n_clusters} points, "
                    f"got an array of shape {centroids.shape}"
                )
            if len(data) > 0 and centroids.shape[1:] != data.shape[1:]:
                raise ValueError(
                    f"initial_seeds point shape {centroids.shape[1:]} does not match "
                    f"data point shape {data.shape[1:]}"
                )
        else:
            # Fancy indexing returns a copy, so updating centroids in place
            # below never writes into ``data``.
            centroids = data[np.random.choice(len(data), self.n_clusters, replace=False)]
        if verbose:
            print("initial centroids are:",centroids)

        # Defined up front so the return value exists even when
        # max_iterations is 0 and the loop body never runs.
        clusters = {j: [] for j in range(self.n_clusters)}
        outliers = []

        for i in range(self.max_iterations):
            clusters = {j: [] for j in range(self.n_clusters)}   #create empty clusters
            outliers = []
            for point in data:
                distances = [np.linalg.norm(point - centroid) for centroid in centroids]
                # argmin returns the first minimum, i.e. ties go to the
                # lowest cluster index.
                index = int(np.argmin(distances))

                if distances[index] > radius:
                    outliers.append(point)
                else:
                    clusters[index].append(point)

            prev_centroids = centroids.copy()
            for cluster, points in clusters.items():
                # An empty cluster keeps its previous centroid.
                if points:
                    centroids[cluster] = np.average(points, axis=0)
            diff = np.linalg.norm(centroids - prev_centroids, axis=1)
            if np.all(diff < self.tolerance):
                break

            if verbose:
                print(f"Cycle {i+1}:")
                for cluster, points in clusters.items():
                    print(f"  Cluster {cluster+1}: Centroid {centroids[cluster]}, Points:{len(points)}")
                print(f"  Outliers: {outliers}")
                print(f"  Change in Centroid value: {diff}")
                print("-" * 70)
        self.centroids = centroids
        return clusters, outliers

# Interactive driver: collect the run parameters, generate uniform random 2-D
# points, cluster them and print a summary.
n_clusters = int(input("Enter the number of clusters: "))
radius = float(input("Enter the radius for outlier detection: "))
range_min = float(input("enter min range"))
range_max = float(input("enter max range"))
no_points = int(input("enter no of points"))
data = np.random.uniform(range_min,range_max,size = (no_points,2))
print("data is",data)

# Seeds are typed as "x,y" pairs joined by semicolons, e.g. "1,2;3,4".
# Stripping first means whitespace-only input counts as blank.
seed_input = input(
    "Enter initial seed points as x,y pairs separated by semicolons, e.g. 1,2;3,4 "
    "(or leave blank to generate randomly): "
).strip()
initial_seeds = None
if seed_input:
    # Skip empty fragments so a trailing ';' does not reach float('').
    seed_rows = [
        [float(coord) for coord in seed.split(',')]
        for seed in seed_input.split(';')
        if seed.strip()
    ]
    if len(seed_rows) != n_clusters:
        raise ValueError(
            f"expected {n_clusters} seed points, got {len(seed_rows)}"
        )
    if any(len(row) != 2 for row in seed_rows):
        raise ValueError("each seed point must have exactly two coordinates (x,y)")
    initial_seeds = np.array(seed_rows)

kmeans = KMeans(n_clusters=n_clusters)
clusters, outliers = kmeans.fit(data, initial_seeds=initial_seeds, radius=radius)

print("Final clusters and outliers:")
for cluster, points in clusters.items():
    # 1-based label, matching the per-cycle log printed by fit().
    print(f"Cluster {cluster+1}: Centroid {kmeans.centroids[cluster]}, Points: {len(points)}")
print(f"Outliers: {len(outliers)}")


Enter the number of clusters: 4
Enter the radius for outlier detection: 12
enter min range10
enter max range40
enter no of points30
data is [[22.57029576 30.24209767]
 [26.20376912 32.58060208]
 [23.86456574 24.55064004]
 [37.32224669 12.19780112]
 [39.00707074 17.77365639]
 [32.41140425 22.01652637]
 [19.32590473 20.63002215]
 [33.30231374 24.86516662]
 [11.70588068 28.63848994]
 [20.8596465  15.42864059]
 [38.47876694 38.65097109]
 [16.74490462 26.26185217]
 [21.57690906 23.35544571]
 [15.4217602  23.89828627]
 [28.68223928 31.94007374]
 [29.69918688 27.38547602]
 [35.12225586 24.33979935]
 [16.79931684 38.54247998]
 [35.6846336  32.7865258 ]
 [14.32481273 13.94250068]
 [20.72560034 12.40031023]
 [17.0358725  21.69335973]
 [34.47614987 34.86498252]
 [18.87482255 34.92953508]
 [16.67931321 15.58528146]
 [39.37528681 16.62822257]
 [31.36639769 25.93726623]
 [23.43760614 19.07885744]
 [18.08912163 12.3443716 ]
 [23.70342037 38.90113497]]
Enter initial seed points separated by commas(or 