In [4]:
import pprint
import numpy as np
from math import sqrt
from random import randint

# A point in 3-D space, stored as (x, y, z).
POINT = tuple[float, float, float]


def distance(p1: POINT, p2: POINT) -> float:
    """Return the Euclidean distance between two 3-D points.

    Only the coordinates at indices 0, 1 and 2 of each argument are read,
    so any indexable value with at least three numeric entries works.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    dz = p1[2] - p2[2]
    return sqrt(dx ** 2 + dy ** 2 + dz ** 2)

def k_means(points: list[POINT], centers: list[POINT]) -> list[dict]:
    """Assign every point to its nearest center.

    Despite the name, this performs only the assignment step: the centers
    are not moved here.

    Returns one dict per center, in the same order as ``centers``, with the
    keys ``"center"`` (the center itself) and ``"points"`` (the points that
    were assigned to it, in input order).

    Raises ``IndexError`` when ``centers`` is empty but ``points`` is not.
    """
    clusters = []
    for center in centers:
        clusters.append({"center": center, "points": []})

    for point in points:
        distances = [distance(point, center) for center in centers]
        # Strict "<" means that on a tie the lowest-index center wins.
        nearest = 0
        for candidate in range(1, len(distances)):
            if distances[candidate] < distances[nearest]:
                nearest = candidate
        clusters[nearest]["points"].append(point)

    return clusters

# Load the points: one "x,y,z" row per line of the input file.
file_path = "points.txt"
points = []
with open(file_path, 'r') as file:
    for line in file:
        data = line.strip().split(',')
        # Rows that do not have exactly three comma-separated fields
        # (blank lines included) are skipped without a message.
        if len(data) != 3:
            continue
        try:
            points.append((float(data[0]), float(data[1]), float(data[2])))
        except ValueError:
            print("Invalid data:", line)

# float() accepts spellings such as "nan" and "inf", so those rows survive
# parsing and have to be dropped here. reshape(-1, 3) keeps the array
# two-dimensional even when no row was valid, so the row-wise mask below
# cannot fail on an empty input.
points = np.array(points, dtype=float).reshape(-1, 3)
points = points[np.isfinite(points).all(axis=1)]

K = int(input("Please enter the number of clusters (K): "))
if K < 1:
    # Without this check an empty center list fails deep inside k_means
    # with an unhelpful IndexError.
    raise ValueError(f"K must be a positive integer, got {K}")

# Initial centers: random integer coordinates in [-10, 10] on every axis.
centers = [(randint(-10, 10), randint(-10, 10), randint(-10, 10)) for _ in range(K)]

# Alternate between assigning points and recomputing centers until the
# centers stop changing.
while True:
    clusters = k_means(points, centers)
    new_centers = []
    for cluster in clusters:
        members = cluster["points"]
        if members:
            # Per-axis arithmetic mean of the points assigned to the cluster.
            new_centers.append(
                tuple(sum(axis) / len(axis) for axis in zip(*members))
            )
        else:
            # A cluster that attracted no points keeps its previous center,
            # so the number of clusters always stays equal to K.
            new_centers.append(cluster["center"])
    if new_centers == centers:
        break
    centers = new_centers

pprint.pprint(clusters)


Invalid data: ?,-8.28,-9.64

Invalid data: 3.75,?,1.12

Invalid data: -1.59,4.08,?

Invalid data: -2.37,8.84,?

Invalid data: 8.25,-6.57,?

Invalid data: ,-1.7,7.1

Invalid data: -8.46,,-8.79

Invalid data: -3.3,-6.23,?

Invalid data: -2.19,-0.18,?

Invalid data: 5.54,0.19,?

Invalid data: ?,-3.01,5.08

Invalid data: -3.03,9.39,?

Invalid data: 0.45,-8.32,

Invalid data: -6.87,,3.95

Invalid data: ?,8.88,9.7

Invalid data: ?,-8.94,7.42

Please enter the number of clusters (K): 3
[{'center': (-5.211111111111114, 1.9044444444444446, -1.9702777777777776),
  'points': [array([-9.49, -5.19, -7.91]),
             array([-8.  , -1.6 ,  5.39]),
             array([-1.19,  1.06,  2.32]),
             array([-4.64,  0.79, -2.26]),
             array([ 0.14,  1.  , -4.79]),
             array([-2.07,  5.4 ,  3.92]),
             array([-3.  , -2.43, -9.9 ]),
             array([-9.7 , -4.77,  8.37]),
             array([-4.67,  2.35, -3.29]),
             array([-4.56,  2.4 ,  9.44]),
           