<a href="https://colab.research.google.com/github/Biski7/Machine-Learning/blob/main/Implementation_of_K_means.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import sys

def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2)."""
    delta_x = x2 - x1
    delta_y = y2 - y1
    squared_length = delta_x**2 + delta_y**2
    return np.sqrt(squared_length)

def _nearest_centroid(points, centroids):
    """Return, for every row of *points*, the index of its closest centroid."""
    # Broadcasting (n, 1, d) - (1, k, d) gives every point-to-centroid offset.
    offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.argmin(np.linalg.norm(offsets, axis=2), axis=1)


def k_means(data, k, max_iters=100, tolerance=1e-4):
    """Cluster *data* into *k* groups using Lloyd's k-means iteration.

    Args:
        data: Array-like of shape (n_samples, n_features). Any number of
            features is accepted.
        k: Number of clusters. Initial centroids are *k* distinct rows of
            *data* drawn with ``np.random.choice``.
        max_iters: Upper bound on the number of centroid updates.
        tolerance: Iteration stops once the summed Euclidean movement of
            all centroids in one update falls below this value.

    Returns:
        A tuple ``(cluster, centroids)``: ``cluster`` holds the 0-based
        index of the nearest returned centroid for each sample, and
        ``centroids`` is a float array of shape (k, n_features).

    Raises:
        ValueError: Propagated from ``np.random.choice`` when *k* exceeds
            the number of samples (or *data* is empty).
    """
    points = np.asarray(data, dtype=float)
    centroids = points[np.random.choice(len(points), k, replace=False)]

    # Assign up front so labels exist even when max_iters is 0.
    cluster = _nearest_centroid(points, centroids)

    for _ in range(max_iters):
        new_centroids = centroids.copy()
        for i in range(k):
            members = points[cluster == i]
            # An empty cluster keeps its previous centroid; averaging an
            # empty selection would yield NaN and corrupt later distances.
            if len(members):
                new_centroids[i] = members.mean(axis=0)

        # Total distance travelled by all centroids during this update.
        shift = np.linalg.norm(new_centroids - centroids, axis=1).sum()

        # Adopt the update before testing for convergence so the returned
        # centroids and labels always describe the latest state.
        centroids = new_centroids
        cluster = _nearest_centroid(points, centroids)

        if shift < tolerance:
            break

    return cluster, centroids

def input_from_file(file_path):
    """Read whitespace-separated integer 2-D points from a text file.

    Each non-blank line must hold exactly two integers (``x y``). Blank and
    whitespace-only lines are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A NumPy array with one ``[x, y]`` row per point. A file with no
        points yields an empty integer array of shape (0, 2).

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            integers; the message names the file and line number.
    """
    data = []
    with open(file_path, 'r') as file:
        for line_number, every_line in enumerate(file, start=1):
            fields = every_line.split()
            if not fields:
                # Tolerate blank lines (e.g. trailing newlines at EOF).
                continue
            try:
                x, y = map(int, fields)
            except ValueError as err:
                raise ValueError(
                    f"{file_path}, line {line_number}: expected two integers, "
                    f"got {every_line.strip()!r}"
                ) from err
            data.append([x, y])

    if not data:
        # Keep the (n, 2) layout so callers can rely on two columns.
        return np.empty((0, 2), dtype=int)
    return np.array(data)

def output_2file(file_path, data, labels):
    """Write every point with its cluster id as tab-separated lines.

    Each line is ``x<TAB>y<TAB>cluster``; labels are shifted by one so the
    cluster ids written to the file start at 1 instead of 0.
    """
    with open(file_path, 'w') as out:
        out.writelines(
            f"{row[0]}\t{row[1]}\t{tag + 1}\n" for row, tag in zip(data, labels)
        )

# Script entry point: python file_name input_file k output_file
# Guarded so that importing this module does not read or write any files.
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Should give 4 arguments (python file_name input_file k output_file)")
        # sys.exit is always available (exit() is only injected by `site`);
        # a non-zero status signals the usage error to the caller.
        sys.exit(1)

    # Take arguments from the command line instead of hard-coded values,
    # so the arguments demanded above are actually honoured.
    input_file = sys.argv[1]
    try:
        k = int(sys.argv[2])
    except ValueError:
        print(f"k must be an integer, got {sys.argv[2]!r}")
        sys.exit(1)
    output_file = sys.argv[3]

    data = input_from_file(input_file)
    labels, centroids = k_means(data, k)
    output_2file(output_file, data, labels)

