In [None]:
import numpy as np

# Constants
# Configuration for this cell's random-data k-means run.
N_POINTS = 10000  # You can adjust this to match your C implementation
DIMENSIONS = 2  # Number of coordinates per generated point
K = 15  # Number of clusters (and centroids) passed to kmeans
MAX_ITERATIONS = 100  # Upper bound on assignment/update rounds in kmeans

# Function to initialize random points
def initialize_points(n_points, dimensions):
    """Draw a random dataset from NumPy's global random state.

    Args:
        n_points: Number of rows (points) to generate.
        dimensions: Number of columns (coordinates per point).

    Returns:
        Float array of shape (n_points, dimensions) with values sampled
        uniformly from [0, 1).
    """
    shape = (n_points, dimensions)
    return np.random.random_sample(shape)

# Function to initialize centroids randomly from the points
def initialize_centroids(points, k):
    """Pick `k` distinct rows of `points` to serve as starting centroids.

    Rows are sampled without replacement using NumPy's global random state.

    Args:
        points: 2-D array with one point per row.
        k: Number of centroids to select.

    Returns:
        Array of shape (k, points.shape[1]) holding the selected rows.
    """
    row_count = points.shape[0]
    chosen_rows = np.random.choice(row_count, size=k, replace=False)
    return points[chosen_rows, :]

# Function to calculate the Euclidean distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean (L2) distance between two coordinate arrays.

    Args:
        point1: NumPy array of coordinates.
        point2: NumPy array of coordinates, same shape as `point1`.

    Returns:
        Square root of the sum of squared coordinate differences.
    """
    offset = point1 - point2
    squared_length = np.sum(offset * offset)
    return np.sqrt(squared_length)

# K-means algorithm
def kmeans(points, k, max_iterations):
    """Cluster `points` into `k` groups with Lloyd's k-means algorithm.

    Args:
        points: 2-D array of shape (n_points, n_dimensions).
        k: Number of clusters. The starting centroids are `k` rows of
            `points` chosen by `initialize_centroids`.
        max_iterations: Upper bound on assignment/update rounds. The loop
            stops earlier once the centroids stop moving (`np.allclose`).

    Returns:
        Tuple `(centroids, clusters)`: a (k, n_dimensions) array of centroid
        coordinates and a length-n_points integer array holding the index of
        the centroid each point was assigned to in the last round.
    """
    centroids = initialize_centroids(points, k)
    # Bind `clusters` up front so the return statement is valid even when
    # max_iterations <= 0 and the loop body never runs.
    clusters = np.zeros(points.shape[0], dtype=int)

    for iteration in range(max_iterations):
        print(f"Iteration {iteration}")

        # Step 1: Assign clusters.
        # Broadcasting builds the whole (n_points, k) distance table at once
        # instead of calling a Python function n_points * k times.
        offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        distances = np.sqrt(np.sum(offsets ** 2, axis=2))
        clusters = np.argmin(distances, axis=1).astype(int)

        # Step 2: Update centroids
        new_centroids = np.zeros((k, points.shape[1]))
        for j in range(k):
            points_in_cluster = points[clusters == j]
            if len(points_in_cluster) > 0:
                new_centroids[j] = np.mean(points_in_cluster, axis=0)
            else:
                new_centroids[j] = centroids[j]  # Handle empty cluster by keeping the old centroid

        # Check for convergence (if centroids do not change)
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids

    return centroids, clusters

# Main code to run the K-means algorithm
if __name__ == "__main__":
    # Seed NumPy's global generator so the generated dataset and the randomly
    # chosen starting centroids (and therefore the printed result) are the
    # same on every run of this cell.
    np.random.seed(42)

    points = initialize_points(N_POINTS, DIMENSIONS)
    centroids, clusters = kmeans(points, K, MAX_ITERATIONS)

    print("Final Centroids:")
    print(centroids)


Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Final Centroids:
[[0.16411766 0.37277769]
 [0.62235664 0.86267729]
 [0.63309745 0.1297799 ]
 [0.12482845 0.88215425]
 [0.37847694 0.60285419]
 [0.63767764 0.60729591]
 [0.88401353 0.13625227]
 [0.10595956 0.12998233]
 [0.83898975 0.38578783]
 [0.11703131 0.62459863]
 [0.35353628 0.1312848 ]
 [0.87395953 0.64325552]
 [0.87736348 0.89239769]
 [0.50436582 0.3621497 ]
 [0.37781457 0.87315643]]


In [None]:
import numpy as np

# Parameters
N_POINTS = 10000  # Number of rows written to the dataset file
DIMENSIONS = 2  # Number of coordinates per row

# Generate random points
# Values are uniform in [0, 1), drawn from NumPy's global (unseeded) random state.
points = np.random.rand(N_POINTS, DIMENSIONS)

# Save the points to a file
# One point per line, coordinates separated by commas.
np.savetxt('points_dataset.csv', points, delimiter=',')

# Run your K-means clustering algorithm in Python using the points
# [ Your Python K-means code here ]

import numpy as np

# Constants
N_POINTS = 10000  # You can adjust this to match your C implementation
DIMENSIONS = 2
K = 15
MAX_ITERATIONS = 100

# Function to initialize random points
def initialize_points(n_points, dimensions):
    """Draw a random dataset from NumPy's global random state.

    Args:
        n_points: Number of rows (points) to generate.
        dimensions: Number of columns (coordinates per point).

    Returns:
        Float array of shape (n_points, dimensions) with values sampled
        uniformly from [0, 1).
    """
    shape = (n_points, dimensions)
    return np.random.random_sample(shape)

# Function to initialize centroids randomly from the points
def initialize_centroids(points, k):
    """Pick `k` distinct rows of `points` to serve as starting centroids.

    Rows are sampled without replacement using NumPy's global random state.

    Args:
        points: 2-D array with one point per row.
        k: Number of centroids to select.

    Returns:
        Array of shape (k, points.shape[1]) holding the selected rows.
    """
    row_count = points.shape[0]
    chosen_rows = np.random.choice(row_count, size=k, replace=False)
    return points[chosen_rows, :]

# Function to calculate the Euclidean distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean (L2) distance between two coordinate arrays.

    Args:
        point1: NumPy array of coordinates.
        point2: NumPy array of coordinates, same shape as `point1`.

    Returns:
        Square root of the sum of squared coordinate differences.
    """
    offset = point1 - point2
    squared_length = np.sum(offset * offset)
    return np.sqrt(squared_length)

# K-means algorithm
def kmeans(points, k, max_iterations):
    """Cluster `points` into `k` groups with Lloyd's k-means algorithm.

    Args:
        points: 2-D array of shape (n_points, n_dimensions).
        k: Number of clusters. The starting centroids are `k` rows of
            `points` chosen by `initialize_centroids`.
        max_iterations: Upper bound on assignment/update rounds. The loop
            stops earlier once the centroids stop moving (`np.allclose`).

    Returns:
        Tuple `(centroids, clusters)`: a (k, n_dimensions) array of centroid
        coordinates and a length-n_points integer array holding the index of
        the centroid each point was assigned to in the last round.
    """
    centroids = initialize_centroids(points, k)
    # Bind `clusters` up front so the return statement is valid even when
    # max_iterations <= 0 and the loop body never runs.
    clusters = np.zeros(points.shape[0], dtype=int)

    for iteration in range(max_iterations):
        print(f"Iteration {iteration}")

        # Step 1: Assign clusters.
        # Broadcasting builds the whole (n_points, k) distance table at once
        # instead of calling a Python function n_points * k times.
        offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        distances = np.sqrt(np.sum(offsets ** 2, axis=2))
        clusters = np.argmin(distances, axis=1).astype(int)

        # Step 2: Update centroids
        new_centroids = np.zeros((k, points.shape[1]))
        for j in range(k):
            points_in_cluster = points[clusters == j]
            if len(points_in_cluster) > 0:
                new_centroids[j] = np.mean(points_in_cluster, axis=0)
            else:
                new_centroids[j] = centroids[j]  # Handle empty cluster by keeping the old centroid

        # Check for convergence (if centroids do not change)
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids

    return centroids, clusters

# Main code to run the K-means algorithm
if __name__ == "__main__":
    # Cluster exactly the dataset that was exported to 'points_dataset.csv'.
    # Drawing a fresh random dataset here would make the saved centroids
    # describe different data from the file handed to the other
    # implementation, so the comparison would be meaningless.
    points = np.loadtxt('points_dataset.csv', delimiter=',')

    # Fix the random choice of starting centroids so reruns are repeatable.
    np.random.seed(42)
    centroids, clusters = kmeans(points, K, MAX_ITERATIONS)

    print("Final Centroids:")
    print(centroids)

    # After clustering, save the centroids for comparison.
    # Kept inside the guard because `centroids` is only bound here.
    np.savetxt('python_centroids.csv', centroids, delimiter=',')


Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Iteration 42
Iteration 43
Iteration 44
Iteration 45
Iteration 46
Iteration 47
Iteration 48
Iteration 49
Iteration 50
Iteration 51
Iteration 52
Iteration 53
Iteration 54
Iteration 55
Iteration 56
Iteration 57
Iteration 58
Iteration 59
Iteration 60
Iteration 61
Iteration 62
Iteration 63
Iteration 64
Iteration 65
Iteration 66
Iteration 67
Iteration 68
Iteration 69
Iteration 70
Iteration 71
Iteration 72
Iteration 73
Iteration 74
Iteration 75
Iteration 76
Iteration

In [None]:
# Load the points dataset
# Reads the comma-separated file into `points`, one point per row.
# Relies on `np` having been imported by an earlier cell.
points = np.loadtxt('points_dataset.csv', delimiter=',')

# Run your K-means clustering algorithm using these points


In [None]:
from google.colab import files
files.download('points_dataset.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import numpy as np

# Constants
N_POINTS = 10000  # Number of points
DIMENSIONS = 2    # Number of dimensions

# Generate random points
# Values are uniform in [0, 1), drawn from NumPy's global (unseeded) random state.
points = np.random.rand(N_POINTS, DIMENSIONS)

# Save the points to a file
# No delimiter is given, so np.savetxt uses its default column separator.
np.savetxt('points.txt', points)


In [None]:
pwd


'/content'

In [None]:
import numpy as np

# Constants
N_POINTS = 10000  # Make sure this matches the data in points.txt
DIMENSIONS = 2
K = 15
MAX_ITERATIONS = 100

# Function to load points from a file
def load_points_from_file(filename):
    """Read a numeric table from `filename` using `np.loadtxt` defaults.

    Args:
        filename: Path (or any source `np.loadtxt` accepts) to read from.

    Returns:
        The array produced by `np.loadtxt`, one row per line of input.
    """
    loaded = np.loadtxt(filename)
    return loaded

# Function to initialize centroids randomly from the points
def initialize_centroids(points, k):
    """Pick `k` distinct rows of `points` to serve as starting centroids.

    Rows are sampled without replacement using NumPy's global random state.

    Args:
        points: 2-D array with one point per row.
        k: Number of centroids to select.

    Returns:
        Array of shape (k, points.shape[1]) holding the selected rows.
    """
    row_count = points.shape[0]
    chosen_rows = np.random.choice(row_count, size=k, replace=False)
    return points[chosen_rows, :]

# Function to calculate the Euclidean distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean (L2) distance between two coordinate arrays.

    Args:
        point1: NumPy array of coordinates.
        point2: NumPy array of coordinates, same shape as `point1`.

    Returns:
        Square root of the sum of squared coordinate differences.
    """
    offset = point1 - point2
    squared_length = np.sum(offset * offset)
    return np.sqrt(squared_length)

# K-means algorithm
def kmeans(points, k, max_iterations):
    """Cluster `points` into `k` groups with Lloyd's k-means algorithm.

    Args:
        points: 2-D array of shape (n_points, n_dimensions).
        k: Number of clusters. The starting centroids are `k` rows of
            `points` chosen by `initialize_centroids`.
        max_iterations: Upper bound on assignment/update rounds. The loop
            stops earlier once the centroids stop moving (`np.allclose`).

    Returns:
        Tuple `(centroids, clusters)`: a (k, n_dimensions) array of centroid
        coordinates and a length-n_points integer array holding the index of
        the centroid each point was assigned to in the last round.
    """
    centroids = initialize_centroids(points, k)
    # Bind `clusters` up front so the return statement is valid even when
    # max_iterations <= 0 and the loop body never runs.
    clusters = np.zeros(points.shape[0], dtype=int)

    for iteration in range(max_iterations):
        print(f"Iteration {iteration}")

        # Step 1: Assign clusters.
        # Broadcasting builds the whole (n_points, k) distance table at once
        # instead of calling a Python function n_points * k times.
        offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        distances = np.sqrt(np.sum(offsets ** 2, axis=2))
        clusters = np.argmin(distances, axis=1).astype(int)

        # Step 2: Update centroids
        new_centroids = np.zeros((k, points.shape[1]))
        for j in range(k):
            points_in_cluster = points[clusters == j]
            if len(points_in_cluster) > 0:
                new_centroids[j] = np.mean(points_in_cluster, axis=0)
            else:
                new_centroids[j] = centroids[j]  # Handle empty cluster by keeping the old centroid

        # Check for convergence (if centroids do not change)
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids

    return centroids, clusters

# Main code to run the K-means algorithm
if __name__ == "__main__":
    points = load_points_from_file('points.txt')

    # The dataset is fixed by the file, but the starting centroids are picked
    # at random; seed NumPy so reruns on the same file give the same result.
    np.random.seed(42)
    centroids, clusters = kmeans(points, K, MAX_ITERATIONS)

    print("Final Centroids:")
    print(centroids)


Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Iteration 42
Iteration 43
Iteration 44
Iteration 45
Iteration 46
Iteration 47
Iteration 48
Iteration 49
Iteration 50
Iteration 51
Final Centroids:
[[0.37437006 0.43229737]
 [0.61028436 0.11753107]
 [0.87632996 0.38370544]
 [0.12395809 0.11794557]
 [0.11742493 0.61345622]
 [0.11511359 0.35995638]
 [0.87084066 0.1285476 ]
 [0.63311933 0.63590002]
 [0.88134747 0.64821303]
 [0.15209623 0.88170151]
 [0.3665462  0.15297051]
 [0.83730247 0.88617336]
 [0.37029993 0.69

In [None]:
import numpy as np

# Constants
N_POINTS = 10000  # Number of points
DIMENSIONS = 2  # Coordinates per point
# K must not exceed the number of rows in the predefined centroid table used by
# generate_simple_clusters (six rows). N_POINTS is not a multiple of K here
# (10000 % 6 == 4), which matters wherever N_POINTS // K is used.
K = 6  # Number of clusters
MAX_ITERATIONS = 15  # Number of assignment/update rounds run by kmeans_clustering

# Simple Linear Congruential Generator (LCG)
next_val = 1
def my_rand():
    """Return the next pseudo-random integer in [0, 32767].

    Advances the module-level `next_val` with the linear congruential step
    `next_val * 1103515245 + 12345` and returns bits 16..30 of the new state.

    The state is truncated to 32 bits after every step. The returned bits
    depend only on the low 31 bits of the state, so the output sequence is
    unchanged, but the Python integer no longer grows by about 30 bits per
    call, which made each successive call slower than the last.
    """
    global next_val
    next_val = (next_val * 1103515245 + 12345) & 0xFFFFFFFF
    return (next_val // 65536) % 32768

# Generate points around predefined centroids
def generate_simple_clusters():
    """Build the synthetic dataset: N_POINTS points spread over K clusters.

    Each point is one of the predefined centroids plus a pseudo-random scalar
    offset in [0, 0.099] (from `my_rand`) that is added to every coordinate.
    Points are laid out cluster by cluster, N_POINTS // K per cluster.

    When N_POINTS is not a multiple of K, the leftover points at the end of
    the array are placed in the last cluster. Previously they were never
    written and stayed at the origin, acting as outliers that pulled the
    nearest centroid away from its true cluster.

    Returns:
        Float array of shape (N_POINTS, DIMENSIONS).
    """
    predefined_centroids = np.array([
        [1.0, 1.0],
        [5.0, 5.0],
        [9.0, 9.0],
        [13.0, 13.0],
        [17.0, 17.0],
        [21.0, 21.0]
    ])
    points = np.zeros((N_POINTS, DIMENSIONS))
    # max(..., 1) avoids a zero divisor when there are fewer points than clusters.
    points_per_cluster = max(N_POINTS // K, 1)

    for index in range(N_POINTS):
        # Same cluster layout as a nested per-cluster loop for the first
        # K * points_per_cluster rows; the remainder is clamped to the last cluster.
        cluster = min(index // points_per_cluster, K - 1)
        points[index] = predefined_centroids[cluster] + (my_rand() % 100) / 1000.0

    return points

# Simple function to calculate Euclidean distance
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between coordinate arrays `a` and `b`."""
    offset = a - b
    squared_length = np.sum(offset * offset)
    return np.sqrt(squared_length)

# K-means clustering functions (assign clusters and update centroids)
def assign_clusters(points, centroids):
    """Assign every point to its nearest centroid (Euclidean distance).

    The sizes are taken from the arguments themselves rather than from
    module-level constants, so the function works for any number of points
    and centroids.

    Args:
        points: Array of shape (n_points, n_dimensions).
        centroids: Array of shape (n_centroids, n_dimensions).

    Returns:
        Integer array of length n_points; entry i is the row index of the
        centroid closest to points[i]. Ties go to the lowest index.
    """
    # (n_points, 1, d) - (1, n_centroids, d) -> (n_points, n_centroids, d)
    offsets = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    distances = np.sqrt(np.sum(offsets ** 2, axis=2))
    return np.argmin(distances, axis=1).astype(int)

def update_centroids(points, clusters, previous_centroids=None):
    """Recompute each centroid as the mean of the points assigned to it.

    Args:
        points: Array of shape (n_points, n_dimensions).
        clusters: Integer array of length n_points with values in [0, K).
        previous_centroids: Optional (K, n_dimensions) array. When given, a
            cluster that received no points keeps its row from this array.
            When omitted, such a cluster's centroid is left at the origin,
            which is what this function did before the parameter existed.

    Returns:
        Array of shape (K, n_dimensions) with the updated centroids.
    """
    new_centroids = np.zeros((K, points.shape[1]))
    count = np.zeros(K)

    # Unbuffered scatter-add: rows are accumulated in index order, exactly
    # like an explicit loop over the points.
    np.add.at(new_centroids, clusters, points)
    np.add.at(count, clusters, 1)

    occupied = count != 0
    new_centroids[occupied] /= count[occupied][:, np.newaxis]

    if previous_centroids is not None:
        # An empty cluster has no mean; keep it where it was.
        new_centroids[~occupied] = previous_centroids[~occupied]

    return new_centroids

# Function to print centroids
def print_centroids(title, centroids):
    """Print a heading followed by one numbered line per centroid.

    Iterates over the rows of `centroids` itself instead of a module-level
    count, so every row passed in is printed and a shorter array cannot
    trigger an IndexError.

    Args:
        title: Heading text; printed followed by a colon.
        centroids: Sequence of centroid coordinate rows.
    """
    print(f"{title}:")
    for i, centroid in enumerate(centroids):
        print(f"Centroid {i}: {centroid}")

# Main function
def kmeans_clustering():
    """Generate the synthetic dataset and run MAX_ITERATIONS rounds of k-means.

    Starting centroids are K points picked with `my_rand`. The centroids are
    printed before the first round, after every round, and once more at the
    end. The loop always runs the full MAX_ITERATIONS rounds.
    """
    points = generate_simple_clusters()

    centroids = np.zeros((K, DIMENSIONS))
    for i in range(K):
        centroids[i] = points[my_rand() % N_POINTS]

    print_centroids("Initial Centroids", centroids)

    for iteration in range(MAX_ITERATIONS):
        print(f"\nIteration {iteration + 1}:")
        clusters = assign_clusters(points, centroids)
        updated = update_centroids(points, clusters)

        # A centroid with no assigned points has no mean, and the two-argument
        # update_centroids call reports it at the origin. Keep the previous
        # position instead. This can happen when two starting centroids are
        # the same point, because ties go to the lower index.
        empty = np.bincount(clusters, minlength=K)[:K] == 0
        updated[empty] = centroids[empty]

        centroids = updated
        print_centroids("Updated Centroids", centroids)

    print_centroids("Final Centroids", centroids)

# Run the main function
kmeans_clustering()


Initial Centroids:
Centroid 0: [17.054 17.054]
Centroid 1: [1.088 1.088]
Centroid 2: [21.041 21.041]
Centroid 3: [21.023 21.023]
Centroid 4: [21.073 21.073]
Centroid 5: [17.013 17.013]

Iteration 1:
Updated Centroids:
Centroid 0: [17.06577644 17.06577644]
Centroid 1: [4.2636837 4.2636837]
Centroid 2: [21.04484304 21.04484304]
Centroid 3: [21.014693 21.014693]
Centroid 4: [21.07889216 21.07889216]
Centroid 5: [12.75611379 12.75611379]

Iteration 2:
Updated Centroids:
Centroid 0: [17.04820768 17.04820768]
Centroid 1: [3.04604796 3.04604796]
Centroid 2: [21.04609259 21.04609259]
Centroid 3: [21.01391337 21.01391337]
Centroid 4: [21.08079661 21.08079661]
Centroid 5: [11.04938445 11.04938445]

Iteration 3:
Updated Centroids:
Centroid 0: [17.04820768 17.04820768]
Centroid 1: [3.04604796 3.04604796]
Centroid 2: [21.04728429 21.04728429]
Centroid 3: [21.01421072 21.01421072]
Centroid 4: [21.08159807 21.08159807]
Centroid 5: [11.04938445 11.04938445]

Iteration 4:
Updated Centroids:
Centroid 0:

In [None]:
import numpy as np

# Constants
N_POINTS = 10000
DIMENSIONS = 2
K = 6  # Must not exceed the number of rows in predefined_centroids below
MAX_ITERATIONS = 15

# Initialize the data points and centroids
# These three arrays are module-level state: the functions in this cell read
# and overwrite them in place instead of passing them as arguments.
points = np.zeros((N_POINTS, DIMENSIONS))
centroids = np.zeros((K, DIMENSIONS))
clusters = np.zeros(N_POINTS, dtype=int)  # clusters[i] is the centroid index of points[i]

# Predefined centroids similar to the C code
predefined_centroids = np.array([
    [1.0, 1.0],   # Centroid for cluster 1
    [5.0, 5.0],   # Centroid for cluster 2
    [9.0, 9.0],   # Centroid for cluster 3
    [13.0, 13.0], # Centroid for cluster 4
    [17.0, 17.0], # Centroid for cluster 5
    [21.0, 21.0]  # Centroid for cluster 6
])

# Simple Linear Congruential Generator (LCG) to match C code behavior
def my_rand(seed=[1]):
    """Advance a 31-bit linear congruential generator and return its new state.

    The generator state lives in `seed[0]`. The default list is created once
    and shared across calls on purpose, so calling `my_rand()` repeatedly
    walks one continuous sequence. Pass your own one-element list to run an
    independent sequence.

    Returns:
        The new state, an integer in [0, 2**31).
    """
    state = (1103515245 * seed[0] + 12345) % 0x80000000
    seed[0] = state
    return state

# Generate simple clusters around predefined centroids
def generate_simple_clusters():
    """Fill the module-level `points` array with K synthetic clusters.

    Each point is a row of `predefined_centroids` plus a pseudo-random scalar
    offset in [0, 0.099] (from `my_rand`) that is added to every coordinate.
    Points are laid out cluster by cluster, N_POINTS // K per cluster.

    When N_POINTS is not a multiple of K, the leftover rows at the end of
    the array are placed in the last cluster. Previously they were never
    written and stayed at the origin, acting as outliers that pulled the
    nearest centroid away from its true cluster.
    """
    # max(..., 1) avoids a zero divisor when there are fewer points than clusters.
    points_per_cluster = max(N_POINTS // K, 1)
    for index in range(N_POINTS):
        # Same cluster layout as a nested per-cluster loop for the first
        # K * points_per_cluster rows; the remainder is clamped to the last cluster.
        cluster = min(index // points_per_cluster, K - 1)
        points[index] = predefined_centroids[cluster] + my_rand() % 100 / 1000.0

# Simple function to calculate square root using Newton's method
def sqrt_approx(number, tolerance=0.01):
    """Approximate the square root of `number` by iterative averaging.

    Starting from x = number and y = 1, x is repeatedly replaced by the mean
    of x and y, and y by number / x, until the two are within `tolerance`
    of each other.

    Args:
        number: Non-negative value whose square root is wanted.
        tolerance: Stop once |x - y| is at most this. Must be positive.
            Defaults to 0.01, the threshold that used to be hard-coded.

    Returns:
        The final x. For an input of zero the result is a small positive
        number below `tolerance`, not exactly 0.

    Raises:
        ValueError: If `number` is negative. The iteration cannot converge
            for negative input; it would divide by zero or never terminate.
    """
    if number < 0:
        raise ValueError("sqrt_approx requires a non-negative number")

    x = number
    y = 1.0

    while abs(x - y) > tolerance:
        x = (x + y) / 2
        y = number / x
    return x

# Assign clusters
def assign_clusters():
    """Record, for every point, the index of its nearest centroid.

    Reads the module-level `points` and `centroids` and writes the result
    into the module-level `clusters`. Distances come from `sqrt_approx`, and
    ties go to the lowest centroid index.
    """
    def approx_distance(point_idx, centroid_idx):
        # Accumulate squared coordinate differences, then take the
        # approximate root.
        squared = 0.0
        for axis in range(DIMENSIONS):
            delta = points[point_idx][axis] - centroids[centroid_idx][axis]
            squared += delta * delta
        return sqrt_approx(squared)

    for point_idx in range(N_POINTS):
        best_idx = 0
        best_distance = float('inf')
        for centroid_idx in range(K):
            candidate = approx_distance(point_idx, centroid_idx)
            if candidate < best_distance:
                best_idx, best_distance = centroid_idx, candidate
        clusters[point_idx] = best_idx

# Update centroids
def update_centroids():
    """Move each centroid to the mean of the points currently assigned to it.

    Reads the module-level `points` and `clusters` and overwrites rows of
    the module-level `centroids` in place. A centroid with no assigned
    points is left where it is.
    """
    coordinate_sums = np.zeros((K, DIMENSIONS))
    member_counts = np.zeros(K)

    for point_idx in range(N_POINTS):
        owner = clusters[point_idx]
        coordinate_sums[owner] += points[point_idx]
        member_counts[owner] += 1

    for centroid_idx in range(K):
        if member_counts[centroid_idx] == 0:
            # Nothing assigned: keep the existing position.
            continue
        centroids[centroid_idx] = coordinate_sums[centroid_idx] / member_counts[centroid_idx]

# Function to print centroids
def print_centroids(title):
    """Print `title` as a heading, then the K rows of the module-level `centroids`."""
    print(f"{title}:")
    i = 0
    while i < K:
        print(f"Centroid {i}: {centroids[i]}")
        i += 1

# Main function to perform K-means clustering
def kmeans():
    """Run the whole demo: build the dataset, seed the centroids, iterate.

    Works on the module-level `points`, `centroids` and `clusters` arrays.
    The centroids are printed before the first round, after each of the
    MAX_ITERATIONS rounds, and once more at the end.
    """
    generate_simple_clusters()

    # Initialize centroids randomly from points
    for slot in range(K):
        picked_row = my_rand() % N_POINTS
        centroids[slot] = points[picked_row]
    print_centroids("Initial Centroids")

    # Perform K-means clustering: one assignment step and one update step per round
    for iteration in range(MAX_ITERATIONS):
        print(f"\nIteration {iteration + 1}:")
        assign_clusters()
        update_centroids()
        print_centroids("Updated Centroids")

    print_centroids("Final Centroids")

# Run the K-means clustering
kmeans()


Initial Centroids:
Centroid 0: [5.08 5.08]
Centroid 1: [21.081 21.081]
Centroid 2: [5.074 5.074]
Centroid 3: [5.055 5.055]
Centroid 4: [5.032 5.032]
Centroid 5: [9.053 9.053]

Iteration 1:
Updated Centroids:
Centroid 0: [5.08839362 5.08839362]
Centroid 1: [19.04917947 19.04917947]
Centroid 2: [5.07100976 5.07100976]
Centroid 3: [5.05385269 5.05385269]
Centroid 4: [2.25808618 2.25808618]
Centroid 5: [11.05009904 11.05009904]

Iteration 2:
Updated Centroids:
Centroid 0: [5.08957988 5.08957988]
Centroid 1: [19.04917947 19.04917947]
Centroid 2: [5.07116912 5.07116912]
Centroid 3: [5.03169792 5.03169792]
Centroid 4: [1.04647725 1.04647725]
Centroid 5: [11.05009904 11.05009904]

Iteration 3:
Updated Centroids:
Centroid 0: [5.0900559 5.0900559]
Centroid 1: [19.04917947 19.04917947]
Centroid 2: [5.06572765 5.06572765]
Centroid 3: [5.02606721 5.02606721]
Centroid 4: [1.04647725 1.04647725]
Centroid 5: [11.05009904 11.05009904]

Iteration 4:
Updated Centroids:
Centroid 0: [5.08886704 5.08886704]