In [1]:
import numpy as np

# Constants
N_POINTS = 10000
DIMENSIONS = 2
K = 6
MAX_ITERATIONS = 15

# Predefined centroids to be consistent with the C implementation
predefined_centroids = np.array([
    [1.0, 1.0],   # Centroid for cluster 1
    [5.0, 5.0],   # Centroid for cluster 2
    [9.0, 9.0],   # Centroid for cluster 3
    [13.0, 13.0], # Centroid for cluster 4
    [17.0, 17.0], # Centroid for cluster 5
    [21.0, 21.0]  # Centroid for cluster 6
])

# Function to generate points around predefined centroids without randomness
def generate_simple_clusters():
    points = np.zeros((N_POINTS, DIMENSIONS))
    points_per_cluster = N_POINTS // K

    for i in range(K):
        for j in range(points_per_cluster):
            index = i * points_per_cluster + j
            points[index] = predefined_centroids[i]  # No random variation

    return points

# Function to calculate the Euclidean distance between two points
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

# K-means clustering functions (assign clusters and update centroids)
def assign_clusters(points, centroids):
    clusters = np.zeros(N_POINTS, dtype=int)
    for i in range(N_POINTS):
        distances = np.array([euclidean_distance(points[i], centroid) for centroid in centroids])
        clusters[i] = np.argmin(distances)
    return clusters

def update_centroids(points, clusters):
    new_centroids = np.zeros((K, DIMENSIONS))
    count = np.zeros(K)

    for i in range(N_POINTS):
        cluster_id = clusters[i]
        new_centroids[cluster_id] += points[i]
        count[cluster_id] += 1

    for j in range(K):
        if count[j] != 0:
            new_centroids[j] /= count[j]

    return new_centroids

# Function to print centroids
def print_centroids(title, centroids):
    print(f"{title}:")
    for i in range(K):
        print(f"Centroid {i}: {centroids[i]}")

# Main function
def kmeans_clustering():
    points = generate_simple_clusters()

    # Initialize centroids with fixed predefined points
    centroids = predefined_centroids.copy()

    print_centroids("Initial Centroids", centroids)

    for iteration in range(MAX_ITERATIONS):
        print(f"\nIteration {iteration + 1}:")
        clusters = assign_clusters(points, centroids)
        centroids = update_centroids(points, clusters)
        print_centroids("Updated Centroids", centroids)

    print_centroids("Final Centroids", centroids)

# Run the main function
kmeans_clustering()


Initial Centroids:
Centroid 0: [1. 1.]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 1:
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 2:
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 3:
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 4:
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 5:
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Ce

In [8]:
import numpy as np
import time

# Constants
N_POINTS = 10000
DIMENSIONS = 2
K = 6
MAX_ITERATIONS = 15

# Generate predefined clusters around fixed centroids
def generate_fixed_clusters():
    predefined_centroids = np.array([
        [1.0, 1.0],
        [5.0, 5.0],
        [9.0, 9.0],
        [13.0, 13.0],
        [17.0, 17.0],
        [21.0, 21.0]
    ])
    points = np.zeros((N_POINTS, DIMENSIONS))
    points_per_cluster = N_POINTS // K

    for i in range(K):
        for j in range(points_per_cluster):
            index = i * points_per_cluster + j
            points[index] = predefined_centroids[i]

    return points

# Simple function to calculate Euclidean distance
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

# K-means clustering functions (assign clusters and update centroids)
def assign_clusters(points, centroids):
    clusters = np.zeros(N_POINTS, dtype=int)
    for i in range(N_POINTS):
        distances = np.array([euclidean_distance(points[i], centroids[j]) for j in range(K)])
        clusters[i] = np.argmin(distances)
    return clusters

def update_centroids(points, clusters):
    new_centroids = np.zeros((K, DIMENSIONS))
    count = np.zeros(K)

    for i in range(N_POINTS):
        new_centroids[clusters[i]] += points[i]
        count[clusters[i]] += 1

    for j in range(K):
        if count[j] != 0:
            new_centroids[j] /= count[j]

    return new_centroids

# Function to print centroids
def print_centroids(title, centroids):
    print(f"{title}:")
    for i in range(K):
        print(f"Centroid {i}: {centroids[i]}")

# Main function
def kmeans_clustering():
    points = generate_fixed_clusters()

    centroids = np.array([
        [1.1, 1.1],
        [5.1, 5.1],
        [9.1, 9.1],
        [13.1, 13.1],
        [17.1, 17.1],
        [21.1, 21.1]
    ])

    print_centroids("Initial Centroids", centroids)

    total_start_time = time.time()

    for iteration in range(MAX_ITERATIONS):
        print(f"\nIteration {iteration + 1}:")

        start_time = time.time()
        clusters = assign_clusters(points, centroids)
        end_time = time.time()
        print(f"Assign clusters duration: {end_time - start_time} seconds")

        start_time = time.time()
        centroids = update_centroids(points, clusters)
        end_time = time.time()
        print(f"Update centroids duration: {end_time - start_time} seconds")

        print_centroids("Updated Centroids", centroids)

    total_end_time = time.time()
    print(f"Total duration: {total_end_time - total_start_time} seconds")

    print_centroids("Final Centroids", centroids)

# Run the main function
kmeans_clustering()


Initial Centroids:
Centroid 0: [1.1 1.1]
Centroid 1: [5.1 5.1]
Centroid 2: [9.1 9.1]
Centroid 3: [13.1 13.1]
Centroid 4: [17.1 17.1]
Centroid 5: [21.1 21.1]

Iteration 1:
Assign clusters duration: 0.7463226318359375 seconds
Update centroids duration: 0.017658233642578125 seconds
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 2:
Assign clusters duration: 0.7595312595367432 seconds
Update centroids duration: 0.01782059669494629 seconds
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 3:
Assign clusters duration: 0.8407690525054932 seconds
Update centroids duration: 0.01931285858154297 seconds
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21

In [9]:
import numpy as np
import time

# Constants
N_POINTS = 10000  # Number of points
DIMENSIONS = 2
K = 6  # Number of clusters
MAX_ITERATIONS = 15

# Function to generate predefined clusters around fixed centroids
def generate_fixed_clusters():
    predefined_centroids = np.array([
        [1.0, 1.0],
        [5.0, 5.0],
        [9.0, 9.0],
        [13.0, 13.0],
        [17.0, 17.0],
        [21.0, 21.0]
    ])
    points_per_cluster = N_POINTS // K
    points = np.zeros((N_POINTS, DIMENSIONS))

    for i in range(K):
        for j in range(points_per_cluster):
            index = i * points_per_cluster + j
            points[index] = predefined_centroids[i]

    return points

# Simple function to calculate Euclidean distance
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

# K-means clustering functions (assign clusters and update centroids)
def assign_clusters(points, centroids):
    clusters = np.zeros(N_POINTS, dtype=int)
    for i in range(N_POINTS):
        distances = np.array([euclidean_distance(points[i], centroids[j]) for j in range(K)])
        clusters[i] = np.argmin(distances)
    return clusters

def update_centroids(points, clusters):
    new_centroids = np.zeros((K, DIMENSIONS))
    for j in range(K):
        points_in_cluster = points[clusters == j]
        if len(points_in_cluster) > 0:
            new_centroids[j] = np.mean(points_in_cluster, axis=0)
    return new_centroids

# Function to print centroids
def print_centroids(title, centroids):
    print(f"{title}:")
    for i in range(K):
        print(f"Centroid {i}: {centroids[i]}")

# Main function to perform K-means clustering
def kmeans_clustering():
    points = generate_fixed_clusters()

    # Initialize centroids with predefined values (same as in the DPU code)
    centroids = np.array([
        [1.1, 1.1],
        [5.1, 5.1],
        [9.1, 9.1],
        [13.1, 13.1],
        [17.1, 17.1],
        [21.1, 21.1]
    ])

    print_centroids("Initial Centroids", centroids)

    # Log total time taken
    total_start_time = time.time()

    # Perform K-means clustering
    for iteration in range(MAX_ITERATIONS):
        start_time = time.time()
        clusters = assign_clusters(points, centroids)
        print(f"Assign clusters duration: {time.time() - start_time} seconds")

        start_time = time.time()
        centroids = update_centroids(points, clusters)
        print(f"Update centroids duration: {time.time() - start_time} seconds")

        print_centroids(f"Iteration {iteration + 1}: Updated Centroids", centroids)

    total_end_time = time.time()
    print(f"Total duration: {total_end_time - total_start_time} seconds")

    print_centroids("Final Centroids", centroids)

# Run the main function
kmeans_clustering()


Initial Centroids:
Centroid 0: [1.1 1.1]
Centroid 1: [5.1 5.1]
Centroid 2: [9.1 9.1]
Centroid 3: [13.1 13.1]
Centroid 4: [17.1 17.1]
Centroid 5: [21.1 21.1]
Assign clusters duration: 1.0475986003875732 seconds
Update centroids duration: 0.001399993896484375 seconds
Iteration 1: Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]
Assign clusters duration: 0.7991328239440918 seconds
Update centroids duration: 0.0017099380493164062 seconds
Iteration 2: Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]
Assign clusters duration: 0.6813375949859619 seconds
Update centroids duration: 0.0017039775848388672 seconds
Iteration 3: Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [2

In [14]:
import numpy as np
import time

# Constants
N_POINTS = 10000  # Number of points
DIMENSIONS = 2
K = 6  # Number of clusters
MAX_ITERATIONS = 15

# Generate predefined clusters around fixed centroids
def generate_fixed_clusters():
    predefined_centroids = np.array([
        [1.0, 1.0],   # Centroid for cluster 1
        [5.0, 5.0],   # Centroid for cluster 2
        [9.0, 9.0],   # Centroid for cluster 3
        [13.0, 13.0], # Centroid for cluster 4
        [17.0, 17.0], # Centroid for cluster 5
        [21.0, 21.0]  # Centroid for cluster 6
    ])
    points = np.zeros((N_POINTS, DIMENSIONS))
    points_per_cluster = N_POINTS // K

    for i in range(K):
        for j in range(points_per_cluster):
            index = i * points_per_cluster + j
            points[index] = predefined_centroids[i]
    return points

# Simple function to calculate Euclidean distance
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

# K-means clustering functions (assign clusters and update centroids)
def assign_clusters(points, centroids):
    clusters = np.zeros(N_POINTS, dtype=int)
    for i in range(N_POINTS):
        distances = np.array([euclidean_distance(points[i], centroids[j]) for j in range(K)])
        clusters[i] = np.argmin(distances)
    return clusters

def update_centroids(points, clusters):
    new_centroids = np.zeros((K, DIMENSIONS))
    count = np.zeros(K)

    for i in range(N_POINTS):
        new_centroids[clusters[i]] += points[i]
        count[clusters[i]] += 1

    for j in range(K):
        if count[j] != 0:
            new_centroids[j] /= count[j]

    return new_centroids

# Function to print centroids
def print_centroids(title, centroids):
    print(f"{title}:")
    for i in range(K):
        print(f"Centroid {i}: {centroids[i]}")

# Main function to run K-means clustering
def kmeans_clustering():
    # Generate fixed clusters
    points = generate_fixed_clusters()

    # Initialize centroids with predefined values
    centroids = np.array([
        [1.1, 1.1],
        [5.1, 5.1],
        [9.1, 9.1],
        [13.1, 13.1],
        [17.1, 17.1],
        [21.1, 21.1]
    ])

    print_centroids("Initial Centroids", centroids)

    # Start total time measurement
    total_start_time = time.time()

    for iteration in range(MAX_ITERATIONS):
        print(f"\nIteration {iteration + 1}:")

        # Measure assign clusters duration
        start_time = time.time()
        clusters = assign_clusters(points, centroids)
        assign_duration = time.time() - start_time
        print(f"Assign clusters duration: {assign_duration} seconds")

        # Measure update centroids duration
        start_time = time.time()
        centroids = update_centroids(points, clusters)
        update_duration = time.time() - start_time
        print(f"Update centroids duration: {update_duration} seconds")

        print_centroids("Updated Centroids", centroids)

    # End total time measurement
    total_end_time = time.time()
    print(f"Total duration: {total_end_time - total_start_time} seconds")

    print_centroids("Final Centroids", centroids)

# Run the main function
kmeans_clustering()


Initial Centroids:
Centroid 0: [1.1 1.1]
Centroid 1: [5.1 5.1]
Centroid 2: [9.1 9.1]
Centroid 3: [13.1 13.1]
Centroid 4: [17.1 17.1]
Centroid 5: [21.1 21.1]

Iteration 1:
Assign clusters duration: 0.6656386852264404 seconds
Update centroids duration: 0.017459630966186523 seconds
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 2:
Assign clusters duration: 0.8588299751281738 seconds
Update centroids duration: 0.018429040908813477 seconds
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]

Iteration 3:
Assign clusters duration: 0.8467097282409668 seconds
Update centroids duration: 0.017789125442504883 seconds
Updated Centroids:
Centroid 0: [0.99760479 0.99760479]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [

In [1]:
import numpy as np
import time

# Constants
N_POINTS = 10000  # Number of points
DIMENSIONS = 2
K = 15  # Number of clusters
MAX_ITERATIONS = 15

# Generate predefined clusters around fixed centroids
def generate_fixed_clusters():
    predefined_centroids = np.array([
        [1.0, 1.0],   # Centroid for cluster 1
        [5.0, 5.0],   # Centroid for cluster 2
        [9.0, 9.0],   # Centroid for cluster 3
        [13.0, 13.0], # Centroid for cluster 4
        [17.0, 17.0], # Centroid for cluster 5
        [21.0, 21.0], # Centroid for cluster 6
        [25.0, 25.0], # Centroid for cluster 7
        [29.0, 29.0], # Centroid for cluster 8
        [33.0, 33.0], # Centroid for cluster 9
        [37.0, 37.0], # Centroid for cluster 10
        [41.0, 41.0], # Centroid for cluster 11
        [45.0, 45.0], # Centroid for cluster 12
        [49.0, 49.0], # Centroid for cluster 13
        [53.0, 53.0], # Centroid for cluster 14
        [57.0, 57.0]  # Centroid for cluster 15
    ])
    points = np.zeros((N_POINTS, DIMENSIONS))
    points_per_cluster = N_POINTS // K

    for i in range(K):
        for j in range(points_per_cluster):
            index = i * points_per_cluster + j
            points[index] = predefined_centroids[i]
    return points

# Simple function to calculate Euclidean distance
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

# K-means clustering functions (assign clusters and update centroids)
def assign_clusters(points, centroids):
    clusters = np.zeros(N_POINTS, dtype=int)
    for i in range(N_POINTS):
        distances = np.array([euclidean_distance(points[i], centroids[j]) for j in range(K)])
        clusters[i] = np.argmin(distances)
    return clusters

def update_centroids(points, clusters):
    new_centroids = np.zeros((K, DIMENSIONS))
    count = np.zeros(K)

    for i in range(N_POINTS):
        new_centroids[clusters[i]] += points[i]
        count[clusters[i]] += 1

    for j in range(K):
        if count[j] != 0:
            new_centroids[j] /= count[j]

    return new_centroids

# Function to print centroids
def print_centroids(title, centroids):
    print(f"{title}:")
    for i in range(K):
        print(f"Centroid {i}: {centroids[i]}")

# Main function to run K-means clustering
def kmeans_clustering():
    # Generate fixed clusters
    points = generate_fixed_clusters()

    # Initialize centroids with predefined values
    centroids = np.array([
        [1.0, 1.0],   # Centroid for cluster 1
        [5.0, 5.0],   # Centroid for cluster 2
        [9.0, 9.0],   # Centroid for cluster 3
        [13.0, 13.0], # Centroid for cluster 4
        [17.0, 17.0], # Centroid for cluster 5
        [21.0, 21.0], # Centroid for cluster 6
        [25.0, 25.0], # Centroid for cluster 7
        [29.0, 29.0], # Centroid for cluster 8
        [33.0, 33.0], # Centroid for cluster 9
        [37.0, 37.0], # Centroid for cluster 10
        [41.0, 41.0], # Centroid for cluster 11
        [45.0, 45.0], # Centroid for cluster 12
        [49.0, 49.0], # Centroid for cluster 13
        [53.0, 53.0], # Centroid for cluster 14
        [57.0, 57.0]  # Centroid for cluster 15

    ])

    print_centroids("Initial Centroids", centroids)

    # Start total time measurement
    total_start_time = time.time()

    for iteration in range(MAX_ITERATIONS):
        print(f"\nIteration {iteration + 1}:")

        # Measure assign clusters duration
        start_time = time.time()
        clusters = assign_clusters(points, centroids)
        assign_duration = time.time() - start_time
        print(f"Assign clusters duration: {assign_duration} seconds")

        # Measure update centroids duration
        start_time = time.time()
        centroids = update_centroids(points, clusters)
        update_duration = time.time() - start_time
        print(f"Update centroids duration: {update_duration} seconds")

        print_centroids("Updated Centroids", centroids)

    # End total time measurement
    total_end_time = time.time()
    print(f"Total duration: {total_end_time - total_start_time} seconds")

    print_centroids("Final Centroids", centroids)

# Run the main function
kmeans_clustering()


Initial Centroids:
Centroid 0: [1. 1.]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]
Centroid 6: [25. 25.]
Centroid 7: [29. 29.]
Centroid 8: [33. 33.]
Centroid 9: [37. 37.]
Centroid 10: [41. 41.]
Centroid 11: [45. 45.]
Centroid 12: [49. 49.]
Centroid 13: [53. 53.]
Centroid 14: [57. 57.]

Iteration 1:
Assign clusters duration: 1.6495773792266846 seconds
Update centroids duration: 0.020070791244506836 seconds
Updated Centroids:
Centroid 0: [0.9852071 0.9852071]
Centroid 1: [5. 5.]
Centroid 2: [9. 9.]
Centroid 3: [13. 13.]
Centroid 4: [17. 17.]
Centroid 5: [21. 21.]
Centroid 6: [25. 25.]
Centroid 7: [29. 29.]
Centroid 8: [33. 33.]
Centroid 9: [37. 37.]
Centroid 10: [41. 41.]
Centroid 11: [45. 45.]
Centroid 12: [49. 49.]
Centroid 13: [53. 53.]
Centroid 14: [57. 57.]

Iteration 2:
Assign clusters duration: 1.5950143337249756 seconds
Update centroids duration: 0.021442413330078125 seconds
Updated Centroids:
Centroid 0: [0.9852071 0.