In [3]:
import csv
import numpy as np

def read_csv(file_path):
    """Load a purely numeric CSV file into a NumPy array.

    Every field of every non-empty row is converted with ``float``. Blank
    lines are skipped; without that, a trailing empty line would add a
    zero-length row and break the rectangular array.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A NumPy array built from the parsed rows (one row per non-empty
        line, one column per field).

    Raises:
        ValueError: If a field cannot be parsed as a float (for example a
            header row or a text column).
        OSError: If the file cannot be opened.
    """
    rows = []
    # newline='' is what the csv module expects so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with open(file_path, 'r', newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for blank lines; ignore them.
                continue
            rows.append([float(val) for val in row])
    return np.array(rows)

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays
    of matching shape).
    """
    delta = x1 - x2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)

def _assign_labels(data, centroids):
    """Return, for every row of ``data``, the index of its nearest centroid."""
    distances = np.empty((data.shape[0], centroids.shape[0]))
    for j, centroid in enumerate(centroids):
        # One column per centroid keeps the temporary at n x m instead of
        # materialising an n x k x m difference tensor.
        distances[:, j] = np.sqrt(np.sum((data - centroid) ** 2, axis=1))
    # Labels are kept as floats because that is the dtype callers of
    # kmeans() have always received.
    return np.argmin(distances, axis=1).astype(float)


def kmeans(data, k, max_iters):
    """Cluster the rows of ``data`` into ``k`` groups with Lloyd's algorithm.

    Centroids are initialised to ``k`` distinct rows drawn with
    ``np.random.choice``; seed NumPy's global RNG for reproducible results.
    Iteration stops when the centroids no longer change or after
    ``max_iters`` update steps, whichever comes first.

    Args:
        data: 2-D array-like of shape (n_points, n_features).
        k: Number of clusters; must satisfy ``1 <= k <= n_points``.
        max_iters: Maximum number of centroid updates. ``0`` returns the
            initial centroids together with the labels assigned to them.

    Returns:
        A ``(labels, centroids)`` tuple. ``labels`` is a float array of
        length n_points holding each row's cluster index, and ``centroids``
        is a ``(k, n_features)`` array. The labels always correspond to the
        returned centroids.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            data points.
    """
    data = np.asarray(data, dtype=float)
    n = data.shape[0]  # Number of data points
    m = data.shape[1]  # Number of features

    if k < 1 or k > n:
        raise ValueError(
            f"k must be between 1 and the number of data points ({n}), got {k}"
        )

    # Randomly initialize centroids from distinct rows of the data
    centroids = data[np.random.choice(n, k, replace=False)]
    # Assign up front so labels exist even when max_iters is 0.
    labels = _assign_labels(data, centroids)

    for _ in range(max_iters):
        # Update step: move each centroid to the mean of its members
        new_centroids = np.zeros((k, m))
        for j in range(k):
            members = data[labels == j]
            if len(members) > 0:
                new_centroids[j] = members.mean(axis=0)
            else:
                # An empty cluster keeps its previous centroid rather than
                # collapsing to the origin, which is not a meaningful
                # location for the data.
                new_centroids[j] = centroids[j]

        # Check for convergence
        if np.array_equal(centroids, new_centroids):
            break

        centroids = new_centroids
        # Re-assign immediately so the returned labels always match the
        # returned centroids, even when the iteration cap is reached.
        labels = _assign_labels(data, centroids)

    return labels, centroids

# Clustering configuration: number of clusters and iteration cap
k = 3
max_iters = 100

# Location of the CSV dataset
csv_file_path = "Iris.csv"

# Load the dataset, then run k-means on it
data = read_csv(csv_file_path)
labels, centroids = kmeans(data, k, max_iters)

# Attach each row's cluster label as an extra trailing column
clustered_data = np.hstack((data, labels[:, np.newaxis]))

# Report the cluster assignments and the centroids
print("Cluster assignments:", labels, sep="\n")
print("\nCluster centroids:", centroids, sep="\n")


Cluster assignments:
[0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.
 1. 1. 1. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1.
 0. 1. 2. 2. 2. 2. 2. 2. 2. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 1. 2. 2.
 2. 2. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2.]

Cluster centroids:
[[5.28333333 3.70833333 1.49166667 0.27916667]
 [4.77586207 3.07241379 1.62068966 0.29655172]
 [6.30103093 2.88659794 4.95876289 1.69587629]]
