In [1]:
import numpy as np
import pandas as pd
import random

# Read the CSV (path is relative to the current working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data.csv')

# Plain-text dump of the frame; pandas abbreviates the middle rows of long frames.
print(data)

       X1    X2
0     1.0   2.0
1     1.5   1.8
2     5.0   8.0
3     8.0   8.0
4     1.0   0.6
..    ...   ...
105   8.9  10.4
106   7.9   2.9
107  10.9   2.9
108   9.9   3.9
109   5.9   5.4

[110 rows x 2 columns]


In [9]:
# Feature matrix scaled by its single largest entry (one global max, not per column),
# so the largest value becomes exactly 1.0.
X = data.values / data.values.max()

def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The operands are subtracted element-wise, so they must support ``-``
    with each other (for example NumPy arrays of broadcastable shape, or
    plain numbers).
    """
    offset = a - b
    squared_total = np.sum(np.square(offset))
    return np.sqrt(squared_total)

def kmeans(X, k, max_iters=100, random_state=None):
    """Cluster the rows of ``X`` into ``k`` groups with the k-means iteration.

    Starting from ``k`` distinct rows of ``X`` chosen at random, the function
    alternates between assigning every row to its nearest centroid (Euclidean
    distance, ties go to the lowest cluster index) and moving each centroid to
    the mean of its assigned rows, until the centroids stop changing or
    ``max_iters`` updates have been performed.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster; converted to a float array internally.
    k : int
        Number of clusters, ``1 <= k <= n_samples``.
    max_iters : int, default 100
        Upper bound on the number of centroid updates. ``0`` returns the
        random initial centroids together with the assignment to them.
    random_state : None, int or numpy.random.Generator, default None
        ``None`` draws the initial rows from NumPy's global ``np.random``
        state (so ``np.random.seed`` is honoured). Anything else is passed to
        ``np.random.default_rng`` for a reproducible, self-contained draw.

    Returns
    -------
    centroids : numpy.ndarray of shape (k, n_features)
    clusters : list of k lists of int
        ``clusters[c]`` holds the row indices of ``X`` assigned to
        ``centroids[c]``, in ascending order. A list may be empty.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``k`` is outside ``[1, n_samples]``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples = X.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(f"k must be between 1 and n_samples={n_samples}, got {k}")

    if random_state is None:
        seed_rows = np.random.choice(n_samples, k, replace=False)
    else:
        seed_rows = np.random.default_rng(random_state).choice(n_samples, k, replace=False)
    centroids = X[seed_rows]

    def nearest_centroid(centers):
        # (n_samples, k) distance table via broadcasting, then first minimum per row.
        gaps = X[:, None, :] - centers[None, :, :]
        return np.argmin(np.sqrt(np.sum(gaps ** 2, axis=2)), axis=1)

    labels = nearest_centroid(centroids)
    for _ in range(max_iters):
        new_centroids = centroids.copy()
        for cluster_idx in range(k):
            members = X[labels == cluster_idx]
            # An empty cluster keeps its previous centroid; averaging zero rows
            # would yield NaN and poison every later distance comparison.
            if len(members):
                new_centroids[cluster_idx] = members.mean(axis=0)

        converged = np.array_equal(new_centroids, centroids)
        centroids = new_centroids
        if converged:
            break
        # Re-assign against the updated centroids so that the returned clusters
        # always correspond to the returned centroids, even at the iteration cap.
        labels = nearest_centroid(centroids)

    clusters = [np.flatnonzero(labels == cluster_idx).tolist() for cluster_idx in range(k)]
    return centroids, clusters

def kmedoids(X, k, max_iters=100, random_state=None):
    """Cluster the rows of ``X`` into ``k`` groups around medoids.

    Starting from ``k`` distinct rows of ``X`` chosen at random, the function
    alternates between assigning every row to its nearest medoid (Euclidean
    distance, ties go to the lowest cluster index) and replacing each medoid
    by the cluster member whose summed distance to the other members is
    smallest, until the medoid coordinates stop changing or ``max_iters``
    updates have been performed.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster.
    k : int
        Number of clusters, ``1 <= k <= n_samples``.
    max_iters : int, default 100
        Upper bound on the number of medoid updates. ``0`` returns the random
        initial medoids together with the assignment to them.
    random_state : None, int or numpy.random.Generator, default None
        ``None`` draws the initial rows from NumPy's global ``np.random``
        state (so ``np.random.seed`` is honoured). Anything else is passed to
        ``np.random.default_rng`` for a reproducible, self-contained draw.

    Returns
    -------
    medoids : numpy.ndarray of shape (k, n_features)
        Each medoid is an actual row of ``X``.
    clusters : list of k lists of int
        ``clusters[c]`` holds the row indices of ``X`` assigned to
        ``medoids[c]``, in ascending order. A list may be empty.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``k`` is outside ``[1, n_samples]``.

    Notes
    -----
    The full pairwise distance matrix is computed once up front, which needs
    O(n_samples**2 * n_features) temporary memory.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples = X.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(f"k must be between 1 and n_samples={n_samples}, got {k}")

    if random_state is None:
        medoid_rows = np.random.choice(n_samples, k, replace=False)
    else:
        medoid_rows = np.random.default_rng(random_state).choice(n_samples, k, replace=False)

    # Distances between rows never change, so compute them once and index into
    # the table for both the assignment and the medoid-update step.
    gaps = X[:, None, :] - X[None, :, :]
    pairwise = np.sqrt(np.sum(gaps ** 2, axis=2))

    labels = np.argmin(pairwise[:, medoid_rows], axis=1)
    for _ in range(max_iters):
        new_rows = medoid_rows.copy()
        for cluster_idx in range(k):
            members = np.flatnonzero(labels == cluster_idx)
            # An empty cluster (possible when rows are duplicated) keeps its
            # previous medoid instead of failing on an empty argmin.
            if members.size:
                within_cost = pairwise[np.ix_(members, members)].sum(axis=1)
                new_rows[cluster_idx] = members[np.argmin(within_cost)]

        # Compare coordinates rather than row indices: duplicated rows are
        # interchangeable as medoids and should count as converged.
        converged = np.array_equal(X[new_rows], X[medoid_rows])
        medoid_rows = new_rows
        if converged:
            break
        # Re-assign against the updated medoids so that the returned clusters
        # always correspond to the returned medoids, even at the iteration cap.
        labels = np.argmin(pairwise[:, medoid_rows], axis=1)

    clusters = [np.flatnonzero(labels == cluster_idx).tolist() for cluster_idx in range(k)]
    return X[medoid_rows], clusters

# Seed NumPy's global RNG before clustering: kmeans() and kmedoids() both pick
# their starting rows through np.random, so without a fixed seed every
# Restart & Run All prints different centroids, medoids and clusters.
SEED = 42
np.random.seed(SEED)

k = 3  # number of clusters requested from both algorithms

centroids, clusters = kmeans(X, k)
print("K-Means Clustering Results:")
print("Centroids:", centroids)
print("Clusters:", clusters)

# NOTE: `clusters` is rebound below, so after this cell it refers to the
# k-medoids assignment only.
medoids, clusters = kmedoids(X, k)
print("K-Medoids Clustering Results:")
print("Medoids:", medoids)
print("Clusters:", clusters)

K-Means Clustering Results:
Centroids: [[0.63550323 0.73130194]
 [0.81605529 0.24455077]
 [0.20850202 0.20062978]]
Clusters: [[2, 3, 5, 12, 13, 15, 22, 23, 25, 29, 32, 33, 35, 42, 43, 45, 49, 52, 53, 55, 62, 63, 65, 69, 72, 73, 75, 82, 83, 85, 89, 92, 93, 95, 102, 103, 105, 109], [6, 7, 8, 16, 17, 18, 26, 27, 28, 36, 37, 38, 46, 47, 48, 56, 57, 58, 66, 67, 68, 76, 77, 78, 86, 87, 88, 96, 97, 98, 106, 107, 108], [0, 1, 4, 9, 10, 11, 14, 19, 20, 21, 24, 30, 31, 34, 39, 40, 41, 44, 50, 51, 54, 59, 60, 61, 64, 70, 71, 74, 79, 80, 81, 84, 90, 91, 94, 99, 100, 101, 104]]
K-Medoids Clustering Results:
Medoids: [[0.64035088 0.68421053]
 [0.79824561 0.27192982]
 [0.14035088 0.14035088]]
Clusters: [[2, 3, 5, 12, 13, 15, 22, 23, 25, 29, 32, 33, 35, 42, 43, 45, 49, 52, 53, 55, 62, 63, 65, 69, 72, 73, 75, 79, 82, 83, 85, 89, 92, 93, 95, 99, 102, 103, 105, 109], [6, 7, 8, 16, 17, 18, 19, 26, 27, 28, 36, 37, 38, 39, 46, 47, 48, 56, 57, 58, 59, 66, 67, 68, 76, 77, 78, 86, 87, 88, 96, 97, 98, 106, 107,