forked from misbahulard/Machine-Learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
kmeans.py
100 lines (81 loc) · 2.75 KB
/
kmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from copy import deepcopy
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import operator
import math
def dist(a, b, ax=1):
    """Return the Euclidean distance between ``a`` and ``b`` along ``ax``.

    Both inputs are coerced with ``np.asarray`` so that plain (nested) lists
    and tuples are accepted as well as NumPy arrays.

    Args:
        a: Array-like of coordinates.
        b: Array-like of coordinates, broadcastable against ``a``.
        ax: Axis passed to ``np.linalg.norm``. The default of 1 yields one
            distance per row for 2-D inputs; pass ``None`` to get a single
            norm over the whole difference.

    Returns:
        The 2-norm of ``a - b`` as computed by ``np.linalg.norm``.
    """
    difference = np.asarray(a) - np.asarray(b)
    return np.linalg.norm(difference, axis=ax)
def errorCentroid(a, b, length, dims=2):
    """Return the overall displacement between two sets of centroids.

    The result is the square root of the sum of squared coordinate
    differences taken over the first ``length`` rows and the first ``dims``
    columns of ``a`` and ``b`` (all squares are summed first, then a single
    square root is applied). It is ``0.0`` when the compared entries are
    identical.

    Args:
        a: Indexable of centroid rows (``a[row][col]``), e.g. a 2-D array or
            a list of lists.
        b: Indexable with the same layout as ``a``.
        length: Number of rows (centroids) to compare.
        dims: Number of leading coordinates per row to compare. Defaults to
            2, which matches the 2-D data this script works with.

    Returns:
        The displacement as a float.

    Raises:
        IndexError: For list/array inputs with fewer than ``length`` rows or
            fewer than ``dims`` columns.
    """
    squared_total = sum(
        (a[row][col] - b[row][col]) ** 2
        for row in range(length)
        for col in range(dims)
    )
    return math.sqrt(squared_total)
def euclideanDistance(a, b, length, dims=2):
    """Return the Euclidean distance from point ``a`` to each row of ``b``.

    Only the first ``length`` rows of ``b`` and the first ``dims`` coordinates
    of every point are considered.

    Args:
        a: Indexable holding one point's coordinates (``a[col]``).
        b: Indexable of points (``b[row][col]``), e.g. the centroid array.
        length: Number of rows of ``b`` to measure against.
        dims: Number of leading coordinates to compare. Defaults to 2, which
            matches the 2-D data this script works with.

    Returns:
        A 1-D ``np.ndarray`` with ``length`` distances, in row order.

    Raises:
        IndexError: For list/array inputs when ``b`` has fewer than
            ``length`` rows or a point has fewer than ``dims`` coordinates.
    """
    distances = [
        # Sum the squared differences first, then take one square root.
        math.sqrt(sum((a[col] - b[row][col]) ** 2 for col in range(dims)))
        for row in range(length)
    ]
    return np.array(distances)
def main():
    """Run k-means with k = 4 on ``ruspini.csv`` and plot the clusters.

    Reads the CSV (relative path, columns named ``x``, ``y``, ``z``; only
    ``x`` and ``y`` are used), picks random integer starting centroids,
    iterates assignment/update steps until the centroids stop moving, prints
    the progress and the final cluster of every point, and shows the plots.
    """
    df = pd.read_csv('ruspini.csv', names=['x', 'y', 'z'])
    # Alternative data set used during development: 'xclara.csv' with
    # columns 'V1' / 'V2'.
    print(len(df))

    f1 = df['x'].values
    f2 = df['y'].values
    X = np.array(list(zip(f1, f2)))
    plt.scatter(f1, f2, c='red', s=10)

    # Number of clusters (hard-coded; was once read from user input).
    k = 4

    # Random integer starting centroids drawn from [0, max(X)).
    C_x = np.random.randint(0, np.max(X), size=k)
    C_y = np.random.randint(0, np.max(X), size=k)
    C = np.array(list(zip(C_x, C_y)), dtype=np.float32)
    print("Intial Centroids: ")
    print(C)
    print("=========================\n")
    plt.scatter(C_x, C_y, marker="*", c="black", s=10)

    C_old = np.zeros(C.shape)
    clusters = np.zeros(len(X))
    error = errorCentroid(C, C_old, k)
    print(error)

    # Iterate until no centroid moves between two consecutive rounds.
    while error != 0:
        # Assignment step: attach every point to its nearest centroid.
        for i, point in enumerate(X):
            clusters[i] = np.argmin(euclideanDistance(point, C, k))

        # Remember the previous centroids to measure the movement.
        C_old = C.copy()

        # Update step: move each centroid to the mean of its members.
        for i in range(k):
            members = X[clusters == i]
            # An empty cluster has no mean: averaging it would yield NaN,
            # and a NaN error never compares equal to 0, so the loop would
            # never end. Keep the previous centroid in that case.
            if len(members) > 0:
                C[i] = members.mean(axis=0)

        print("\nCentroid baru: ")
        print(C)
        print("-------------------------")
        error = errorCentroid(C, C_old, k)

    for point, label in zip(X, clusters):
        print("Data: ", point, " cluster: ", label)

    colors = ['r', 'g', 'b', 'y', 'c', 'm']
    fig, ax = plt.subplots()
    for i in range(k):
        # Boolean-mask selection stays 2-D even when the cluster is empty,
        # so the column slices below remain valid.
        members = X[clusters == i]
        # Cycle through the palette so k may exceed the number of colors.
        ax.scatter(members[:, 0], members[:, 1],
                   c=colors[i % len(colors)], s=10)
    ax.scatter(C[:, 0], C[:, 1], c='black', s=10)
    plt.show()


if __name__ == "__main__":
    main()