Skip to content

Commit 100ec91

Browse files
committed
提交第 120 天文章《机器学习算法之 K均值聚类》对应示例代码
1 parent 023673c commit 100ec91

File tree

3 files changed

+282
-0
lines changed

3 files changed

+282
-0
lines changed

day-120/Iris.csv

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
2+
1,5.1,3.5,1.4,0.2,Iris-setosa
3+
2,4.9,3.0,1.4,0.2,Iris-setosa
4+
3,4.7,3.2,1.3,0.2,Iris-setosa
5+
4,4.6,3.1,1.5,0.2,Iris-setosa
6+
5,5.0,3.6,1.4,0.2,Iris-setosa
7+
6,5.4,3.9,1.7,0.4,Iris-setosa
8+
7,4.6,3.4,1.4,0.3,Iris-setosa
9+
8,5.0,3.4,1.5,0.2,Iris-setosa
10+
9,4.4,2.9,1.4,0.2,Iris-setosa
11+
10,4.9,3.1,1.5,0.1,Iris-setosa
12+
11,5.4,3.7,1.5,0.2,Iris-setosa
13+
12,4.8,3.4,1.6,0.2,Iris-setosa
14+
13,4.8,3.0,1.4,0.1,Iris-setosa
15+
14,4.3,3.0,1.1,0.1,Iris-setosa
16+
15,5.8,4.0,1.2,0.2,Iris-setosa
17+
16,5.7,4.4,1.5,0.4,Iris-setosa
18+
17,5.4,3.9,1.3,0.4,Iris-setosa
19+
18,5.1,3.5,1.4,0.3,Iris-setosa
20+
19,5.7,3.8,1.7,0.3,Iris-setosa
21+
20,5.1,3.8,1.5,0.3,Iris-setosa
22+
21,5.4,3.4,1.7,0.2,Iris-setosa
23+
22,5.1,3.7,1.5,0.4,Iris-setosa
24+
23,4.6,3.6,1.0,0.2,Iris-setosa
25+
24,5.1,3.3,1.7,0.5,Iris-setosa
26+
25,4.8,3.4,1.9,0.2,Iris-setosa
27+
26,5.0,3.0,1.6,0.2,Iris-setosa
28+
27,5.0,3.4,1.6,0.4,Iris-setosa
29+
28,5.2,3.5,1.5,0.2,Iris-setosa
30+
29,5.2,3.4,1.4,0.2,Iris-setosa
31+
30,4.7,3.2,1.6,0.2,Iris-setosa
32+
31,4.8,3.1,1.6,0.2,Iris-setosa
33+
32,5.4,3.4,1.5,0.4,Iris-setosa
34+
33,5.2,4.1,1.5,0.1,Iris-setosa
35+
34,5.5,4.2,1.4,0.2,Iris-setosa
36+
35,4.9,3.1,1.5,0.1,Iris-setosa
37+
36,5.0,3.2,1.2,0.2,Iris-setosa
38+
37,5.5,3.5,1.3,0.2,Iris-setosa
39+
38,4.9,3.1,1.5,0.1,Iris-setosa
40+
39,4.4,3.0,1.3,0.2,Iris-setosa
41+
40,5.1,3.4,1.5,0.2,Iris-setosa
42+
41,5.0,3.5,1.3,0.3,Iris-setosa
43+
42,4.5,2.3,1.3,0.3,Iris-setosa
44+
43,4.4,3.2,1.3,0.2,Iris-setosa
45+
44,5.0,3.5,1.6,0.6,Iris-setosa
46+
45,5.1,3.8,1.9,0.4,Iris-setosa
47+
46,4.8,3.0,1.4,0.3,Iris-setosa
48+
47,5.1,3.8,1.6,0.2,Iris-setosa
49+
48,4.6,3.2,1.4,0.2,Iris-setosa
50+
49,5.3,3.7,1.5,0.2,Iris-setosa
51+
50,5.0,3.3,1.4,0.2,Iris-setosa
52+
51,7.0,3.2,4.7,1.4,Iris-versicolor
53+
52,6.4,3.2,4.5,1.5,Iris-versicolor
54+
53,6.9,3.1,4.9,1.5,Iris-versicolor
55+
54,5.5,2.3,4.0,1.3,Iris-versicolor
56+
55,6.5,2.8,4.6,1.5,Iris-versicolor
57+
56,5.7,2.8,4.5,1.3,Iris-versicolor
58+
57,6.3,3.3,4.7,1.6,Iris-versicolor
59+
58,4.9,2.4,3.3,1.0,Iris-versicolor
60+
59,6.6,2.9,4.6,1.3,Iris-versicolor
61+
60,5.2,2.7,3.9,1.4,Iris-versicolor
62+
61,5.0,2.0,3.5,1.0,Iris-versicolor
63+
62,5.9,3.0,4.2,1.5,Iris-versicolor
64+
63,6.0,2.2,4.0,1.0,Iris-versicolor
65+
64,6.1,2.9,4.7,1.4,Iris-versicolor
66+
65,5.6,2.9,3.6,1.3,Iris-versicolor
67+
66,6.7,3.1,4.4,1.4,Iris-versicolor
68+
67,5.6,3.0,4.5,1.5,Iris-versicolor
69+
68,5.8,2.7,4.1,1.0,Iris-versicolor
70+
69,6.2,2.2,4.5,1.5,Iris-versicolor
71+
70,5.6,2.5,3.9,1.1,Iris-versicolor
72+
71,5.9,3.2,4.8,1.8,Iris-versicolor
73+
72,6.1,2.8,4.0,1.3,Iris-versicolor
74+
73,6.3,2.5,4.9,1.5,Iris-versicolor
75+
74,6.1,2.8,4.7,1.2,Iris-versicolor
76+
75,6.4,2.9,4.3,1.3,Iris-versicolor
77+
76,6.6,3.0,4.4,1.4,Iris-versicolor
78+
77,6.8,2.8,4.8,1.4,Iris-versicolor
79+
78,6.7,3.0,5.0,1.7,Iris-versicolor
80+
79,6.0,2.9,4.5,1.5,Iris-versicolor
81+
80,5.7,2.6,3.5,1.0,Iris-versicolor
82+
81,5.5,2.4,3.8,1.1,Iris-versicolor
83+
82,5.5,2.4,3.7,1.0,Iris-versicolor
84+
83,5.8,2.7,3.9,1.2,Iris-versicolor
85+
84,6.0,2.7,5.1,1.6,Iris-versicolor
86+
85,5.4,3.0,4.5,1.5,Iris-versicolor
87+
86,6.0,3.4,4.5,1.6,Iris-versicolor
88+
87,6.7,3.1,4.7,1.5,Iris-versicolor
89+
88,6.3,2.3,4.4,1.3,Iris-versicolor
90+
89,5.6,3.0,4.1,1.3,Iris-versicolor
91+
90,5.5,2.5,4.0,1.3,Iris-versicolor
92+
91,5.5,2.6,4.4,1.2,Iris-versicolor
93+
92,6.1,3.0,4.6,1.4,Iris-versicolor
94+
93,5.8,2.6,4.0,1.2,Iris-versicolor
95+
94,5.0,2.3,3.3,1.0,Iris-versicolor
96+
95,5.6,2.7,4.2,1.3,Iris-versicolor
97+
96,5.7,3.0,4.2,1.2,Iris-versicolor
98+
97,5.7,2.9,4.2,1.3,Iris-versicolor
99+
98,6.2,2.9,4.3,1.3,Iris-versicolor
100+
99,5.1,2.5,3.0,1.1,Iris-versicolor
101+
100,5.7,2.8,4.1,1.3,Iris-versicolor
102+
101,6.3,3.3,6.0,2.5,Iris-virginica
103+
102,5.8,2.7,5.1,1.9,Iris-virginica
104+
103,7.1,3.0,5.9,2.1,Iris-virginica
105+
104,6.3,2.9,5.6,1.8,Iris-virginica
106+
105,6.5,3.0,5.8,2.2,Iris-virginica
107+
106,7.6,3.0,6.6,2.1,Iris-virginica
108+
107,4.9,2.5,4.5,1.7,Iris-virginica
109+
108,7.3,2.9,6.3,1.8,Iris-virginica
110+
109,6.7,2.5,5.8,1.8,Iris-virginica
111+
110,7.2,3.6,6.1,2.5,Iris-virginica
112+
111,6.5,3.2,5.1,2.0,Iris-virginica
113+
112,6.4,2.7,5.3,1.9,Iris-virginica
114+
113,6.8,3.0,5.5,2.1,Iris-virginica
115+
114,5.7,2.5,5.0,2.0,Iris-virginica
116+
115,5.8,2.8,5.1,2.4,Iris-virginica
117+
116,6.4,3.2,5.3,2.3,Iris-virginica
118+
117,6.5,3.0,5.5,1.8,Iris-virginica
119+
118,7.7,3.8,6.7,2.2,Iris-virginica
120+
119,7.7,2.6,6.9,2.3,Iris-virginica
121+
120,6.0,2.2,5.0,1.5,Iris-virginica
122+
121,6.9,3.2,5.7,2.3,Iris-virginica
123+
122,5.6,2.8,4.9,2.0,Iris-virginica
124+
123,7.7,2.8,6.7,2.0,Iris-virginica
125+
124,6.3,2.7,4.9,1.8,Iris-virginica
126+
125,6.7,3.3,5.7,2.1,Iris-virginica
127+
126,7.2,3.2,6.0,1.8,Iris-virginica
128+
127,6.2,2.8,4.8,1.8,Iris-virginica
129+
128,6.1,3.0,4.9,1.8,Iris-virginica
130+
129,6.4,2.8,5.6,2.1,Iris-virginica
131+
130,7.2,3.0,5.8,1.6,Iris-virginica
132+
131,7.4,2.8,6.1,1.9,Iris-virginica
133+
132,7.9,3.8,6.4,2.0,Iris-virginica
134+
133,6.4,2.8,5.6,2.2,Iris-virginica
135+
134,6.3,2.8,5.1,1.5,Iris-virginica
136+
135,6.1,2.6,5.6,1.4,Iris-virginica
137+
136,7.7,3.0,6.1,2.3,Iris-virginica
138+
137,6.3,3.4,5.6,2.4,Iris-virginica
139+
138,6.4,3.1,5.5,1.8,Iris-virginica
140+
139,6.0,3.0,4.8,1.8,Iris-virginica
141+
140,6.9,3.1,5.4,2.1,Iris-virginica
142+
141,6.7,3.1,5.6,2.4,Iris-virginica
143+
142,6.9,3.1,5.1,2.3,Iris-virginica
144+
143,5.8,2.7,5.1,1.9,Iris-virginica
145+
144,6.8,3.2,5.9,2.3,Iris-virginica
146+
145,6.7,3.3,5.7,2.5,Iris-virginica
147+
146,6.7,3.0,5.2,2.3,Iris-virginica
148+
147,6.3,2.5,5.0,1.9,Iris-virginica
149+
148,6.5,3.0,5.2,2.0,Iris-virginica
150+
149,6.2,3.4,5.4,2.3,Iris-virginica
151+
150,5.9,3.0,5.1,1.8,Iris-virginica

day-120/__init__.py

Whitespace-only changes.

day-120/clusters.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
'''
2+
__author__ = justdopython.com
3+
'''
4+
import numpy as np
5+
import pandas as pd
6+
import random
7+
8+
9+
class Clusters():
    '''
    K-means clustering over a 2-D ndarray of samples.

    Each row of the training data is one sample. The columns selected by
    ``FEATURE_COLUMNS`` hold the numeric features used for the distance
    computation, and the column at ``LABEL_COLUMN`` is what ``printResult``
    prints for every sample. The defaults match the layout of the Iris CSV
    file (Id, four measurements, Species); subclasses may override them.
    '''

    # Columns holding the numeric features (columns 1..4 of each row).
    FEATURE_COLUMNS = slice(1, 5)
    # Column printed for each sample by printResult.
    LABEL_COLUMN = 5

    def __init__(self, train_data, K):
        '''
        :params train_data: ndarray. Training data, one sample per row.
        :params K: int. Number of clusters to build.

        :attr train_data: the training data as passed in (never modified).
        :attr K: number of clusters.
        :attr finished: True once an assignment pass moved no sample.
        :attr centroid: float ndarray with one row of feature means per cluster.
        :attr clusters: list of K ndarrays holding the rows of each cluster.

        :raises ValueError: if K is smaller than 1 or larger than the
            number of samples.
        '''
        super().__init__()

        # Reject bad K early; otherwise K == 0 only surfaces later as a
        # confusing ZeroDivisionError in the chunk-size computation.
        if K < 1 or K > len(train_data):
            raise ValueError(
                'K must be between 1 and the number of samples (%d), got %r'
                % (len(train_data), K))

        self.train_data = train_data
        self.K = K
        # Whether clustering is done, i.e. no sample still has to move
        # from one cluster to another.
        self.finished = False

        # Pick K distinct random samples as the initial centroids. The cast
        # to float makes all later arithmetic numeric even when the data
        # array has object dtype (mixed numbers and strings).
        index = random.sample(range(len(self.train_data)), self.K)
        self.centroid = train_data[index, self.FEATURE_COLUMNS].astype(float)

        # Spread the samples evenly over the clusters so that the first
        # assignment pass can treat them like any later pass.
        self.clusters = []
        offset = len(train_data) // self.K
        for i in range(self.K):
            start = offset * i
            # The last cluster takes all remaining rows.
            stop = start + offset if i < self.K - 1 else None
            self.clusters.append(train_data[start:stop, :])

    # Load the data set used by the example.
    @staticmethod
    def getData(path='Iris.csv'):
        '''
        Load the data set and return it as an ndarray.

        :params path: str. CSV file to read. Defaults to ``Iris.csv``, the
            name of the data file shipped next to this script (the previous
            lowercase name did not match it on case-sensitive filesystems).
        '''
        train_data = pd.read_csv(path).to_numpy()

        return train_data

    def _features(self, rows):
        '''Return the feature columns of ``rows`` as a float ndarray.'''
        return rows[:, self.FEATURE_COLUMNS].astype(float)

    # Assign every sample to the cluster with the nearest centroid.
    def assign(self):
        '''
        Move each sample to the cluster whose centroid is nearest
        (squared Euclidean distance) and record in ``self.finished``
        whether nothing had to move.
        '''
        # For every cluster, the index of the nearest centroid per row.
        targets = []
        for cluster in self.clusters:
            diff = (self._features(cluster)[:, np.newaxis, :]
                    - self.centroid[np.newaxis, :, :])
            distance_square = np.sum(diff * diff, axis=2)
            targets.append(np.argmin(distance_square, axis=1))

        self.finished = all(
            np.all(target == cluster_index)
            for cluster_index, target in enumerate(targets)
        )
        if self.finished:
            return

        # Rebuild each cluster as: the rows that stay (original order)
        # followed by the rows arriving from the other clusters, taken in
        # source-cluster order. The row width comes from the data itself.
        regrouped = []
        for target_index in range(self.K):
            staying = targets[target_index] == target_index
            parts = [self.clusters[target_index][staying]]
            for source_index in range(self.K):
                if source_index != target_index:
                    arriving = targets[source_index] == target_index
                    parts.append(self.clusters[source_index][arriving])
            regrouped.append(np.concatenate(parts, axis=0))
        self.clusters = regrouped

    # Recompute the centroid of every cluster.
    def update(self):
        '''
        Set each centroid to the mean of the features in its cluster.

        A cluster that has lost all of its samples keeps its previous
        centroid; taking the mean of zero rows would produce NaN values
        and corrupt every later distance computation.
        '''
        for cluster_index, cluster in enumerate(self.clusters):
            if len(cluster) == 0:
                continue
            self.centroid[cluster_index] = np.mean(self._features(cluster), axis=0)

    def train(self):
        '''
        Run clustering: alternate assignment and centroid update until an
        assignment pass moves no sample.
        '''
        while not self.finished:
            self.assign()
            self.update()
        print('训练完成!!!')

    def printResult(self):
        '''
        Print the centroid of every cluster, then the label column of
        every sample in each cluster.
        '''
        print('-'*80)
        print('*'*80)
        print('-'*80)
        print('*'*30, '聚类结果', '*'*30)
        print('-'*30,'各簇中心','-'*30)
        for i in range(self.K):
            print('第', str(i), '簇中心:', self.centroid[i])
        print('-'*80)
        print('-'*30,'各簇结果','-'*30)
        for i in range(self.K):
            print('-'*20, '第', str(i), '簇结果', '-'*20,)
            for d in self.clusters[i]:
                print(d[self.LABEL_COLUMN])

        print('-'*80)
        print('*'*80)
        print('-'*80)
119+
120+
121+
122+
123+
if __name__ == '__main__':
    # Script entry point: ask for the number of clusters, load the data
    # set, run the clustering and print the outcome.
    print('-' * 80)
    cluster_count = int(input('请输入要划分的簇数(应为正整数):'))
    model = Clusters(Clusters.getData(), cluster_count)
    model.train()
    model.printResult()

0 commit comments

Comments
 (0)