forked from nlintz/TensorFlow-Tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
101 lines (85 loc) · 3.86 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import tensorflow as tf
import numpy as np
#=================
def test(x,y):
print(x)
def plot_clusters(all_samples, centroids, n_samples_per_cluster):
    """Scatter-plot each cluster in its own colour and overlay its centroid.

    all_samples: 2-D array of points, grouped so rows
        [i*n_samples_per_cluster:(i+1)*n_samples_per_cluster] belong to
        cluster i (this is how create_samples lays them out).
    centroids: sequence of (x, y) centroid positions, one per cluster.
    n_samples_per_cluster: number of consecutive rows per cluster.

    Blocks on plt.show(); returns nothing.
    """
    import matplotlib.pyplot as plt
    # One colour per cluster, evenly spaced along the rainbow colormap.
    colour = plt.cm.rainbow(np.linspace(0,1,len(centroids)))
    markers=["x", "o", "+"]
    for i, centroid in enumerate(centroids):
        # Grab just the samples for the given cluster and plot them with a new colour.
        samples = all_samples[i*n_samples_per_cluster:(i+1)*n_samples_per_cluster]
        plt.scatter(samples[:,0], samples[:,1], c=colour[i])
        # Cycle through the marker list so more than three clusters no longer
        # raises IndexError.
        marker = markers[i % len(markers)]
        # Draw the centroid twice -- a large black marker under a slightly
        # smaller magenta one -- so it stands out against the cluster colour.
        plt.plot(centroid[0], centroid[1], markersize=35, marker=marker, color='k', mew=10)
        plt.plot(centroid[0], centroid[1], markersize=30, marker=marker, color='m', mew=5)
    plt.show()
#==================================
def create_samples(n_clusters, n_samples_per_cluster, n_features, embiggen_factor, seed):
    """Build `n_clusters` Gaussian blobs of samples around random centroids.

    Returns a tuple (centroids, samples) of TensorFlow tensors: `centroids`
    has one row per cluster, `samples` has
    n_clusters * n_samples_per_cluster rows, grouped by cluster.
    """
    np.random.seed(seed)
    slices = []
    centroids = []
    for cluster_idx in range(n_clusters):
        # Gaussian cloud for this cluster, centred at the origin for now.
        samples = tf.random_normal((n_samples_per_cluster, n_features),
                       mean=0.0, stddev=5.0, dtype=tf.float32, seed=seed, name="cluster_{}".format(cluster_idx))
        # Random centroid with each feature drawn from
        # [-embiggen_factor/2, embiggen_factor/2); float32 to match the samples.
        offset = np.random.rand(1, n_features).astype(np.float32)
        current_centroid = (offset * embiggen_factor) - (embiggen_factor / 2)
        centroids.append(current_centroid)
        # Shift the cloud onto its centroid before collecting it.
        slices.append(samples + current_centroid)
    # Stack per-cluster pieces into single tensors (old TF API: axis comes first).
    samples = tf.concat(0, slices, name='samples')
    centroids = tf.concat(0, centroids, name='centroids')
    return centroids, samples
#============
def choose_random_centroids(samples, n_clusters, *args):
    """Pick `n_clusters` distinct random rows of `samples` as initial centroids.

    An optional extra positional argument is used as the shuffle seed
    (defaults to 0); the *args form is kept for backward compatibility
    with existing callers.
    Returns a tensor of shape (n_clusters, n_features).
    """
    seed = args[0] if args else 0
    # Step 0: Initialisation: select `n_clusters` random points.
    n_samples = tf.shape(samples)[0]
    # Shuffle all row indices, then keep the first n_clusters of them.
    random_indices = tf.random_shuffle(tf.range(0, n_samples), seed=seed)
    centroid_indices = tf.slice(random_indices, [0], [n_clusters])
    initial_centroids = tf.gather(samples, centroid_indices)
    return initial_centroids
#===================
def assign_to_nearest(samples, centroids):
    """Return, for every sample, the index of its nearest centroid.

    Uses broadcasting: samples become (1, n_samples, d) and centroids
    (k, 1, d), so the squared-Euclidean distance matrix is (k, n_samples);
    argmin over axis 0 then picks the closest centroid per sample.
    (Adapted from
    http://esciencegroup.com/2016/01/05/an-encounter-with-googles-tensorflow/)
    """
    expanded_vectors = tf.expand_dims(samples, 0)
    expanded_centroids = tf.expand_dims(centroids, 1)
    diffs = tf.sub(expanded_vectors, expanded_centroids)
    distances = tf.reduce_sum(tf.square(diffs), 2)
    return tf.argmin(distances, 0)
#===
def update_centroids(samples, nearest_indices, n_clusters):
    """Recompute each centroid as the mean of the samples assigned to it."""
    # Split the samples into one group per assigned centroid index.
    groups = tf.dynamic_partition(samples, tf.to_int32(nearest_indices), n_clusters)
    # Mean of each group, re-expanded to a single row so they can be stacked.
    means = [tf.expand_dims(tf.reduce_mean(group, 0), 0) for group in groups]
    # Old TF API: concat takes the axis as its first argument.
    return tf.concat(0, means)
"""
This code takes the nearest indices for each sample, and grabs those out as separate groups
using tf.dynamic_partition.
From here, we use tf.reduce_mean on a single group to find the average of that group,
forming its new centroid.
From here, we just tf.concat them together to form our new centroids.
"""