In [33]:
## KMeans

import random

def vector_add(v, w):
    return [v_i + w_i for v_i, w_i in zip(v, w)]

def vector_sum(vectors):
    result = vectors[0]
    for vector in vectors[1:]:
        result = vector_add(result, vector)
    return result

def vector_subtract(v, w):
    return [v_i - w_i for v_i, w_i in zip(v, w)]

def scalar_multiply(c, v):
    return [c * v_i for v_i in v]

def vector_mean(vectors):
    n = len(vectors)
    return scalar_multiply(1./n, vector_sum(vectors))

def dot(v, w):
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def sum_of_squares(v):
    return dot(v, v)

def squared_distance(v, w):
    return sum_of_squares(vector_subtract(v, w))

class KMeans:
    def __init__(self, k):
        self.k = k          # number of clusters
        self.means = None   # means of clusters
    
    def classify(self, input):
        return min(range(self.k), key=lambda i: squared_distance(input, self.means[i]))
       
    def train(self, inputs):
        # choose k random points as the initial means
        self.means = random.sample(inputs, self.k)
        assignments = None
        while True:
            # Find new assignments
            new_assignments = map(self.classify, inputs)
            # If no assignments have changed, we're done.
            if assignments == new_assignments:
            # Otherwise keep the new assignments,
                return
            assignments = new_assignments
               # And compute new means based on the new assignments
            for i in range(self.k):
                   # find all the points assigned to cluster i
                i_points = [p for p, a in zip(inputs, assignments) if a == i]
                   # make sure i_points is not empty so don't divide by 0
                if i_points:
                    self.means[i] = vector_mean(i_points)
                    

random.seed(0)
inputs = [random.random() for _ in range(20)]

print inputs
clusterer = KMeans(3)
clusterer.train(inputs)
#print clusterer.means
# so you get the same results as me

[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335, 0.5112747213686085, 0.4049341374504143, 0.7837985890347726, 0.30331272607892745, 0.4765969541523558, 0.5833820394550312, 0.9081128851953352, 0.5046868558173903, 0.28183784439970383, 0.7558042041572239, 0.6183689966753316, 0.25050634136244054, 0.9097462559682401, 0.9827854760376531, 0.8102172359965896, 0.9021659504395827]


TypeError: zip argument #1 must support iteration

In [24]:
print random.sample(1)

TypeError: sample() takes exactly 3 arguments (2 given)