# Ejemplo de implementación de kmeans

In [1]:
%pylab
%matplotlib inline

Using matplotlib backend: MacOSX
Populating the interactive namespace from numpy and matplotlib


## Creación de un conjunto de datos de ejemplo

In [2]:
from sklearn.datasets import make_blobs

color_map = array(['b','g','r','c','m','y','k','b','g','r',
                   'c','m','y','k','b','g','r','c','m','y',
                   'k','b','g','r','c','m','y','k'])

blobs_3, classes_3 = make_blobs(300,
                                centers      = 3,
                                cluster_std  = 0.5,
                                random_state = 0)
blobs_5, classes_5 = make_blobs(300,
                                centers      = 5,
                                cluster_std  = 0.5,
                                random_state = 0)

## Implementación de k-means

In [3]:
from scipy.spatial.distance import cdist

def shout_continue(old_centroids, centroids, iterations, max_iter = 10, stop_limit = 0):    
    if old_centroids is None:
        return True
    elif iterations > max_iter:
        return False
    elif sum(abs(centroids - old_centroids)) <= stop_limit:
        return False
    else:
        return True

def k_means(data, num_k):
    num_features = data.shape[1]
    num_values = data.shape[0]

    centroids = np.random.randn(num_k * num_features).reshape(num_k, num_features)
    
    iterations = 0
    old_centroids = None

    while shout_continue(old_centroids, centroids, iterations):
        iterations += 1
        old_centroids = centroids.copy()

        distance = cdist(data, centroids)
        labels = np.zeros(num_values)
        for i in range(num_values):
            labels[i] = numpy.argmin(distance[i])

        for i in range(num_k):
            centroids[i] = mean(data[labels == i, :], axis = 0)

        centroids[np.where(np.isnan(centroids))] = 0
        
        print "Iteración:", iterations
        print centroids
        print 
        
    return centroids

k_means_3 = k_means(blobs_3, 3)
k_means_5 = k_means(blobs_5, 5)

Iteración: 1
[[-0.8676141   3.38628804]
 [ 1.77448225  1.98751104]
 [ 0.          0.        ]]

Iteración: 2
[[-0.61908121  3.52659864]
 [ 1.89669384  1.68858372]
 [ 1.60602218  0.16407947]]

Iteración: 3
[[-0.36876936  3.61435545]
 [ 1.98995086  1.44481736]
 [ 1.91982226  0.50180782]]

Iteración: 4
[[-0.34105117  3.61487282]
 [ 1.96692135  1.40337951]
 [ 1.97168006  0.56041412]]

Iteración: 5
[[-0.34105117  3.61487282]
 [ 1.92010354  1.41431769]
 [ 2.00180835  0.56695345]]

Iteración: 6
[[-0.34105117  3.61487282]
 [ 1.87639467  1.4102323 ]
 [ 2.03000762  0.56958918]]

Iteración: 7
[[-0.34105117  3.61487282]
 [ 1.81959565  1.43707633]
 [ 2.05524954  0.59310764]]

Iteración: 8
[[-0.34105117  3.61487282]
 [ 1.803074    1.46066622]
 [ 2.05684581  0.60597766]]

Iteración: 9
[[-0.34105117  3.61487282]
 [ 1.803074    1.46066622]
 [ 2.05684581  0.60597766]]

Iteración: 1
[[ 0.          0.        ]
 [ 5.58004648 -0.77966805]
 [ 0.          0.        ]
 [ 0.          0.        ]
 [-0.64068852  

  ret, rcount, out=ret, casting='unsafe', subok=False)


## Comparación de los resultados

Ejemplo con 3 clusters.

In [4]:
from sklearn.cluster import KMeans

kmeans_3 = KMeans(n_clusters = 3).fit(blobs_3)

print "Algoritmo implementado"
print k_means_3
print "Algortimo sklearn"
print kmeans_3.cluster_centers_

Algoritmo implementado
[[-0.34105117  3.61487282]
 [ 1.803074    1.46066622]
 [ 2.05684581  0.60597766]]
Algortimo sklearn
[[ 1.96887266  0.84517309]
 [-1.59458393  2.86866086]
 [ 0.95959254  4.36085244]]


Ejemplo con 3 clusters.

In [5]:
kmeans_5 = KMeans(n_clusters = 5).fit(blobs_5)

print "Algoritmo implementado"
print k_means_5
print "Algortimo sklearn"
print kmeans_5.cluster_centers_

Algoritmo implementado
[[ 2.01255789  0.85271039]
 [ 9.14753506 -2.41204648]
 [ 0.23806042  4.0819488 ]
 [-1.74120135  2.63955662]
 [-1.24361702  7.76688706]]
Algortimo sklearn
[[ 9.14753506 -2.41204648]
 [ 0.96667719  4.40575097]
 [-1.24361702  7.76688706]
 [ 2.01255789  0.85271039]
 [-1.64512572  2.91675119]]
