In [37]:
import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal
from sklearn.linear_model.base import BaseEstimator

from utils import compute_labels, log_likelihood, log_likelihood_from_labels

STOP_THRESHOLD = 0.01

class Random(BaseEstimator):
    """Baseline clusterer: pick random data points as centers, keep the best.

    Parameters
    ----------
    n_clusters : int
        Number of centers drawn (without replacement) from the rows of X.
    n_init : int, default 10
        Number of independent random draws to try.

    Attributes set by ``fit``
    -------------------------
    cluster_centers_ : the centers of the best-scoring draw.
    labels_ : the labels ``compute_labels`` returned for that draw.
    """

    def __init__(self, n_clusters, n_init=10):
        self.n_clusters = n_clusters
        self.n_init = n_init

    def fit(self, X):
        """Try ``n_init`` random sets of centers and keep the best-scoring one.

        X is n_objects x n_features matrix. Each candidate is scored with
        ``log_likelihood_from_labels`` (project helper; higher is treated as
        better here).

        Returns ``self`` so that calls can be chained, following the
        scikit-learn estimator convention.

        Raises ValueError (from ``np.random.choice``) when ``n_clusters``
        exceeds the number of rows in X.
        """
        n_objects = X.shape[0]
        best_log_likelihood = float("-inf")
        for _ in range(self.n_init):
            # Sample distinct rows so that no two centers coincide by index.
            centers_idx = np.random.choice(n_objects, size=self.n_clusters,
                                           replace=False)
            mu = X[centers_idx, :]
            labels = compute_labels(X, mu)
            ll = log_likelihood_from_labels(X, labels)
            if ll > best_log_likelihood:
                best_log_likelihood = ll
                self.cluster_centers_ = mu.copy()
                self.labels_ = labels
        return self
                
class EM(BaseEstimator):
    """Gaussian mixture clustering fitted with the EM algorithm.

    Parameters
    ----------
    n_clusters : int
        Number of mixture components.
    n_init : int, default 10
        Maximum number of EM iterations performed by ``fit``. (In this class
        the value caps the E-step/M-step loop; it is not a restart count.)

    Attributes set by ``fit``
    -------------------------
    cluster_centers_ : n_clusters x n_features matrix of component means.
    labels_ : result of ``compute_labels(X, cluster_centers_)``.
    """

    # Small ridge added to every covariance diagonal so that the matrices stay
    # positive definite even when a component collapses onto a few points.
    _COV_REG = 1e-6

    def __init__(self, n_clusters, n_init=10):
        self.n_clusters = n_clusters
        self.n_init = n_init

    def estep(self, X, w, mu, sigma):
        """Compute log Gamma_ik, the log posterior of cluster k for object i.

        X is n_objects x n_features matrix
        w is a vector of size n_clusters of the prior probabilities of the clusters
        mu is n_clusters x n_features matrix of the centers of the clusters
        sigma is n_clusters x n_features x n_features tensor with the covariance
                matrix of each cluster.

        Returns an n_objects x n_clusters matrix whose rows, once
        exponentiated, sum to one.

        Raises ValueError if ``w`` does not sum to 1.
        """
        w = np.asarray(w, dtype=float)
        if not np.isclose(w.sum(), 1.0):
            raise ValueError("cluster priors w must sum to 1, got %r" % w.sum())

        n_objects = X.shape[0]
        n_clusters = w.size
        log_gamma = np.empty((n_objects, n_clusters))
        for cluster in range(n_clusters):
            # log(w_k) + log N(x_i | mu_k, sigma_k); the mean and covariance
            # are passed as-is (they are not stored in the log domain).
            log_gamma[:, cluster] = np.log(w[cluster]) + multivariate_normal.logpdf(
                X, mean=mu[cluster], cov=sigma[cluster])

        # Normalise each row in the log domain to avoid underflow.
        log_gamma -= logsumexp(log_gamma, axis=1)[:, np.newaxis]
        return log_gamma

    def mstep(self, X, log_gamma):
        """Re-estimate the mixture parameters from the responsibilities.

        X is n_objects x n_features matrix
        log_gamma is n_objects x n_clusters matrix as returned by ``estep``

        Returns ``(w, mu, sigma)`` with the shapes described in ``estep``.
        """
        n_objects, n_features = X.shape
        n_clusters = log_gamma.shape[1]

        gamma = np.exp(log_gamma)
        # Effective number of objects per cluster; the epsilon keeps the
        # divisions below finite if a cluster receives no responsibility.
        cluster_mass = gamma.sum(axis=0) + 10 * np.finfo(float).eps

        w = cluster_mass / cluster_mass.sum()
        mu = np.dot(gamma.T, X) / cluster_mass[:, np.newaxis]

        sigma = np.empty((n_clusters, n_features, n_features))
        ridge = self._COV_REG * np.eye(n_features)
        for cluster in range(n_clusters):
            centered = X - mu[cluster]
            weighted = gamma[:, cluster, np.newaxis] * centered
            sigma[cluster] = np.dot(weighted.T, centered) / cluster_mass[cluster] + ridge

        return w, mu, sigma

    def fit(self, X):
        """Run EM from a random initialisation until the likelihood stabilises.

        X is n_objects x n_features matrix.

        Means start at ``n_clusters`` distinct random rows of X, every
        covariance starts at the (regularised) covariance of the whole data
        set, and priors start uniform. Iteration stops as soon as the value
        returned by ``log_likelihood`` (project helper) changes by less than
        ``STOP_THRESHOLD`` between consecutive iterations, or after
        ``n_init`` iterations, in which case a message is printed.

        Returns ``self``.
        """
        n_objects, n_features = X.shape

        centers_idx = np.random.choice(n_objects, size=self.n_clusters,
                                       replace=False)
        mu = X[centers_idx, :].copy()

        centered = X - X.mean(axis=0)
        data_cov = (np.dot(centered.T, centered) / float(n_objects)
                    + self._COV_REG * np.eye(n_features))
        sigma = np.tile(data_cov, (self.n_clusters, 1, 1))
        w = np.full(self.n_clusters, 1.0 / self.n_clusters)

        # The first likelihood is computed only after sigma is initialised.
        previous_ll = log_likelihood(X, w, mu, sigma)
        converged = False
        for _ in range(self.n_init):
            log_gamma = self.estep(X, w, mu, sigma)
            w, mu, sigma = self.mstep(X, log_gamma)
            ll = log_likelihood(X, w, mu, sigma)
            if abs(ll - previous_ll) < STOP_THRESHOLD:
                converged = True
                break
            previous_ll = ll

        if not converged:
            print('There is no convergence')

        # Expose the last estimate even without convergence so the fitted
        # attributes always exist after ``fit``.
        self.cluster_centers_ = mu
        self.labels_ = compute_labels(X, mu)
        return self


In [32]:
import numpy as np

# Scratch check of np.tile: stacking a vector twice and transposing gives a
# matrix whose i-th row repeats the i-th element of the vector.
x = np.array([1, 2, 3, 4])
print(x)
print(np.tile(x, (2, 1)).T)

[1 2 3 4]
[[1 1]
 [2 2]
 [3 3]
 [4 4]]
