In [None]:
from sklearn.cluster import KMeans

In [None]:
import numpy as np
from scipy.stats import multivariate_normal

In [None]:
class GMM:
    """Gaussian mixture model fitted with expectation-maximization (EM).

    Parameters
    ----------
    k : int
        Number of mixture components.
    method : str, default 'random_mean_std'
        Initialization strategy. Only ``'k-means'`` is implemented; the other
        recognized names (``'random_mean_std'``, ``'random_mean'``,
        ``'random_divide'``, ``'random_gammas'``) are placeholders and make
        ``init_centers`` raise ``NotImplementedError``.
    max_iter : int, default 300
        Upper bound on the number of EM iterations run by ``fit``.
    tol : float, default 1e-6
        ``fit`` stops once the loss changes by less than this amount between
        two consecutive iterations.

    Attributes (set by ``fit``)
    ---------------------------
    mean_arr : ndarray of shape (k, n_features)
    cov_arr : ndarray of shape (k, n_features, n_features)
    pi_arr : ndarray of shape (k,)
        Mixing weights.
    _loss : float
        Negative mean log-likelihood of the training data under the current
        parameters.
    """

    # Names that are accepted by the constructor but have no implementation.
    _PENDING_METHODS = (
        'random_mean_std',
        'random_mean',
        'random_divide',
        'random_gammas',
    )

    def __init__(self, k, method='random_mean_std', max_iter=300, tol=1e-6):
        self.k = k
        self.method = method
        self.max_iter = max_iter
        self.tol = tol

    def init_centers(self, X):
        """Return initial ``(means, covariances, mixing weights)`` for EM.

        For ``method='k-means'`` the data is clustered with ``KMeans``; each
        cluster contributes its center, its sample covariance and its share
        of the samples.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is a recognized but unimplemented strategy.
        ValueError
            If ``self.method`` is not a recognized strategy name.
        """
        if self.method == 'k-means':
            kmeans = KMeans(n_clusters=self.k)
            kmeans.fit(X)
            clusters = kmeans.predict(X)
            cov_arr = []
            pi_arr = []
            for i in range(self.k):
                X_i = X[clusters == i]
                # np.cov returns a 0-d array for single-feature data; keep
                # every covariance 2-D so the shape matches maximization().
                cov_arr.append(np.atleast_2d(np.cov(X_i.T)))
                pi_arr.append(X_i.shape[0] / X.shape[0])
            return kmeans.cluster_centers_, np.array(cov_arr), np.array(pi_arr)

        if self.method in self._PENDING_METHODS:
            raise NotImplementedError(
                f"initialization method {self.method!r} is not implemented; "
                "use method='k-means'"
            )
        raise ValueError(f"unknown initialization method: {self.method!r}")

    def fit(self, X):
        """Estimate the mixture parameters from ``X`` with EM.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        GMM
            ``self``, with ``mean_arr``, ``cov_arr``, ``pi_arr`` and ``_loss``
            set.
        """
        X = np.asarray(X, dtype=float)
        self.mean_arr, self.cov_arr, self.pi_arr = self.init_centers(X)
        self._loss = self.loss(X, self.mean_arr, self.cov_arr, self.pi_arr)
        for _ in range(self.max_iter):
            gamma_mtrx = self.expectation(X)
            self.mean_arr, self.cov_arr, self.pi_arr = self.maximization(
                X, gamma_mtrx
            )
            loss = self.loss(X, self.mean_arr, self.cov_arr, self.pi_arr)
            # Compare with a tolerance: exact float equality practically
            # never holds, so it would never stop the loop early.
            converged = abs(self._loss - loss) < self.tol
            self._loss = loss
            if converged:
                break
        return self

    @staticmethod
    def _weighted_log_density(X, mean, cov, pi):
        """Return the (n_samples, k) matrix of ``log(pi_j * N(x_i | mean_j, cov_j))``."""
        X = np.asarray(X, dtype=float)
        pi = np.asarray(pi, dtype=float)
        with np.errstate(divide='ignore'):  # log(0) -> -inf is intended
            log_pi = np.log(pi)
        log_w = np.empty((X.shape[0], pi.shape[0]))
        for j in range(pi.shape[0]):
            log_w[:, j] = log_pi[j] + multivariate_normal.logpdf(
                X, mean[j], cov[j]
            )
        return log_w

    @staticmethod
    def _logsumexp_rows(log_w):
        """Return ``log(sum(exp(row)))`` for every row of ``log_w``, stably."""
        peak = np.max(log_w, axis=1, keepdims=True)
        # A row that is entirely -inf would otherwise produce -inf - -inf.
        peak = np.where(np.isfinite(peak), peak, 0.0)
        with np.errstate(divide='ignore'):
            return peak[:, 0] + np.log(np.sum(np.exp(log_w - peak), axis=1))

    def loss(self, X, mean, cov, pi):
        """Return the negative mean log-likelihood of ``X``.

        The value is ``-(1/n) * sum_i log(sum_j pi_j * N(x_i | mean_j, cov_j))``
        and is computed from the arguments, not from the fitted attributes.
        """
        log_w = self._weighted_log_density(X, mean, cov, pi)
        return -np.mean(self._logsumexp_rows(log_w))

    def pdf(self, x, mean, cov):
        """Return the multivariate normal density of ``x`` for ``mean``/``cov``."""
        return multivariate_normal(mean, cov).pdf(x)

    def expectation(self, X):
        """E-step: return the (n_samples, k) matrix of responsibilities.

        Each row sums to 1. The normalization is done in log space so that
        samples far from every component do not underflow to 0/0.
        """
        log_w = self._weighted_log_density(
            X, self.mean_arr, self.cov_arr, self.pi_arr
        )
        log_norm = self._logsumexp_rows(log_w)
        return np.exp(log_w - log_norm[:, None])

    def maximization(self, X, Y):
        """M-step: re-estimate the parameters from responsibilities ``Y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        Y : ndarray of shape (n_samples, k)
            Responsibilities as returned by ``expectation``.

        Returns
        -------
        tuple
            ``(mean_arr, covariances, pi_arr)`` with shapes
            ``(k, n_features)``, ``(k, n_features, n_features)`` and ``(k,)``.
        """
        X = np.asarray(X, dtype=float)
        n_samples, n_features = X.shape
        weights = Y.sum(axis=0)  # effective number of samples per component
        pi_arr = weights / n_samples
        mean_arr = (Y.T @ X) / weights[:, None]
        covariances = np.empty((self.k, n_features, n_features))
        for j in range(self.k):
            centered = X - mean_arr[j]
            covariances[j] = (Y[:, j, None] * centered).T @ centered / weights[j]
        return mean_arr, covariances, pi_arr

    def predict(self, X):
        """Return, for every sample, the index of the most probable component."""
        log_w = self._weighted_log_density(
            X, self.mean_arr, self.cov_arr, self.pi_arr
        )
        return np.argmax(log_w, axis=1)

    def predict_proba(self, X):
        """Return the (n_samples, k) matrix of component responsibilities."""
        return self.expectation(X)

In [None]:
from sklearn.datasets import make_blobs

In [None]:
# Toy dataset drawn around two blob centers: samples in `x`, labels in `y`.
x, y = make_blobs(centers=2)

In [None]:
# Two-component mixture whose parameters are seeded from a k-means clustering.
model = GMM(k=2, method='k-means')

In [None]:
# Run EM on the samples; the estimated parameters are stored on `model`.
model.fit(x)

In [None]:
# Component index assigned to each training sample.
model.predict(x)

In [1]:
import os

In [2]:
# Show the directory the notebook kernel is running in.
os.getcwd()

'C:\\Users\\User\\ACA\\Gausian mixture models'