<a href="https://colab.research.google.com/github/FelipeTufaile/MixtureModels/blob/main/MixtureModels/tests/Tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""Mixture model for collaborative filtering"""
from typing import NamedTuple, Tuple
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Circle, Arc

In [4]:
class GaussianMixture:
    """
    Container for the parameters of a gaussian mixture.

    Attributes set by the constructor:
        k: number of components
        seed: random seed (stored only; not used by the constructor)
        p: (k, ) array - uniform component weights, each equal to 1 / k
        mu: (k, ) array of zeros - placeholder for the component means
        var: (k, ) array of zeros - placeholder for the component variances
    """

    def __init__(self, k, seed=0):
        """
        Set up a mixture with uniform weights and zeroed mean/variance arrays.

        Args:
            k (int): number of components
            seed (int): random seed, kept on the instance for later use
        """
        # Remember the configuration
        self.k, self.seed = k, seed

        # Every component (cluster) starts with the same weight
        uniform = np.ones(k)
        self.p = uniform / k

        # Means and variances start as two independent zero vectors
        self.mu, self.var = np.zeros(k), np.zeros(k)

In [6]:
# Build a three-component mixture, leaving the seed at its default value
gm = GaussianMixture(k=3)

In [9]:
# Display the variance array stored on `gm`
gm.var

array([0., 0., 0.])

In [None]:
def __init__(self, x, k, seed=0):
    """Initializes the mixture model with random points as initial
    means and uniform assignments.

    Nothing is returned; the results are stored on ``self``:
        x: the data passed in
        k: number of components
        seed: the seed that was used for the random draw
        p: (k, ) array of uniform component weights (1 / k each)
        mu: (k, d) array - k distinct rows of ``x`` picked at random
        var: (k, ) array - mean squared deviation of all entries of ``x``
            from the corresponding row of ``mu``
        post: (n, k) array holding the soft counts, all equal to 1 / k

    Note that this reseeds NumPy's global random generator via
    ``np.random.seed(seed)``.

    Args:
        x: (n, d) array holding the data
        k: number of components
        seed: random seed

    Raises:
        ValueError: from ``np.random.choice`` when ``k`` is larger than
            the number of rows in ``x`` (sampling is without replacement).
    """
    # Storing x
    self.x = x

    # Storing k
    self.k = k

    # Storing the seed actually used (it was previously hard-coded to 0,
    # which made the stored value disagree with the generator state)
    self.seed = seed

    # Setting seed
    np.random.seed(seed)

    # Calculating the number of samples "n" in the data
    n, _ = x.shape

    # Initializing the weight of each component (cluster)
    self.p = np.ones(k) / k

    # Select k distinct random points from the samples as initial means
    self.mu = x[np.random.choice(n, k, replace=False)]

    # Compute the variance of each component (cluster): the mean is taken
    # over every entry of the (n, d) squared-deviation array
    self.var = np.zeros(k)
    for j, center in enumerate(self.mu):
        self.var[j] = ((x - center) ** 2).mean()

    # Uniform posterior: every sample is assigned equally to every component
    self.post = np.ones((n, k)) / k