In [11]:
from sklearn.mixture import GaussianMixture
import numpy as np
from utils import Givens2Matrix, QRGivens, eigh_with_fixed_direction_range, find_closest_spd
from utils import load_cloud_dataset, load_breast_cancer, load_seg_data, load_digits_dataset, load_satelite_dataset, load_synthetic_dataset
from python_example import Givens2Matrix_double as Givens2Matrix
from python_example import QRGivens_double as QRGivens
from copy import deepcopy

In [12]:
import warnings
from sklearn.exceptions import ConvergenceWarning, ComplexWarning
warnings.filterwarnings(action='ignore', category=ComplexWarning)
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

ImportError: cannot import name 'ComplexWarning' from 'sklearn.exceptions' (/home/skholkin/projects/python_venv/lib/python3.10/site-packages/sklearn/exceptions.py)

In [5]:

class EMParticle:
    def __init__(self, weights, means, precision_matrices, eigvals_coef, means_coef) -> None:
        self.weights = deepcopy(weights)
        self.means = deepcopy(means)
        self.precision_matrices = deepcopy(precision_matrices)
        self.eigvals_coef = eigvals_coef
        self.n_comp = self.weights.shape[0]
        self.data_dim = self.means.shape[1]
        self.means_coef = means_coef

    def inject_noise(self):
        
        eigvals = [np.mean(np.linalg.eigvals(self.precision_matrices[i])) for i in range(self.n_comp)]
        
        eig_val_max  = [eigvals[i] * self.eigvals_coef for i in range(len(eigvals))]

        for i in range(self.n_comp):
            
            givens_angles = np.random.uniform(-np.pi, np.pi, size=(int(self.data_dim * (self.data_dim - 1) / 2)))
            delta_eigvals = np.random.uniform(0, np.mean(eig_val_max), size=self.data_dim)
            # print('eig', np.mean(eig_val_max))
            
            v = Givens2Matrix(np.expand_dims(givens_angles, axis=1))
            addition = v @ np.diag(delta_eigvals) @ v.T

            self.precision_matrices += addition
        # print(np.mean(self.means))
        means_delta = np.random.normal(0, self.means_coef * np.mean(self.means), size=(self.n_comp, self.data_dim))
        # print(self.means.shape, means_delta.shape)
        self.means += means_delta

    def run_em_iters(self, iters, data):
        gmm = GaussianMixture(n_components=self.weights.shape[0], covariance_type='full',
         weights_init=self.weights, means_init=self.means, precisions_init=self.precision_matrices, max_iter=iters)
        gmm.fit(data)
        self.weights  = gmm.weights_
        self.means = gmm.means_
        self.precision_matrices = gmm.precisions_
        self.curr_score = gmm.score(data)
        return gmm.score(data)

class NoiseEM:
    def __init__(self, n_comp, n_particles, T1, T2, eigval_coef, means_coef) -> None:
        self.particles = [None for i in range(n_particles)]
        self.n_comp = n_comp
        self.T1 = T1
        self.T2 = T2
        self.eigval_coef = eigval_coef
        self.means_coef = means_coef

    def run(self, data):
        best_score = -np.inf
        best_particle = None
        for i in range(len(self.particles)):
            particle_gmm = GaussianMixture(self.n_comp, covariance_type='full', max_iter=1, n_init=1, init_params='k-means++')
            particle_gmm.fit(data)
            self.particles[i] = EMParticle(particle_gmm.weights_, particle_gmm.means_, particle_gmm.precisions_, self.eigval_coef, self.means_coef)
            
        for i in range(self.T1):
            for particle in self.particles:
                ll = particle.run_em_iters(self.T2, data)
                print('LL: ', ll)
                if ll > best_score:
                    best_score = ll
                    best_particle = deepcopy(particle)
                # particle.inject_noise()
            
            for j in range(len(self.particles)):
                self.particles[j] = EMParticle(best_particle.weights, best_particle.means, best_particle.precision_matrices, self.eigval_coef, self.means_coef)
                self.particles[j].inject_noise()

        for particle in self.particles:
            ll = particle.run_em_iters(self.T2, data)
            if ll > best_score:
                best_score = ll
            print('Final LL: ', ll)
        print('Best LL: ', best_score)

In [10]:
data = load_seg_data()
T1 = 20
T2 = 50
n_comp = 10
n_particles = 50

eigvals_coef = 0.1
means_coef = 0.1

em = NoiseEM(n_comp, n_particles, T1, T2, eigvals_coef, means_coef)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    em.run(data)

LL:  67.95383178724201
LL:  69.03302951217016
LL:  70.12836576470454
LL:  67.14508727594611
LL:  66.59245555928614
LL:  67.81617301743745
LL:  67.29297442341765
LL:  66.17439759662396
LL:  67.90786019842108
LL:  69.17405933433008
LL:  66.47924795691272
LL:  66.6273011378222
LL:  67.04997770076135
LL:  68.07445734387129
LL:  68.78828639659312
LL:  69.26496612265555
LL:  65.04183842353572
LL:  68.51877541025527
LL:  67.49950276384261
LL:  66.5352733493941
LL:  67.44013562403295
LL:  67.28888427932685
LL:  67.53709737285034
LL:  67.13972211081376
LL:  66.65609585702543
LL:  67.91832074289768
LL:  66.9867011600117
LL:  67.91971903730719
LL:  67.47299123682491
LL:  67.72240384367643
LL:  68.64080222159618
LL:  68.96031725681362
LL:  68.55153742895035
LL:  69.01121970386198
LL:  68.91563339478574
LL:  67.06725014841744
LL:  67.68636159220661
LL:  68.806803655696
LL:  69.63841081144034
LL:  67.62986483009392
LL:  70.05724610738896
LL:  69.24803200540971
LL:  68.8250460945913
LL:  66.939562277

KeyboardInterrupt: 