In [45]:
from sklearn.mixture import GaussianMixture
import numpy as np
from utils import Givens2Matrix, QRGivens, eigh_with_fixed_direction_range, find_closest_spd
from utils import load_cloud_dataset, load_breast_cancer, load_seg_data, load_digits_dataset, load_satelite_dataset, load_synthetic_dataset
from python_example import Givens2Matrix_double as Givens2Matrix
from python_example import QRGivens_double as QRGivens
from copy import deepcopy

In [20]:
import warnings
from sklearn.exceptions import ConvergenceWarning, ComplexWarning
warnings.filterwarnings(action='ignore', category=ComplexWarning)
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

ImportError: cannot import name 'ComplexWarning' from 'sklearn.exceptions' (/home/skholkin/projects/python_venv/lib/python3.10/site-packages/sklearn/exceptions.py)

In [61]:

class EMParticle:
    def __init__(self, weights, means, precision_matrices, eigvals_coef, means_coef) -> None:
        self.weights = deepcopy(weights)
        self.means = deepcopy(means)
        self.precision_matrices = deepcopy(precision_matrices)
        self.eigvals_coef = eigvals_coef
        self.n_comp = self.weights.shape[0]
        self.data_dim = self.means.shape[1]
        self.means_coef = means_coef

    def inject_noise(self):
        
        eigvals = [np.mean(np.linalg.eigvals(self.precision_matrices[i])) for i in range(self.n_comp)]
        
        eig_val_max  = [eigvals[i] * self.eigvals_coef for i in range(len(eigvals))]

        for i in range(self.n_comp):
            
            givens_angles = np.random.uniform(-np.pi, np.pi, size=(int(self.data_dim * (self.data_dim - 1) / 2)))
            delta_eigvals = np.random.uniform(0, np.mean(eig_val_max), size=self.data_dim)
            # print('eig', np.mean(eig_val_max))
            
            v = Givens2Matrix(np.expand_dims(givens_angles, axis=1))
            addition = v @ np.diag(delta_eigvals) @ v.T

            self.precision_matrices += addition
        # print(np.mean(self.means))
        means_delta = np.random.normal(0, self.means_coef * np.mean(self.means), size=(self.n_comp, self.data_dim))
        # print(self.means.shape, means_delta.shape)
        self.means += means_delta

    def run_em_iters(self, iters, data):
        gmm = GaussianMixture(n_components=self.weights.shape[0], covariance_type='full',
         weights_init=self.weights, means_init=self.means, precisions_init=self.precision_matrices, max_iter=iters)
        gmm.fit(data)
        self.weights  = gmm.weights_
        self.means = gmm.means_
        self.precision_matrices = gmm.precisions_
        self.curr_score = gmm.score(data)
        return gmm.score(data)

class NoiseEM:
    def __init__(self, n_comp, n_particles, T1, T2, eigval_coef, means_coef) -> None:
        self.particles = [None for i in range(n_particles)]
        self.n_comp = n_comp
        self.T1 = T1
        self.T2 = T2
        self.eigval_coef = eigval_coef
        self.means_coef = means_coef

    def run(self, data):
        best_score = -np.inf
        best_particle = None
        for i in range(len(self.particles)):
            particle_gmm = GaussianMixture(self.n_comp, covariance_type='full', max_iter=1, n_init=1, init_params='k-means++')
            particle_gmm.fit(data)
            self.particles[i] = EMParticle(particle_gmm.weights_, particle_gmm.means_, particle_gmm.precisions_, self.eigval_coef, self.means_coef)
            
        for i in range(self.T1):
            for particle in self.particles:
                ll = particle.run_em_iters(self.T2, data)
                print('LL: ', ll)
                if ll > best_score:
                    best_score = ll
                    best_particle = deepcopy(particle)
                # particle.inject_noise()
            
            for j in range(len(self.particles)):
                self.particles[j] = EMParticle(best_particle.weights, best_particle.means, best_particle.precision_matrices, self.eigval_coef, self.means_coef)
                self.particles[j].inject_noise()

        for particle in self.particles:
            ll = particle.run_em_iters(self.T2, data)
            if ll > best_score:
                best_score = ll
            print('Final LL: ', ll)
        print('Best LL: ', best_score)

In [62]:
data = load_seg_data()
T1 = 20
T2 = 50
n_comp = 10
n_particles = 50

eigvals_coef = 0.00003
means_coef = 0.00003

em = NoiseEM(n_comp, n_particles, T1, T2, eigvals_coef, means_coef)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    em.run(data)

LL:  68.54615664311747
LL:  67.52292286951213
LL:  69.38576942938029
LL:  66.91132179392008
LL:  68.50964256232089
LL:  65.38534817530632
LL:  68.46066979957764
LL:  68.46281376864096
LL:  68.61754217004501
LL:  69.52211802781852
LL:  68.51698769234018
LL:  64.14237060664861
LL:  66.5047418137545
LL:  67.76166945868928
LL:  66.29551758528999
LL:  69.46591365486574
LL:  67.82793016851925
LL:  67.23041828982055
LL:  69.01501726474956
LL:  68.96104699463221
LL:  65.28730170617698
LL:  68.4835729500369
LL:  65.20209446808062
LL:  70.40123159893845
LL:  68.44091835730141
LL:  69.2883733233745
LL:  68.62112745633662
LL:  70.30795757489723
LL:  70.16847210070716
LL:  66.16595577933393
LL:  69.0750844837831
LL:  66.91005013658022
LL:  69.2832354191715
LL:  68.67225890608593
LL:  66.13090584369351
LL:  68.92926898123893
LL:  69.37993097418605
LL:  67.31111773037938
LL:  68.88070332579308
LL:  69.51494961815185
LL:  68.36267139582488
LL:  69.30400108902518
LL:  67.77209098918853
LL:  66.52212181