In [1]:
# shared imports used throughout the notebook
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky
import matplotlib.pyplot as plt

### Loading Cloud dataset

In [2]:
# Read the whitespace-separated Cloud data file into a float array (one row per line).
cloud_data_name = 'data/cloud.data'
with open(cloud_data_name) as f:
    # Blank lines are skipped: an empty token list would otherwise become an
    # all-NaN row after the float conversion below.
    cloud_rows = [line.split() for line in f if line.strip()]
cloud_data = pd.DataFrame(cloud_rows).astype(float).to_numpy()

In [3]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every column of the Cloud data with a freshly fitted MinMaxScaler.
scaler = MinMaxScaler()
cloud_data = scaler.fit_transform(cloud_data)

### Loading Breast Cancer

In [4]:
# Read the comma-separated breast cancer file, dropping the first two fields of
# every record and converting the remaining ones to floats.
bc_data_name = 'data/wdbc.data'
with open(bc_data_name) as f:
    # Blank lines are skipped: slicing an empty record gives no fields, which
    # would otherwise end up as an all-NaN row.
    bc_rows = [line.split(',')[2:] for line in f if line.strip()]
data_bc = pd.DataFrame(bc_rows).astype(float).to_numpy()

In [5]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every column of the breast cancer data with a freshly fitted MinMaxScaler.
scaler = MinMaxScaler()
data_bc = scaler.fit_transform(data_bc)

### Loading Landsat Satellite

In [6]:
def _read_sat_features(path, n_features=36):
    """Read a space-separated file and return its first ``n_features`` columns as floats.

    Blank lines are skipped; without that, a trailing empty line yields the
    token ``'\\n'``, which cannot be converted to float.
    """
    # NOTE(review): presumably the column(s) after the first 36 hold the class
    # label - confirm against the dataset description.
    with open(path) as f:
        rows = [line.split(' ')[:n_features] for line in f if line.strip()]
    return pd.DataFrame(rows).astype(float).to_numpy()


sat_trn_data_name = 'data/sat.trn'
data_sat_trn = _read_sat_features(sat_trn_data_name)

sat_tst_data_name = 'data/sat.tst'
data_sat_tst = _read_sat_features(sat_tst_data_name)

# Both files are stacked row-wise into one array.
data_sat = np.concatenate([data_sat_trn, data_sat_tst], axis=0)


In [7]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every column of the combined satellite data with a freshly fitted MinMaxScaler.
scaler = MinMaxScaler()
data_sat = scaler.fit_transform(data_sat)

### Helper methods

In [8]:
from matplotlib.pyplot import figure

# Open an (empty) 20x20 figure at 80 dpi and make 20x20 the default size for
# every figure created afterwards.
figure(dpi=80, figsize=(20, 20))
plt.rcParams.update({"figure.figsize": (20, 20)})

def plot_particle_trajectories(trajectories, best_em):
    """Plot one line per trajectory plus a thick black horizontal reference line.

    Parameters
    ----------
    trajectories : sequence of sequences of numbers
        Each inner sequence is drawn against its own index (0, 1, 2, ...).
    best_em : number
        Height of the reference line. NOTE(review): presumably the best score
        reached by plain EM - confirm with the caller.

    Nothing is drawn when ``trajectories`` is empty.
    """
    if len(trajectories) == 0:
        # No trajectory means no x-range for the reference line either.
        return

    for trajectory in trajectories:
        plt.plot(list(range(len(trajectory))), trajectory)

    # The reference line spans the length of the last trajectory.
    reference_len = len(trajectories[-1])
    plt.plot(list(range(reference_len)), [best_em] * reference_len, linewidth=3, c='black')
    plt.show()


<Figure size 1600x1600 with 0 Axes>

### PSO with low rank PD addition method for GMM

In [89]:
def run_experiment_delta_parametrize(data, log_file_name, n_components=10, amplitude=0.05, rank=10, init_scale=1,
                                     n_particles=25, max_i=200, n_init=200, random_state=None):
    """Refine a fitted full-covariance GMM with a particle swarm (PSO).

    One ``GaussianMixture`` is fitted to ``data`` first. Every particle then
    stores *offsets* from that fit: a shift of each component mean and a
    positive semi-definite addition to each precision matrix,
    ``diag(delta_diag ** 2) + sum_k v_k v_k^T`` built from ``rank`` vectors
    ``v_k``. Particles are scored with ``GaussianMixture.score(data)`` and the
    progress is written to ``log_file_name``.

    Parameters
    ----------
    data : 2-D array, one sample per row
        Data used both for the base fit and for scoring particles.
    log_file_name : str
        Path of the log file; an existing file is overwritten.
    n_components : int
        Number of mixture components.
    amplitude : float
        Step size applied to every PSO velocity.
    rank : int
        Number of rank-one terms in the precision addition; clipped to the
        number of columns of ``data``.
    init_scale : float
        Standard deviation of the zero-mean normal used to draw the initial
        offsets.
    n_particles : int
        Swarm size (at least 1).
    max_i : int
        The loop runs for iteration indices ``-1, 0, ..., max_i - 1``.
    n_init : int
        ``n_init`` of the base ``GaussianMixture`` fit.
    random_state : int or None
        Seed for the base fit and for all random draws. ``None`` keeps the
        global NumPy random state and an unseeded fit.

    Returns
    -------
    pso_best_loglikelihood : list
        Global best score recorded after every iteration.
    best_loglikelihood_init : float
        Score of the best particle before the first iteration.
    large_gmm_init_score : int
        Always 0; placeholder for a many-restarts reference score that is
        currently not computed.
    particle_trajectories : list of lists
        ``particle_trajectories[i][t]`` is the score of particle ``i`` at the
        ``t``-th iteration.

    Raises
    ------
    ValueError
        If ``n_particles`` is smaller than 1.
    """
    if n_particles < 1:
        raise ValueError('n_particles must be at least 1')

    # Attraction coefficients: r_1* pulls towards the personal best,
    # r_2* towards the global best (the *_w pair is used for the weights).
    r_1, r_2 = 0.6, 0.8
    r_1_w, r_2_w = 0.42, 0.57

    data_dim = data.shape[1]
    rank = min(rank, data_dim)

    # With no seed the module-level NumPy generator is used, as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # One expensive base fit; all particles are perturbations of it.
    gmm = GaussianMixture(n_components=n_components, covariance_type='full', init_params='random',
                          n_init=n_init, random_state=random_state)
    gmm.fit(data)

    basic_means = gmm.means_
    base_chol = gmm.precisions_cholesky_
    # Precision of component k is chol_k @ chol_k.T.
    basic_prec = base_chol @ np.transpose(base_chol, (0, 2, 1))

    def log_likelihood_gmm_parametrized(weights, delta_means, delta_diag, delta_rank_params):
        """Score ``data`` under the base GMM shifted by the given offsets."""
        means = basic_means + delta_means

        cholesky = np.empty_like(basic_prec)
        for k in range(n_components):
            # (data_dim, rank) @ (rank, data_dim): sum of the outer products v v^T.
            low_rank = delta_rank_params[k].T @ delta_rank_params[k]
            # Build a fresh matrix so the fitted precisions are never modified.
            precision = basic_prec[k] + np.diag(delta_diag[k] ** 2) + low_rank
            # Any factor L with L @ L.T == precision works here: scoring only
            # needs the quadratic form and the product of the diagonal.
            cholesky[k] = np.linalg.cholesky(precision)

        # An unfitted estimator is populated by hand and only used for scoring.
        model = GaussianMixture(n_components=weights.shape[0], covariance_type='full',
                                weights_init=weights, means_init=means)
        model.weights_ = weights
        model.means_ = means
        model.precisions_cholesky_ = cholesky
        return model.score(data)

    # Initial positions: fitted weights, small random offsets for everything else.
    weights = np.tile(gmm.weights_, (n_particles, 1))
    delta_means = rng.normal(0, init_scale, size=(n_particles, n_components, data_dim))
    delta_diag_prec = rng.normal(0, init_scale, size=(n_particles, n_components, data_dim))
    delta_param_prec = rng.normal(0, init_scale, size=(n_particles, n_components, rank, data_dim))

    # Personal bests start at the initial positions; their scores are cached so
    # that no position is ever evaluated twice.
    p_weights = np.copy(weights)
    p_delta_means = np.copy(delta_means)
    p_delta_diag_prec = np.copy(delta_diag_prec)
    p_delta_param_prec = np.copy(delta_param_prec)
    p_scores = [
        log_likelihood_gmm_parametrized(p_weights[i], p_delta_means[i], p_delta_diag_prec[i], p_delta_param_prec[i])
        for i in range(n_particles)
    ]

    # Global best: first particle with the highest score.
    best = int(np.argmax(p_scores))
    g_score = p_scores[best]
    g_weights = np.copy(p_weights[best])
    g_delta_means = np.copy(p_delta_means[best])
    g_delta_diag_prec = np.copy(p_delta_diag_prec[best])
    g_delta_param_prec = np.copy(p_delta_param_prec[best])

    particle_trajectories = [[] for _ in range(n_particles)]
    pso_best_loglikelihood = []
    best_loglikelihood_init = g_score

    with open(log_file_name, 'w+') as f:
        f.write(f'Best initial log likelihood {g_score}\n')

        for iter_n in range(-1, max_i):
            f.write(f'Iter {iter_n}\n')

            # Update personal bests.
            for i in range(n_particles):
                score = log_likelihood_gmm_parametrized(weights[i], delta_means[i], delta_diag_prec[i], delta_param_prec[i])
                particle_trajectories[i].append(score)
                if p_scores[i] < score:
                    p_scores[i] = score
                    p_weights[i] = weights[i]
                    p_delta_means[i] = delta_means[i]
                    p_delta_diag_prec[i] = delta_diag_prec[i]
                    p_delta_param_prec[i] = delta_param_prec[i]

            # Update the global best only on a strict improvement.
            best = int(np.argmax(p_scores))
            if p_scores[best] > g_score:
                g_score = p_scores[best]
                g_weights = np.copy(p_weights[best])
                g_delta_means = np.copy(p_delta_means[best])
                g_delta_diag_prec = np.copy(p_delta_diag_prec[best])
                g_delta_param_prec = np.copy(p_delta_param_prec[best])

            # PSO update. The inertia term is left out for the weights so they
            # cannot be pushed below zero.
            # NOTE(review): it is absent for the other parameters too - confirm
            # that this is intended.
            # Every particle starts with the fitted weights, so the weight
            # velocity is zero and the weights stay where they are.
            for j in range(n_particles):
                c_1 = rng.uniform(0, 1)
                c_2 = rng.uniform(0, 1)

                v_weights = c_1 * r_1_w * (p_weights[j] - weights[j]) + c_2 * r_2_w * (g_weights - weights[j])
                weights[j] += amplitude * v_weights

                v_delta_means = c_1 * r_1 * (p_delta_means[j] - delta_means[j]) + c_2 * r_2 * (g_delta_means - delta_means[j])
                delta_means[j] += amplitude * v_delta_means

                v_delta_diag_prec = c_1 * r_1 * (p_delta_diag_prec[j] - delta_diag_prec[j]) + c_2 * r_2 * (g_delta_diag_prec - delta_diag_prec[j])
                delta_diag_prec[j] += amplitude * v_delta_diag_prec

                v_delta_param_prec = c_1 * r_1 * (p_delta_param_prec[j] - delta_param_prec[j]) + c_2 * r_2 * (g_delta_param_prec - delta_param_prec[j])
                delta_param_prec[j] += amplitude * v_delta_param_prec

            f.write(f'Best log likelihood {g_score}\n')
            pso_best_loglikelihood.append(g_score)

            f.flush()

    # Placeholder: the reference fit with many random restarts is not run.
    large_gmm_init_score = 0
    return pso_best_loglikelihood, best_loglikelihood_init, large_gmm_init_score, particle_trajectories



In [90]:
# Run the experiment on the Cloud data once per initial offset scale; each run
# writes to its own log file and the returned values are not kept.
init_scale_range = [0.001, 0.0005]
for init_scale in init_scale_range:
    log_file_name = f'pso_low_rank_parametrize_init_scale_{init_scale}.log'
    run_experiment_delta_parametrize(
        cloud_data,
        log_file_name,
        n_components=10,
        amplitude=0.05,
        rank=5,
        init_scale=init_scale,
    )


In [84]:

# Reference fit on the Cloud data with many random restarts.
gmm = GaussianMixture(n_components=10, covariance_type='full', init_params='random', n_init=500)
gmm.fit(cloud_data)

base_chol = gmm.precisions_cholesky_

# The precision matrices are chol @ chol.T, not the element-wise square of the
# Cholesky factors, so take them from the fitted estimator.
basic_prec = gmm.precisions_
basic_means = gmm.means_
basic_cov = gmm.covariances_
print(gmm.score(cloud_data))

25.035450343122715
