In [5]:
%load_ext autoreload
%autoreload 2

In [1]:
import numpy as np 
from pycss.utils import *
from pycss.subset_selection import *
from scipy import stats
from notebook_utils.utils import *

Some helper functions for testing

In [2]:
def replace_submatrix(mat, ind1, ind2, mat_replace):
    """Write ``mat_replace`` into the (``ind1``, ``ind2``) block of ``mat``, in place.

    Row ``i`` of ``mat_replace`` is copied into row ``ind1[i]`` of ``mat`` at
    the columns selected by ``ind2``.

    Parameters
    ----------
    mat : array indexed as ``mat[row, ind2]``; it is modified in place.
    ind1 : iterable of row indices into ``mat``.
    ind2 : column index (or indices) into ``mat``.
    mat_replace : array indexed as ``mat_replace[i, :]``; it must provide one
        row for each entry of ``ind1``.

    Returns
    -------
    The same ``mat`` object that was passed in.
    """
    source_row = 0
    for target_row in ind1:
        # One row at a time: the selected columns of the target row receive
        # the matching row of the replacement block.
        mat[target_row, ind2] = mat_replace[source_row, :]
        source_row += 1
    return mat

def compute_Sigma_MLE(Sigma, S, noise='sph'):
    """Assemble the covariance matrix implied by subset ``S`` and a noise model.

    The result copies the blocks of ``Sigma`` that involve ``S`` unchanged and
    replaces the complement-by-complement block with
    ``Sigma[C, S] @ inv(Sigma[S, S]) @ Sigma[S, C] + diag(D)``, where ``C`` is
    the complement of ``S`` and ``D`` is built from the diagonal of
    ``regress_off(Sigma, S)`` restricted to ``C``.

    Parameters
    ----------
    Sigma : (p, p) array
        Covariance matrix to start from.
    S : sequence of int
        Indices of the selected variables; sorted internally.
    noise : {'sph', 'diag'}
        ``'sph'`` uses a single shared variance (the mean of the residual
        diagonal entries); ``'diag'`` keeps each residual diagonal entry.

    Returns
    -------
    (p, p) array
        The assembled covariance matrix.

    Raises
    ------
    ValueError
        If ``noise`` is neither ``'sph'`` nor ``'diag'``.
    """
    # Reject unknown noise models up front; otherwise neither branch below
    # would define D and the failure would surface as an unrelated
    # UnboundLocalError much later.
    if noise not in ('sph', 'diag'):
        raise ValueError("noise must be 'sph' or 'diag', got " + repr(noise))

    p = Sigma.shape[0]
    k = len(S)
    S = np.sort(S)
    S_comp = complement(p, S)

    # regress_off is a project helper; presumably it returns the residual
    # covariance after regressing every variable on S -- confirm.
    Sigma_R = regress_off(Sigma, S)
    resid_diag = np.diag(Sigma_R)[S_comp]
    if noise == 'sph':
        D = np.ones(p - k) * np.mean(resid_diag)
    else:  # noise == 'diag'
        D = resid_diag.copy()

    # Slice each block of Sigma once instead of re-slicing inside every call.
    Sigma_SS = Sigma[S, :][:, S]
    Sigma_SC = Sigma[S, :][:, S_comp]
    Sigma_CS = Sigma[S_comp, :][:, S]
    Sigma_CC = Sigma_CS @ np.linalg.inv(Sigma_SS) @ Sigma_SC + np.diag(D)

    Sigma_MLE = np.zeros((p, p))
    Sigma_MLE = replace_submatrix(Sigma_MLE, S, S, Sigma_SS)
    Sigma_MLE = replace_submatrix(Sigma_MLE, S, S_comp, Sigma_SC)
    Sigma_MLE = replace_submatrix(Sigma_MLE, S_comp, S, Sigma_CS)
    Sigma_MLE = replace_submatrix(Sigma_MLE, S_comp, S_comp, Sigma_CC)
    return Sigma_MLE

Generate factor model and data

In [3]:
p = 50     # number of observed variables
n = 2000   # number of samples to draw
k = 15     # number of latent factors (columns of W); later cells reuse `k` as a loop variable
SEED = 0   # fixed seed so any reported mistake can be reproduced

# Seed NumPy's global generator: every draw below goes through np.random.*,
# so a Restart & Run All yields the same Sigma and X each time.
np.random.seed(SEED)

# Factor model: low-rank part W W^T plus non-negative noise variances D on the diagonal.
W = np.random.multivariate_normal(np.zeros(k), cov=np.eye(k), size=p)
D = np.square(np.random.normal(0, 1, p))
Sigma = W @ W.T
np.fill_diagonal(Sigma, np.diag(Sigma) + D)

# standardize_cov is a project helper; presumably it rescales Sigma to unit
# diagonal -- confirm.
Sigma = standardize_cov(Sigma)

# Draw the sample, then compute the moments that the tests below operate on.
X = np.random.multivariate_normal(np.zeros(p), cov=Sigma, size=n)
mu_hat, Sigma_hat = get_moments(X)

Test Greedy CSS

In [4]:
# For each subset size k, check that the last entry of the greedy solution is
# the best single addition to the first k - 1 entries.
# NOTE(review): assumes S is returned in selection order -- confirm.
for k in range(1, p):
    S, Sigma_R, errors = greedy_subset_selection(
        Sigma_hat, k, css_objective, flag_colinearity=False, tol=TOL
    )
    if len(errors) > 0:
        print('Colinearity errors at ' + str(k) + ': ', errors)

    # Drop the final entry and enumerate every variable that could replace it.
    S_removed = S[:(k - 1)]
    S_removed_comp = complement(p, S_removed)
    candidate_subsets = [
        np.concatenate([S_removed, np.array([t])]).astype(int)
        for t in S_removed_comp
    ]

    # Score each candidate subset by the mean diagonal of regress_off's output.
    temp_objectives = [
        np.mean(np.diag(regress_off(Sigma_hat, S_added, tol=TOL)))
        for S_added in candidate_subsets
    ]

    best_addition = S_removed_comp[np.argmin(temp_objectives)]
    if best_addition != S[k-1]:
        print('Mistake at ', k)

Test Swapping CSS 

In [5]:
# For each subset size k, check that the swapping solution cannot be improved
# by a single swap: removing any one selected variable and re-adding the best
# candidate must give back the variable that was removed.
for k in range(1, 15):
    S, Sigma_R, S_init, converged, errors = swapping_subset_selection(Sigma_hat, k, css_objective, flag_colinearity=False, tol=TOL)
    if len(errors) > 0:
        print('Colinearity errors at ' + str(k) + ': ', errors)
    if not converged:
        print(str(k) + ' did not converge')
        continue
    for i in range(len(S)):
        chosen = S[i]
        S_temp = np.delete(S, i)
        S_temp_comp = complement(p, S_temp)
        temp_objectives = []
        # The scoring loop and the comparison run once per removed variable,
        # so every position i of S is verified, not only the last one.
        for t in S_temp_comp:
            S_added = np.concatenate([S_temp, np.array([t])]).astype(int)
            temp_objectives.append(np.mean(np.diag(regress_off(Sigma_hat, S_added, tol=TOL))))
        if S_temp_comp[np.argmin(temp_objectives)] != chosen:
            print('Mistake at ', k)

Test Greedy PCSS 

In [6]:
noise = 'sph'

# Choose the objective that matches the noise model once, before the loop.
if noise == 'sph':
    pcss_objective = sph_pcss_objective
elif noise == 'diag':
    pcss_objective = diag_pcss_objective

# For each subset size k, check that the last entry of the greedy solution is
# the addition that minimises the average negative Gaussian log-likelihood of X.
# NOTE(review): assumes S is returned in selection order -- confirm.
for k in range(1, p-1):
    S, Sigma_R, errors = greedy_subset_selection(
        Sigma_hat, k, pcss_objective, flag_colinearity=True, tol=TOL
    )
    if len(errors) > 0:
        print('Colinearity errors at ' + str(k) + ': ', errors)

    # Drop the final entry and score every variable that could replace it.
    S_removed = S[:(k - 1)]
    S_removed_comp = complement(p, S_removed)
    temp_objectives = []
    for t in S_removed_comp:
        S_added = np.concatenate([S_removed, np.array([t])]).astype(int)
        Sigma_MLE_temp = compute_Sigma_MLE(Sigma_hat, S=S_added, noise=noise)
        fitted_gaussian = stats.multivariate_normal(mean=mu_hat, cov=Sigma_MLE_temp)
        avg_log_lik = np.mean(fitted_gaussian.logpdf(X))
        temp_objectives.append(-1 * avg_log_lik)

    best_addition = S_removed_comp[np.argmin(temp_objectives)]
    if best_addition != S[k-1]:
        print('Mistake at ', k)

Test Swapping PCSS

In [7]:
noise = 'sph'

# Choose the objective that matches the noise model once, before the loop.
if noise == 'sph':
    pcss_objective = sph_pcss_objective
elif noise == 'diag':
    pcss_objective = diag_pcss_objective

# For each subset size k, check that the swapping solution cannot be improved
# by a single swap under the average negative Gaussian log-likelihood of X.
for k in range(1, 15):
    S, Sigma_R, S_init, converged, errors = swapping_subset_selection(
        Sigma_hat, k, pcss_objective, flag_colinearity=True, tol=TOL
    )

    if len(errors) > 0:
        print('Colinearity errors at ' + str(k) + ': ', errors)

    if not converged:
        print(str(k) + ' did not converge')
        continue

    # Remove each selected variable in turn; the best single re-addition must
    # be the variable that was just removed.
    for i, chosen in enumerate(S):
        S_temp = np.delete(S, i)
        S_temp_comp = complement(p, S_temp)
        temp_objectives = []
        for t in S_temp_comp:
            S_added = np.concatenate([S_temp, np.array([t])]).astype(int)
            Sigma_MLE_temp = compute_Sigma_MLE(Sigma_hat, S=S_added, noise=noise)
            fitted_gaussian = stats.multivariate_normal(mean=mu_hat, cov=Sigma_MLE_temp)
            avg_log_lik = np.mean(fitted_gaussian.logpdf(X))
            temp_objectives.append(-1 * avg_log_lik)

        best_addition = S_temp_comp[np.argmin(temp_objectives)]
        if best_addition != chosen:
            print('Mistake at ', k)