In [5]:
%load_ext autoreload
%autoreload 2

In [21]:
import numpy as np 
from pycss.utils import *
from pycss.subset_selection import *
from scipy import stats

Generate factor model and data

In [13]:
# Simulation settings shared by the CSS test cells below.
p = 50    # number of variables (dimension of each covariance matrix)
n = 2000  # number of samples drawn when estimating a covariance
k = 15    # number of latent factors in the generating model
B = 10    # number of independent random trials per test

def gen_Sigma(n, p, k):
    """Simulate data from a random k-factor model and return its sample covariance.

    A p x k loading matrix with standard normal entries and p non-negative
    idiosyncratic variances define the population covariance
    ``W W^T + diag(D)``, which is then passed through ``standardize_cov``
    (presumably rescaling to unit diagonal -- confirm against pycss.utils).
    ``n`` Gaussian samples are drawn from that covariance and summarized
    with ``get_moments``.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    p : int
        Number of variables.
    k : int
        Number of latent factors.

    Returns
    -------
    The second value returned by ``get_moments`` for the simulated samples
    (the covariance estimate).
    """
    # Each of the p rows is one draw from N(0, I_k).
    loadings = np.random.multivariate_normal(np.zeros(k), cov=np.eye(k), size=p)
    # sqrt(k) times squared standard normals: non-negative noise variances.
    noise_var = np.sqrt(k) * np.square(np.random.normal(0, 1, p))

    # Low-rank factor part plus the diagonal noise part.
    population_cov = loadings @ loadings.T + np.diag(noise_var)
    population_cov = standardize_cov(population_cov)

    samples = np.random.multivariate_normal(np.zeros(p), cov=population_cov, size=n)
    _, Sigma_hat = get_moments(samples)
    return Sigma_hat

Test Greedy CSS

In [14]:
# Randomized check of greedy_css over B simulated covariance matrices.
# For each trial it verifies that (1) forced-in variables are selected,
# (2) forced-out variables are not, (3) selecting by size `k` and by
# `cutoffs=0` give the same subset, and (4) the result matches a brute-force
# greedy search written directly against the objective trace(regress_off(...)).
# Nothing is printed when every check passes.
Sigmas = []
for i in range(B):
    Sigma = gen_Sigma(n, p, k)
    # Keep every simulated matrix so a failing trial can be inspected afterwards.
    Sigmas.append(Sigma)
    # Sizes of the include/exclude sets: 0 with probability 0.7, else 3, 6 or 9.
    # Each draw is a length-1 array, which np.random.choice accepts as `size` below.
    N1 = np.random.choice([0, 3, 6, 9], 1, p=[0.7, 0.1, 0.1, 0.1])
    N2 = np.random.choice([0, 3, 6, 9], 1, p=[0.7, 0.1, 0.1, 0.1])
    include = np.random.choice(np.arange(p), N1, replace=False).astype(int)
    # exclude is drawn only from indices not already in include, so the two sets are disjoint.
    exclude = np.random.choice([idx for idx in np.arange(p) if idx not in include], N2, replace=False).astype(int)
    
    # Select every non-excluded variable, once by explicit size and once by cutoff.
    S, _ = greedy_css(Sigma, k=p-len(exclude), include=include, exclude=exclude)
    S_cutoff, _ = greedy_css(Sigma, cutoffs=0, include=include, exclude=exclude)
    
    if not set(include).issubset(S):
            print('Iteration ' + str(i) + ' does not include all of include.')
            
    if len(set(exclude).intersection(S)) > 0:
            print('Iteration ' + str(i) + ' does not exclude all of exclude.')
    
    if not np.all(S == S_cutoff):
        print('k and cutoff dont agree for iteration ' + str(i))
    
    # Brute-force reference: -1 marks slots not filled yet; the first
    # len(include) slots are the forced-in variables, in the given order.
    naive_S = (-1 * np.ones(p - len(exclude))).astype(int)
    naive_S[:len(include)] = include 
    for j in range(len(include), len(naive_S)):
        # Candidates are indices that are neither already chosen nor excluded.
        options = [idx for idx in np.arange(p) if (idx not in naive_S and idx not in exclude)]
        best_idx = None
        best_obj_val = np.inf
        for idx in options:
            potential_S = np.concatenate([naive_S[ :j], np.array([idx]) ])
            # Objective: trace of whatever regress_off returns for this subset
            # (presumably the residual covariance after regressing on it -- confirm).
            obj_val = np.trace(regress_off(Sigma, potential_S))
            # Strict '<' keeps the first candidate on ties.
            if obj_val < best_obj_val:
                best_obj_val = obj_val
                best_idx = idx
        naive_S[j] = best_idx
    
    # The order matters here: S must equal the reference position by position.
    if not np.all(S == naive_S):
        print('On iteration ' + str(i) + ' we dont match the naive solution at ' + str(np.where(naive_S != S)[0]))
              

Test Swapping CSS 

In [15]:
# Randomized check of swapping_css. For every trial and every subset size
# s = 1..14 it verifies that forced-in variables are selected, forced-out
# variables are not, and -- when the algorithm reports convergence -- that no
# single swap of a non-forced variable lowers the objective
# trace(regress_off(Sigma, subset)). Nothing is printed when every check passes.
# TOL is expected to come from the pycss star imports.
Sigmas = []
for i in range(B):
    Sigma = gen_Sigma(n, p, k)
    # Keep every simulated matrix so a failing trial can be inspected afterwards.
    Sigmas.append(Sigma)
    for s in range(1, 15):
        # Sizes of the include/exclude sets, each uniform on 0..s-1.
        N1 = np.random.choice(np.arange(s))
        N2 = np.random.choice(np.arange(s))
        include = np.random.choice(np.arange(p), N1, replace=False).astype(int)
        # exclude is drawn only from indices not in include, so the sets are disjoint.
        exclude = np.random.choice([idx for idx in np.arange(p) if idx not in include], N2, replace=False).astype(int)
        S, _, _, converged = swapping_css(Sigma, s, tol=TOL, include=include, exclude=exclude)

        if not set(include).issubset(S):
            print('Size ' + str(s) + ' subset for iteration ' + str(i) + ' does not include all of include.')

        if len(set(exclude).intersection(S)) > 0:
            print('Size ' + str(s) + ' subset for iteration ' + str(i) + ' does not exclude all of exclude.')

        if not converged:
            print('Size ' + str(s) + ' subset for iteration ' + str(i) + ' did not converge.')
            # Local optimality is only expected of a converged solution.
            continue

        for j in range(len(S)):
            chosen = S[j]
            # Forced-in variables are never candidates for swapping out.
            if chosen in include:
                continue
            temp_S = np.delete(S, j)
            # Every admissible replacement for position j, including `chosen` itself.
            options = np.array([idx for idx in np.arange(p) if (idx not in exclude and idx not in temp_S)])
            best_obj_val = np.inf
            best_idx = None

            for ell in options:
                potential_S = np.concatenate([temp_S, np.array([ell])]).astype(int)
                obj_val = np.trace(regress_off(Sigma, potential_S))
                # Strict '<' keeps the first candidate on ties.
                if obj_val < best_obj_val:
                    best_obj_val = obj_val
                    best_idx = ell

            if best_idx != chosen:
                # Report both the selected index and the better alternative.
                print('Mistake choosing index ' + str(chosen) + ' (best is ' + str(best_idx) + ') for the size ' + str(s) + ' subset on iteration ' + str(i))

Test Greedy Subset Factor Selection

In [20]:
def compute_test_stat(Sigma, S):
    """Return the subset-factor test statistic of subset ``S`` for ``Sigma``.

    With ``R = regress_off(Sigma, S)`` and ``C = complement(p, S)``, the
    statistic is ``sum(log(diag(R)[C])) - log|det(R[C, C])|``: the sum of the
    log diagonal entries of ``R`` over ``C`` minus the log absolute
    determinant of the corresponding sub-block of ``R``.

    Parameters
    ----------
    Sigma : square array
        Covariance matrix; its first dimension gives the number of variables.
    S : array-like of int
        Indices of the selected variables.

    Returns
    -------
    float
        The value of the statistic.
    """
    dim = Sigma.shape[0]
    residual = regress_off(Sigma, S)
    rest = complement(dim, S)

    # Sum of log diagonal entries restricted to the unselected variables.
    sum_log_diag = np.sum(np.log(np.diag(residual)[rest]))
    # slogdet returns (sign, log|det|); only the log magnitude is used.
    _, log_abs_det = np.linalg.slogdet(residual[rest, :][:, rest])
    return sum_log_diag - log_abs_det

In [17]:
# Smaller simulation settings for the subset factor selection test cells below.
p = 10    # number of variables (dimension of each covariance matrix)
n = 2000  # number of samples drawn when estimating a covariance
k = 5     # number of latent factors; also the subset size passed to the swapping test
B = 10    # number of independent random trials per test

In [18]:
# Randomized check of greedy_subset_factor_selection over B simulated
# covariance matrices. For each usable trial it verifies that forced-in
# variables are selected, forced-out variables are not, that the expected
# number of variables is selected, and that the selection matches a
# brute-force greedy search that minimizes compute_test_stat at every step.
# Nothing is printed when every check passes.
Sigmas = []
# One -inf cutoff per possible subset size 0..p (presumably so that no size is
# ever accepted early and the search runs to its maximum size -- confirm).
cutoffs = -1*np.inf *np.ones(p + 1)
for i in range(B):
    Sigma = gen_Sigma(n, p, k)
    # Keep every simulated matrix so a failing trial can be inspected afterwards.
    Sigmas.append(Sigma)
    # Sigma is a symmetric covariance estimate, so eigvalsh applies and always
    # returns real eigenvalues. A non-positive one means Sigma is singular and
    # the log-determinant statistic is undefined, so the trial is skipped.
    if np.min(np.linalg.eigvalsh(Sigma)) <= 0:
        print('Iteration ' + str(i) + ' is not positive definite.')
        continue
    # Sizes of the include/exclude sets, each uniform on {0, 1, 2}. The draws
    # are length-1 arrays, which np.random.choice accepts as `size` below.
    N1 = np.random.choice([0, 1, 2], 1)
    N2 = np.random.choice([0, 1, 2], 1)
    include = np.random.choice(np.arange(p), N1, replace=False).astype(int)
    # exclude is drawn only from indices not in include, so the sets are disjoint.
    exclude = np.random.choice([idx for idx in np.arange(p) if idx not in include], N2, replace=False).astype(int)

    S, reject = greedy_subset_factor_selection(Sigma, cutoffs=cutoffs, include=include, exclude=exclude)
    if not set(include).issubset(S):
        print('Iteration ' + str(i) + ' does not include all of include.')

    if len(set(exclude).intersection(S)) > 0:
        print('Iteration ' + str(i) + ' does not exclude all of exclude.')

    ell = len(S)
    if ell != min(p - len(exclude), p-1):
        print('Iteration ' + str(i) + ' selected less than p - len(exclude).')

    # Brute-force reference, built independently of S. It is an integer array
    # so its prefix can be used directly as an index set; -1 marks unfilled slots.
    naive_S = (-1 * np.ones(ell)).astype(int)
    naive_S[:len(include)] = include
    for j in range(len(include), ell):
        if j == p-1:
            break
        # Candidates are indices that are neither excluded nor already chosen.
        options = [idx for idx in np.arange(p) if (idx not in exclude and idx not in naive_S)]
        best_idx = None
        best_test_stat = np.inf
        for idx in options:
            # Extend the reference's own prefix, the same set the candidates were
            # filtered against, so a candidate can never duplicate a chosen index.
            test_stat = compute_test_stat(Sigma, np.concatenate([naive_S[:j], np.array([idx])]))
            # Strict '<' keeps the first candidate on ties.
            if test_stat < best_test_stat:
                best_idx = idx
                best_test_stat = test_stat
        if best_idx is None:
            # No candidate gave a finite statistic. Leave the remaining slots
            # at -1 so the comparison below reports the mismatch.
            break
        naive_S[j] = best_idx

    # Only the first p-2 picks are compared. Presumably this is because with
    # p-1 variables selected a single variable remains and compute_test_stat
    # is log(v) - log|v| for every candidate, so the last pick is a tie.
    n_compared = min(ell, p-2)
    if not np.all(S[:n_compared] == naive_S[:n_compared]):
        print('On iteration ' + str(i) + ' we dont match the naive solution at ' + str(np.where(S[:n_compared] != naive_S[:n_compared])[0]))
        

Test Swapping Subset Factor Selection

In [19]:
# Randomized check of swapping_subset_factor_selection over B simulated
# covariance matrices. For each usable trial it verifies that forced-in
# variables are selected, forced-out variables are not, and that no single
# swap of a non-forced variable lowers compute_test_stat.
# Nothing is printed when every check passes.
Sigmas = []
# Not used by the call below (it passes cutoff=-np.inf directly); the
# assignment is retained so the notebook namespace is unchanged.
cutoffs = -1*np.inf *np.ones(p + 1)
for i in range(B):
    Sigma = gen_Sigma(n, p, k)
    # Keep every simulated matrix so a failing trial can be inspected afterwards.
    Sigmas.append(Sigma)
    # Sigma is a symmetric covariance estimate, so eigvalsh applies and always
    # returns real eigenvalues. A non-positive one means Sigma is singular and
    # the log-determinant statistic is undefined, so the trial is skipped.
    if np.min(np.linalg.eigvalsh(Sigma)) <= 0:
        print('Iteration ' + str(i) + ' is not positive definite.')
        continue
    # Sizes of the include/exclude sets, each uniform on {0, 1, 2}. The draws
    # are length-1 arrays, which np.random.choice accepts as `size` below.
    N1 = np.random.choice([0, 1, 2], 1)
    N2 = np.random.choice([0, 1, 2], 1)
    include = np.random.choice(np.arange(p), N1, replace=False).astype(int)
    # exclude is drawn only from indices not in include, so the sets are disjoint.
    exclude = np.random.choice([idx for idx in np.arange(p) if idx not in include], N2, replace=False).astype(int)

    S, reject = swapping_subset_factor_selection(Sigma, k=k, cutoff=-np.inf, include=include, exclude=exclude)
    if not set(include).issubset(S):
        print('Iteration ' + str(i) + ' does not include all of include.')

    if len(set(exclude).intersection(S)) > 0:
        print('Iteration ' + str(i) + ' does not exclude all of exclude.')

    for j in range(len(S)):
        chosen = S[j]
        # Forced-in variables are never candidates for swapping out.
        if chosen in include:
            continue
        temp_S = np.delete(S, j)
        # Every admissible replacement for position j, including `chosen` itself.
        options = np.array([idx for idx in np.arange(p) if (idx not in exclude and idx not in temp_S)])
        best_test_stat = np.inf
        best_idx = None

        for ell in options:
            potential_S = np.concatenate([temp_S, np.array([ell])]).astype(int)
            test_stat = compute_test_stat(Sigma, potential_S)
            # Strict '<' keeps the first candidate on ties.
            if test_stat < best_test_stat:
                best_test_stat = test_stat
                best_idx = ell

        if best_idx != chosen:
            # The subset size is taken from S itself; this cell defines no `s`.
            print('Mistake choosing index ' + str(chosen) + ' (best is ' + str(best_idx) + ') for the size ' + str(len(S)) + ' subset on iteration ' + str(i))
        