This notebook contains Python code for the convergence test under the linear-model setting.

In [1]:
import sys
# Put the sibling `mypkg` directory on the import path so the project modules
# imported in the following cells resolve. The path is relative, so it depends
# on the notebook's working directory.
sys.path.append("../mypkg")

In [2]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr
from scipy.stats import pearsonr
from numbers import Number

from easydict import EasyDict as edict
from tqdm import trange
from scipy.io import loadmat

# Plot styling applied to every figure created in this notebook.
plt.style.use('ggplot')
# Use a tight bounding box whenever a figure is written with savefig.
plt.rcParams["savefig.bbox"] = "tight"

In [3]:
import importlib
import hdf_utils.data_gen
# Re-execute hdf_utils.data_gen so that edits made to the module on disk are
# picked up without restarting the kernel.
importlib.reload(hdf_utils.data_gen)

<module 'hdf_utils.data_gen' from '/data/rajlab1/user_data/jin/MyResearch/HDF_infer/notebooks/../mypkg/hdf_utils/data_gen.py'>

In [4]:
from constants import DATA_ROOT, RES_ROOT, FIG_ROOT

from hdf_utils.data_gen import gen_covs, gen_simu_psd
from utils.matrix import col_vec_fn, col_vec2mat_fn, conju_grad, svd_inverse
from utils.functions import logit_fn
from utils.misc import save_pkl
from splines import obt_bsp_basis_Rfn
from projection import euclidean_proj_l1ball
from optimization.one_step_opt import OneStepOpt

from penalties.scad_pen import SCAD
from penalties.base_pen import PenaltyBase
from models.logistic_model import LogisticModel
from models.linear_model import LinearModel


In [5]:
# Make float64 the default floating-point dtype for newly created tensors.
# `torch.set_default_tensor_type` is deprecated in recent PyTorch releases;
# `torch.set_default_dtype` is the supported way to request double precision.
torch.set_default_dtype(torch.float64)

## Params and fns

### Params

In [6]:
# Frequency grid: 40 evenly spaced values from 1 to 40 (both ends included)
ind_freq = np.linspace(start=1, stop=40, num=40)

In [12]:
# some fn for GT beta(s)
def _fn1(x):
    """10*sin(6pi*x)
    """
    return 10*np.sin(2*np.pi*3*x)
def _fn2(x):
    """10 * (exp(x)-x+sin(4pi*x))
    """
    p1 = np.exp(x) -x
    p2 = 1*np.sin(2*np.pi*2*x)
    return 10*(p1 + p2)

def _fn3(x):
    """ x^3 - 3x^2 + 2x - 10
    """
    return x**3 - 3*x**2 + 2*x -10

def _fn4(x):
    return 20*x 

def _fn5(x):
    return -20*np.log(x**4+1)-6

def _zero_fn(x):
    if isinstance(x, Number):
        return 0
    else:
        return np.zeros(len(x))

In [15]:
# Global configuration for the linear-setting experiment; everything is stored on `paras`.
# NOTE: the only random draw visible in this cell is commented out, so the seed below
# matters only if one of the helpers called here draws random numbers.
np.random.seed(0)
paras = edict()
paras.model = "linear"
paras.num_rep = 100 # num of replicates (one seed each) per (lam, Rfct) setting
paras.n = 1000 # num of data obs to be generated
paras.ns = [100, 300, 900, 2700, 8100, 24300] # candidate sample sizes; the visible cells only use paras.n

paras.npts = 40 # num of pts to evaluate X(s)
paras.d = 10 # num of ROIs
paras.q = 5 # num of other covariates
paras.sigma2 = 1 # variance of the error
paras.sel_idx = np.arange(2, paras.d) # M^c set
paras.stop_cv = 5e-4 # threshold on the relative-change stop value; below it the optimization stops
paras.max_iter = 2000 # max num of optimization steps per run
paras.can_lams = [1e-1, 3e-1, 1e0, 3e0, 9e0, 3e1, 1e2] # candidate penalty parameters (passed to SCAD as lams)
paras.can_Rfcts = [1,  2] # candidate multipliers applied to paras.Rmin


# B-spline basis settings
paras.bsp = edict()
paras.bsp.aknots_raw = np.linspace(0, 1, 20) # 20 equally spaced knots on [0, 1]
paras.bsp.iknots = paras.bsp.aknots_raw[1:-1] # interior knots: the two end points are dropped
paras.bsp.bknots = np.array([0, 1]) # boundary knots
paras.bsp.ord = 4 # order passed to obt_bsp_basis_Rfn (presumably 4 = cubic B-splines; TODO confirm)
paras.bsp.x = np.linspace(0, 1, paras.npts) # evaluation grid on [0, 1], both ends included
#paras.bsp.x = np.linspace(0, 1, paras.npts+1)[:-1]
paras.bsp.basis_mat = obt_bsp_basis_Rfn(paras.bsp.x, 
                                        paras.bsp.iknots, 
                                        paras.bsp.bknots, 
                                        paras.bsp.ord)
paras.bsp.N = paras.bsp.basis_mat.shape[1] # num of basis for bsp

# Covariate type spec handed to gen_covs and the true covariate coefficients;
# gen_simu_data_all asserts that both have paras.q entries.
paras.types_ = ["int", 2, 2, "c", "c"]
paras.alp_GT = np.array([5, 1, -2, 3, -4])

# True coefficient functions, one per ROI: three nonzero, paras.d-5 identically zero, two nonzero.
beta_type_GT = [_fn1, _fn2, _fn3] + [_zero_fn]*(paras.d-3-2) + [_fn4, _fn5]
# Evaluate each function on the grid; after the transpose the array is npts x d.
paras.beta_v_GT = np.array([_fn(paras.bsp.x) for _fn in beta_type_GT]).T
# Least-squares B-spline coefficients of beta_v_GT via the normal equations, N x d.
paras.Gam_est = (np.linalg.inv(paras.bsp.basis_mat.T 
                               @ paras.bsp.basis_mat) 
                 @ paras.bsp.basis_mat.T 
                 @ paras.beta_v_GT)
#paras.Gam_GT = np.random.randn(paras.bsp.N, paras.d)*10

# Twice (sum of column-wise L2 norms of Gam_est/sqrt(N) + L1 norm of alp_GT);
# run_fn multiplies this by Rfct to obtain the R given to OneStepOpt.
paras.Rmin = 2*(np.linalg.norm(paras.Gam_est/np.sqrt(paras.bsp.N), axis=0).sum() + np.abs(paras.alp_GT).sum())

paras.basis_mat = torch.DoubleTensor(paras.bsp.basis_mat) # npts x N

In [16]:
# Directory holding this experiment's results. Create it together with any
# missing parent directories, and do not fail if it already exists.
paras.save_dir = RES_ROOT/"linear_1"
paras.save_dir.mkdir(parents=True, exist_ok=True)
# Store the configuration next to the results so each run is self-describing.
# is_force=True presumably allows overwriting an existing file (TODO confirm in save_pkl).
save_pkl(paras.save_dir/"paras.pkl", paras, is_force=True)

Save to /data/rajlab1/user_data/jin/MyResearch/HDF_infer/notebooks/../mypkg/../results/linear_1/paras.pkl


### Fns

In [9]:
def gen_simu_data_all(seed):
    """Simulate one data set (X, Y, Z) for the linear model under a given seed.

    args:
        seed: Seed given to numpy's global RNG before anything is drawn.
    return:
        An edict of torch tensors
            X: simulated PSD, n x d x npts
            Y: response, length n
            Z: covariates, n x q
    """
    np.random.seed(seed)
    # the covariate spec and the true coefficients must both have q entries
    assert len(paras.types_) == paras.q
    assert len(paras.alp_GT) == paras.q

    # simulated PSD and covariates
    simu_psd = gen_simu_psd(paras.n, paras.d, 10)
    simu_covs = gen_covs(paras.n, paras.types_)

    # functional part: sum beta_j(s)*X_j(s) over the d ROIs, then average over the grid
    weighted_psd = paras.beta_v_GT.T * simu_psd[:, :, :]
    int_part = weighted_psd.sum(axis=1).mean(axis=1)
    # linear predictor = covariate effect + functional effect
    lin_term = simu_covs @ paras.alp_GT + int_part

    # This uniform draw is never used, but it advances the global RNG and so
    # determines the Gaussian noise drawn right after it; keep it in place.
    np.random.rand(paras.n)
    noise = np.random.randn(paras.n)*np.sqrt(paras.sigma2)
    Y = lin_term + noise

    # pack everything as torch tensors
    simu_data = edict()
    simu_data.X = torch.tensor(simu_psd) # n x d x npts
    simu_data.Y = torch.tensor(Y)
    simu_data.Z = torch.tensor(simu_covs) # n x q
    return simu_data

## Simu

In [10]:
def run_fn(seed, lam, Rfct):
    """Fit the SCAD-penalized linear model on one simulated data set.

    Starting from a random perturbation of the ground truth, OneStepOpt is
    applied repeatedly (each step warm-started from the previous one) until the
    stop value drops below paras.stop_cv or paras.max_iter steps have been run.

    args:
        seed: Seed for numpy and torch; also passed to gen_simu_data_all.
        lam: Penalty parameter passed to SCAD as `lams`.
        Rfct: Multiplier of paras.Rmin; the product is passed to OneStepOpt as `R`.
    return:
        (opt, not_converged): the OneStepOpt object from the last step, and a
        bool that is True when the stopping rule was never met within
        paras.max_iter steps.
    raises:
        ValueError: if paras.max_iter < 1, since no step would be run.
    """
    if paras.max_iter < 1:
        raise ValueError(f"paras.max_iter must be at least 1, got {paras.max_iter}")

    np.random.seed(seed)
    torch.manual_seed(seed)

    # Initial values: ground truth plus standard-normal noise scaled by 10.
    alp_init = torch.tensor(paras.alp_GT) + torch.randn(paras.q)*10
    Gam_init = torch.tensor(paras.Gam_est) + torch.randn(paras.bsp.N, paras.d)*10
    theta_init = torch.cat([alp_init, col_vec_fn(Gam_init)/np.sqrt(paras.bsp.N)])
    rhok_init = torch.randn(paras.d*paras.bsp.N)
    # Previous iterates; 0 makes the first relative change equal to 1.
    last_Gamk = 0
    last_thetak = 0

    cur_data = gen_simu_data_all(seed)
    model = LinearModel(Y=cur_data.Y, X=cur_data.X, Z=cur_data.Z, 
                        basis_mat=paras.basis_mat, sigma2=paras.sigma2)
    pen = SCAD(lams=lam, a=3.7,  sel_idx=paras.sel_idx)

    # Track convergence explicitly: comparing ix with max_iter-1 would mislabel
    # a run that meets the stopping rule exactly on the last allowed step.
    converged = False
    for ix in trange(paras.max_iter):
        opt = OneStepOpt(Gamk=Gam_init, 
                      rhok=rhok_init, 
                      theta_init=theta_init, 
                      alpha=0.9, beta=1, model=model, penalty=pen, 
                      q=paras.q, NR_eps=1e-5, NR_maxit=100, R=paras.Rmin*Rfct)
        opt()
        # warm start for the next step
        Gam_init = opt.Gamk
        rhok_init = opt.rhok
        theta_init = opt.thetak

        # relative change of Gam between consecutive steps
        Gam_diff = opt.Gamk - last_Gamk
        Gam_diff_norm = torch.norm(Gam_diff)/torch.norm(opt.Gamk)

        # relative change of theta between consecutive steps
        theta_diff = opt.thetak - last_thetak
        theta_diff_norm = torch.norm(theta_diff)/torch.norm(opt.thetak)

        # relative gap between Gamk and the Gam part of thetak (rescaled by sqrt(N))
        Gam_theta_diff = opt.Gamk - col_vec2mat_fn(opt.thetak[paras.q:], nrow=paras.bsp.N)*np.sqrt(paras.bsp.N)
        Gam_theta_diff_norm = torch.norm(Gam_theta_diff)/torch.norm(opt.Gamk)

        # stop only when all three quantities are small
        stop_v = np.max([Gam_diff_norm.item(), theta_diff_norm.item(), Gam_theta_diff_norm.item()])
        if stop_v < paras.stop_cv:
            converged = True
            break

        last_Gamk = opt.Gamk
        last_thetak = opt.thetak

    if not converged:
        print(f"The optimization under seed {seed} may not converge with stop value {stop_v:.3f}")
    return opt, not converged

In [11]:
# Grid over the candidate penalty parameters and radius factors. For each pair,
# run paras.num_rep replicates (the seed is the replicate index) and save them
# together in one pickle under paras.save_dir.
for cur_lam in paras.can_lams:
    for cur_Rfct in paras.can_Rfcts:
        # The file name encodes lam*100, Rfct*10 and n, each printed without decimals.
        fil_name = f"result_lam-{cur_lam*100:.0f}_Rfct-{cur_Rfct*10:.0f}_n-{paras.n:.0f}.pkl"
        opt_results = []
        for seed in range(paras.num_rep):
            print(cur_lam, cur_Rfct, seed)
            # run_fn returns a tuple (optimizer object, flag); it is stored as [tuple, seed].
            opt_result = run_fn(seed, lam=cur_lam, Rfct=cur_Rfct)
            opt_results.append([opt_result, seed])
        # Written only after every replicate of this pair has finished, so an
        # interrupted run leaves no file for the pair in progress.
        save_pkl(paras.save_dir/fil_name, opt_results)

0.1 1 0


 27%|████████████████████████████████████▍                                                                                                 | 544/2000 [00:01<00:03, 436.62it/s]


0.1 1 1


 26%|██████████████████████████████████▉                                                                                                   | 522/2000 [00:01<00:03, 433.13it/s]


0.1 1 2


 28%|█████████████████████████████████████▏                                                                                                | 555/2000 [00:01<00:03, 419.96it/s]


0.1 1 3


 29%|██████████████████████████████████████▎                                                                                               | 571/2000 [00:01<00:03, 415.11it/s]


0.1 1 4


 25%|████████████████████████████████▉                                                                                                     | 491/2000 [00:01<00:03, 418.60it/s]


0.1 1 5


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_50517/2234391497.py", line 7, in <module>
    opt_result = run_fn(seed, lam=cur_lam, Rfct=cur_Rfct)
  File "/tmp/ipykernel_50517/1679681407.py", line 13, in run_fn
    cur_data = gen_simu_data_all(seed)
  File "/tmp/ipykernel_50517/3798789312.py", line 7, in gen_simu_data_all
    simu_psd = gen_simu_psd(paras.n, paras.d, 10)
  File "/data/rajlab1/user_data/jin/MyResearch/HDF_infer/notebooks/../mypkg/hdf_utils/data_gen.py", line 40, in gen_simu_psd
    psd_out = psd_raw + np.random.randn(len(psd_raw))*noise_sd
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2064, in showtraceback
    stb = value._render_traceback_(

TypeError: object of type 'NoneType' has no len()