In [17]:
import pandas as pd
import sys,codecs
import numpy as np
import re
import os
import matplotlib.pyplot as plt
import scipy.stats as st
import collections
import statsmodels.api as sm
from time import time
import random

In [2]:
import os

NUM_CPU = len(os.sched_getaffinity(0)) #os.cpu_count() 

print(f'CPU counts: {NUM_CPU}')

CPU counts: 36


In [3]:
NUM_THREADS = 4

os.environ["MKL_NUM_THREADS"]     = str(NUM_THREADS)
os.environ["NUMEXPR_NUM_THREADS"] = str(NUM_THREADS)
os.environ["OMP_NUM_THREADS"]     = str(NUM_THREADS)

NUM_PROCESS = NUM_CPU // NUM_THREADS
print(f'max process: {NUM_PROCESS}')

max process: 9


In [4]:
# dimension
n = 1500; p = 200; q = 20; d = 3

In [5]:
# true parameter
tau = np.round(np.random.uniform(0.15,0.2,p),4)
rho = np.round(np.random.uniform(0.2,0.9,p),4)
beta0 = np.round(np.random.uniform(0.5,1,(p,10)),4)
beta1 = np.zeros((p,q-10))
beta = np.hstack((beta0, beta1))
bc = np.array(np.hstack([np.random.normal(0,1,(p,d))]))
# Omega mean and variance
mean = np.zeros(p)
ta = 0.15
i, j = np.mgrid[:p, :p]
cov = ta**2*ta**abs(i-j)
cov[list(range(p)),list(range(p))] = ta
sigma2 = np.zeros(p)
for j in range(p):
    sigma2[j] = bc[j,:]@bc[j,:] + ta

In [6]:
# adjacency matrix-DIM
pv = [2/n,0.5*(n**(-0.8)),0.5*(n**(-0.8)),1-2/n-0.5*(n**(-0.8))-0.5*(n**(-0.8))]
A = np.zeros((n,n))
for i in range(0,n):
    for j in range(i+1,n):
        m = np.argmax(np.random.multinomial(1,pv,1))
        if m==0:
            A[i,j] = A[j,i] = 1
        elif m==1:
            A[i,j] = 1; A[j,i] = 0
        elif m==2:
            A[i,j] = 0; A[j,i] = 1
ind = (np.where(np.sum(A,1)==0))[0]
for i in ind:
    A[i,random.sample(list(range(n)),4)] = 1
A[list(range(n)),list(range(n))] = 0
W = A/np.sum(A,1).reshape(n,1)

In [7]:
# check
print(np.sum(A))
np.max(np.sum(A,1))

6447.0


11.0

In [256]:
#Save true parameters
pd.DataFrame(rho).to_csv("Results_dep_BIC/n1500_p200_B100/rho_true_n1500_p200.csv",index=False)
pd.DataFrame(beta).to_csv("Results_dep_BIC/n1500_p200_B100/beta_true_n1500_p200.csv",index=False)
pd.DataFrame(bc).to_csv("Results_dep_BIC/n1500_p200_B100/B_true_n1500_p200.csv",index=False)
pd.DataFrame(A).to_csv("Results_dep_BIC/A_n1500.csv",index=False)

In [8]:
# Data generation function
def data_generator(nn, p, q, d, seed):

    rng = np.random.default_rng(seed) 
    
    q0 = 10
    Omega = rng.multivariate_normal(mean, cov, (nn,), 'raise')   
    X = rng.normal(0,1,(nn,q0))
    Z = rng.normal(0,1,(nn,d))
    
    Y = np.zeros((n,p))
    for j in range(p):
        y_j = np.dot(np.linalg.inv(np.eye(nn)-rho[j]*W), X@beta[j,:q0] + Z@bc[j,:]+Omega[:,j])#
        Y[:,j] = y_j 
        
    return Y,X,Z

In [9]:
# CMLE-gradient
def gardient_initial_lj_0(nn, p, q, WW, para, Yj, XX):
    
    q0 = 10
    
    rho_j = para[0]
    beta_j = para[1:(q0+1)]
    sigma2_j = para[-1]
    
    
    Sj = np.eye(nn)-rho_j*WW
    S_inverse = np.linalg.inv(Sj)
    G = WW@S_inverse
    Ep = Sj@Yj - XX@beta_j
    
    g1 = -np.trace(G)+np.dot((WW@Yj).T,Ep)/sigma2_j
    g2 = (XX.T@Ep)/sigma2_j
    g3 = -nn/(2*sigma2_j)+(Ep.T@Ep)/(2*(sigma2_j**2))
    #print(Ep)
    
    return np.concatenate(([g1],g2,[g3]))

In [10]:
# CMLE-gradient-full q
def gardient_initial_lj(nn, p, q, WW, para, Yj, XX):

    
    rho_j = para[0]
    beta_j = para[1:(q+1)]
    sigma2_j = para[-1]
    
    
    Sj = np.eye(nn)-rho_j*WW
    S_inverse = np.linalg.inv(Sj)
    G = WW@S_inverse
    Ep = Sj@Yj - XX@beta_j
    
    g1 = -np.trace(G)+np.dot((WW@Yj).T,Ep)/sigma2_j
    g2 = (XX.T@Ep)/sigma2_j
    g3 = -nn/(2*sigma2_j)+(Ep.T@Ep)/(2*(sigma2_j**2))
    #print(Ep)
    
    return np.concatenate(([g1],g2,[g3]))

In [11]:
# CMLE-hessian 
def hessian_initial_lj_0(nn, p, q, Wt, para, Yj, XX):
    
    q0 = 10
    
    rho_j = para[0]
    beta_j = para[1:(q0+1)]
    sigma2_j = para[-1]
    
    Sj = np.eye(nn)-rho_j*Wt
    S_inverse = np.linalg.inv(Sj)
    G = np.dot(Wt,S_inverse)
    WW = np.dot(Wt.T,Wt)
    WS = np.dot(Wt.T,Sj)
    SS = np.dot(Sj.T,Sj)
    Ep = Sj@Yj - XX@beta_j
    
    
    h11 = -np.trace(G@G)-np.dot(Yj.T@WW,Yj)/sigma2_j
    h12 = h21 = -(XX.T@(Wt@Yj)/sigma2_j).reshape(q0,1)
    h31 = h13 = -np.dot((Wt@Yj).T,Ep)/(sigma2_j**2)
    
    h22 = -XX.T@XX/sigma2_j
    h23 = h32 = -(XX.T@Ep/(2*sigma2_j**2)).reshape(q0,1)
    h33 = nn/(2*sigma2_j**2)-np.dot(Yj.T@SS,Yj)/(sigma2_j**3)
    
    H = np.block([
         [h11, h12.T, h13],
         [h21, h22, h23],
         [h31, h32.T, h33]
    ])
    
    return H

In [12]:
# CMLE-hessian -full q
def hessian_initial_lj(nn, p, q, Wt, para, Yj, XX):
    
  
    rho_j = para[0]
    beta_j = para[1:(q+1)]
    sigma2_j = para[-1]
    
    Sj = np.eye(nn)-rho_j*Wt
    S_inverse = np.linalg.inv(Sj)
    G = np.dot(Wt,S_inverse)
    WW = np.dot(Wt.T,Wt)
    WS = np.dot(Wt.T,Sj)
    SS = np.dot(Sj.T,Sj)
    Ep = Sj@Yj - XX@beta_j
    
    
    h11 = -np.trace(G@G)-np.dot(Yj.T@WW,Yj)/sigma2_j
    h12 = h21 = -(XX.T@(Wt@Yj)/sigma2_j).reshape(q,1)
    h31 = h13 = -np.dot((Wt@Yj).T,Ep)/(sigma2_j**2)
    
    h22 = -XX.T@XX/sigma2_j
    h23 = h32 = -(XX.T@Ep/(2*sigma2_j**2)).reshape(q,1)
    h33 = nn/(2*sigma2_j**2)-np.dot(Yj.T@SS,Yj)/(sigma2_j**3)
    
    H = np.block([
         [h11, h12.T, h13],
         [h21, h22, h23],
         [h31, h32.T, h33]
    ])
    
    return H

In [14]:
# Newton-CMLE
def newton_sea_initial_0(nn, p, q, Wt, pa0, Yj, XX, max_iter = 50, eps = 1e-4):
    
    q0 = 10
    pa_new = pa0
    for t in range(max_iter):
        pa_pre = pa_new
        gradient = gardient_initial_lj_0(nn, p, q, Wt, pa_pre, Yj, XX)/nn 
        hessian =  hessian_initial_lj_0(nn, p, q, Wt, pa_pre, Yj, XX)/nn 
        diff = np.linalg.solve(hessian+0.001*np.eye(q0+2),gradient)
        pa_new = pa_pre - diff 
        if pa_new[-1]<0.01: pa_new[-1] = 0.01
        if pa_new[0]>1: pa_new[0] = 0.95
        #print(np.max(abs(diff)))
        if np.linalg.norm(diff) < eps:
            break
            
    return pa_new,t+1

In [13]:
# Newton-CMLE-full q
def newton_sea_initial(nn, p, q, Wt, pa0, Yj, XX, max_iter = 100, eps = 1e-4):
    
    pa_new = pa0
    for t in range(max_iter):
        pa_pre = pa_new
        gradient = gardient_initial_lj(nn, p, q, Wt, pa_pre, Yj, XX)/nn 
        hessian =  hessian_initial_lj(nn, p, q, Wt, pa_pre, Yj, XX)/nn 
        diff = np.linalg.solve(hessian+0.001*np.eye(q+2),gradient)
        pa_new = pa_pre - diff 
        if pa_new[-1]<0.1: pa_new[-1] = 0.1
        if pa_new[0]>1: pa_new[0] = 0.95
        # print(np.max(abs(diff)))
        if np.linalg.norm(diff) < eps:
            break
            
    return pa_new,t+1

In [15]:
# SCAD deriative function
def SCAD_deriative_beta(beta_t, lamba, a = 3.7):
    
    abs_beta = np.abs(beta_t)
    grad = np.zeros_like(beta_t)
    
    mask1 = (abs_beta <= lamba)
    mask2 = (abs_beta > lamba) & (abs_beta <= a*lamba)
    
    grad[mask1] = lamba*np.sign(beta_t[mask1])
    grad[mask2] = ((a * lamba - abs_beta[mask2])/(a - 1))*np.sign(beta_t[mask2])
    
    return grad

In [16]:
# Newton-CMLE with SCAD
def newton_sea_SCAD(nn, p, q, Wt, paj, Yj, XX, lamba, a=3.7, max_iter = 50, eps = 1e-3):
    
    rho_j = paj[0]
    beta_new = paj[1:(q+1)]
    sigma2_j = paj[-1]
    for t in range(max_iter):
        beta_pre = beta_new
        Ep = (np.eye(nn)-rho_j*Wt)@Yj - XX@beta_pre
        gradient_beta = -(XX.T@Ep)
        hessian_beta = XX.T@XX
        grad_SCAD = SCAD_deriative_beta(beta_pre, lamba, a)
        S_lam_beta = np.diag(grad_SCAD)/abs(beta_pre)
        diff = np.linalg.inv(hessian_beta + nn*S_lam_beta)@(gradient_beta + nn*S_lam_beta@beta_pre)
        
        beta_new = beta_pre - diff 
        #print(np.max(abs(diff)))
        if np.linalg.norm(diff) < eps:
            break
            
    return beta_new,t+1

In [18]:
def log_likelihood_sar(rho_j, beta_j, sigma2_j, YY, XX, Wt):
    
    nn = len(YY)
    qq = len(beta_j)
    
    A = np.eye(nn) - rho_j * Wt
    det_term = np.log(np.linalg.det(A))  
    residual = YY - rho_j * Wt @ YY - XX @ beta_j
    loglik = - np.log(2 * np.pi * sigma2_j)/2 + det_term/nn - (residual.T @ residual) / (2 * sigma2_j * nn)
    
    return -loglik  # negative log-likelihood

In [19]:
thre = 1e-3
BICn = 100
par = np.zeros(q+2)
par[-1] = 1
sudu = (np.log(q*p)/n)**0.5 #(np.log(q*p)/n)**0.5
lam_set = np.linspace(sudu**9,2*sudu**0.5, BICn) #2*sudu**0.5
bic_sh = np.log(n)*(np.log(p*q))/n

In [None]:
pip install ray
pip install -U ipywidgets

In [None]:
import ray

ray.init(num_cpus=NUM_CPU, ignore_reinit_error=True)

In [21]:
# 整体过程
@ray.remote(num_cpus=4) 
def map_fun_BIC(bb):
    
    Y, X, Z = data_generator(n, p, q, d, seed = bb+166)
    
    BIC_set = np.zeros((BICn,p))
    Ln_j_set = np.zeros((BICn,p))
    theta_ini = np.zeros((p, 2+q))
    for j in range(p):
        ticn1 = time()
        theta_ini[j,:] = newton_sea_initial(n, p, q, W, par, Y[:,j], X)[0]
        rho0_h = theta_ini[j,:][0]
        beta0_h = theta_ini[j,:][1:(q+1)]
        sigma20_h = theta_ini[j,:][-1]
        for b in range(BICn):
            lambda_ = lam_set[b]
            beta_est = newton_sea_SCAD(n, p, q, W, theta_ini[j,:], Y[:,j], X, lambda_)
            beta_scad = np.where(beta_est[0]<thre, 0, beta_est[0])
            Ln_j_set[b,j] = log_likelihood_sar(rho0_h, beta_scad, sigma20_h, Y[:,j], X, W)
            BIC_set[b,j] = Ln_j_set[b,j] + len(np.where(beta_scad!=0)[0])*bic_sh
        tocn1 = time()
        argBIC = np.argmin(BIC_set[:,j])
        print(bb, j, argBIC, tocn1 - ticn1) 
        with open('Results_dep_BIC/process.txt', 'a') as f1:
            f1.write(str(bb) + ', '+ str(j) + ', '+ str(argBIC) +'\n')
        
    min_index = np.argmin(BIC_set, axis=0)
    beta_estt = np.zeros((p,q))
    for j in range(p):
        lambda_ = lam_set[int(min_index[j])]
        beta_estt[j,:] = newton_sea_SCAD(n, p, q, W, theta_ini[j,:], Y[:,j], X, lambda_)[0]        
    return BIC_set.T, min_index, beta_estt

In [None]:
BB = 100
tic1 = time()
tasks = [map_fun_BIC.remote(bb) for bb in range(BB)]
resultsn1500_p200BIC = ray.get(tasks)  # 等待所有任务完成
toc1 = time()
print(toc1 - tic1) # 总的计算时间

[36m(map_fun_BIC pid=569)[0m 5 0 21 90.47266268730164
[36m(map_fun_BIC pid=574)[0m 1 0 24 91.0824363231659
[36m(map_fun_BIC pid=571)[0m 4 0 24 91.97673487663269
[36m(map_fun_BIC pid=573)[0m 0 0 25 92.94851779937744
[36m(map_fun_BIC pid=575)[0m 2 0 18 93.98109722137451
[36m(map_fun_BIC pid=570)[0m 3 0 25 96.73275971412659
[36m(map_fun_BIC pid=573)[0m 0 1 24 60.1477210521698
[36m(map_fun_BIC pid=575)[0m 2 1 27 60.781996965408325
[36m(map_fun_BIC pid=574)[0m 1 1 28 67.05420088768005
[36m(map_fun_BIC pid=570)[0m 3 1 25 62.23113799095154
[36m(map_fun_BIC pid=569)[0m 5 1 15 70.72271275520325
[36m(map_fun_BIC pid=571)[0m 4 1 29 71.50028848648071
[36m(map_fun_BIC pid=573)[0m 0 2 24 61.611202001571655
[36m(map_fun_BIC pid=575)[0m 2 2 24 65.43160200119019
[36m(map_fun_BIC pid=570)[0m 3 2 28 62.18752646446228
[36m(map_fun_BIC pid=574)[0m 1 2 24 69.18128871917725
[36m(map_fun_BIC pid=571)[0m 4 2 22 65.4130163192749
[36m(map_fun_BIC pid=569)[0m 5 2 23 73.51957798

In [204]:
ray.shutdown()

In [252]:
BIC_set_n1500_p200_B100 = np.zeros((BB,BICn,p))
min_index_n1500_p200_B100 = np.zeros((BB,p))
beta_est_n1500_p200_B100 = np.zeros((BB,p,q))
for bt in range(BB):
    BIC_set_n1500_p200_B100[bt,:,:], min_index_n1500_p200_B100[bt,:], beta_est_n1500_p200_B100[bt,:,:] = resultsn1500_p200BIC[bt][0].T, resultsn1500_p200BIC[bt][1], resultsn1500_p200BIC[bt][2]

In [None]:
# averge selection consistency
1 - np.mean(abs((beta_est_n1500_p200_B100[:,:,:] > thre).astype(int) - (beta > thre).astype(int)))

In [254]:
# uniform selection consistency
1 - len(set(np.where((beta_est_n1500_p200_B100[:,:,:] > thre).astype(int) - (beta > thre).astype(int) != 0)[0]))/100

0.78

In [255]:
# Save resusts
pd.DataFrame(min_index_n1500_p200_B100).to_csv("Results_dep_BIC/n1500_p200_B100/min_index_dep_n1500_p200.csv",index=False)
for b in range(BB):
    pd.DataFrame(BIC_set_n1500_p200_B100[b,:,:]).to_csv("Results_dep_BIC/n1500_p200_B100/BIC_set_dep_n1500_p200_"+str(b)+'_.csv',index=False)
    pd.DataFrame(beta_est_n1500_p200_B100[b,:,:]).to_csv("Results_dep_BIC/n1500_p200_B100/beta_est_dep_n1500_p200_"+str(b)+'_.csv',index=False)