### View the current logical CPU count of the server

In [1]:
import os

NUM_CPU = len(os.sched_getaffinity(0)) #os.cpu_count() 

print(f'CPU total: {NUM_CPU}')

CPU total: 128


### Limit the number of threads that can be called by a single process

In [2]:
NUM_THREADS = 4 

os.environ["MKL_NUM_THREADS"]     = str(NUM_THREADS)
os.environ["NUMEXPR_NUM_THREADS"] = str(NUM_THREADS)
os.environ["OMP_NUM_THREADS"]     = str(NUM_THREADS)

NUM_PROCESS = NUM_CPU // NUM_THREADS
print(f'Maximum number of parallel processes: {NUM_PROCESS}')

Maximum number of parallel processes: 32


### Import numpy, multiprocessing and other packages

In [4]:
import numpy as np
from numpy.random import default_rng
from time import time
import multiprocessing as mp
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import random
import math

### Global invariant parameters

In [5]:
K = 5; q = 8
#True parameters
sigma = 1; theta = [3,1.5,0,0,2,0,0,0]; gamma = list(range(-4,3*K-4,3));
pi = [0.15,0.2,0.3,0.25,0.1]
mean = np.zeros(q)
rho = 0.5
i, j = np.mgrid[:q, :q]
cov = rho**abs(i-j)

### Some functions needed for global calculations

In [38]:
def dup_rows(a, indx, num_dups=1):
    return np.insert(a,[indx+1]*num_dups,a[indx],axis=0)

def dup_cols(a, indx, num_dups=1):
    return np.insert(a,[indx+1]*num_dups,a[:,[indx]],axis=1)
def function_exp(x):
    return np.exp(x)
function_vexp = np.vectorize(function_exp)
def function_bin(p,x):
    return (p**x)*(1-p)**(1-x)
function_vbin = np.vectorize(function_bin)
def function_binlog(p,x):
    return (x*np.log(p)+(1-x)*np.log(1-p))
function_vbinlog = np.vectorize(function_binlog)

### 1. Simulation Data Generator--X,Y,Z,a_ik

In [7]:
def data_generator(n,p,rho_kj,seed):
    
    rng = default_rng(seed) #Setting the seed for the random number generator
    X = rng.multivariate_normal(mean, cov, (n,), 'raise')   # X nxq
    mk_class = rng.multinomial(n, pvals=pi)
    #Generate mK_gamma
    mK_gamma = []
    mK = []
    for k in range(K):
        idt = np.ones(int(mk_class[k]))
        mK.extend(idt*(k))
        mK_gamma.extend(idt*gamma[k])
    mK = [int(k) for k in mK]
    # Y
    epsilon = list(rng.normal(size=n))
    Y = mK_gamma + np.dot(X, theta) + epsilon
    # Z
    Z = np.zeros((n,p))
    for k in range(K):
        ki_ind = [i for i,x in enumerate(mK) if x==k]
        for j in range(p):
            Z[ki_ind,j] = rng.binomial(1,rho_kj.iloc[k,j],len(ki_ind))
            
    return [X, Y, Z, mK]

### 2. Function to compute pi_ik

In [39]:
def pi_ik_est(n,p,K,initial_est, p_est, X, Y, Z):
    #initial estimator
    pi_ini_est = initial_est[0:K]; gamma_ini_est = initial_est[K:2*K]
    theta_ini_est = initial_est[2*K:2*K+q]; sigma_ini_est = initial_est[-1]
    
    #pi_ik_est 
    c = Y-np.dot(X,theta_ini_est)
    g = np.array(gamma_ini_est)
    g_pi = np.array(pi_ini_est)
    a1 = (np.ones((K,n))*c.T).T
    a2 = np.ones((n,K))*g
    e_pri = -(a1-a2)**2/(2*sigma_ini_est**2)
    e_pri = function_vexp(e_pri)
    eb_pri = np.zeros((n,K))
    bigsum_nk = np.zeros((n,K))
    for k in range(K):
        bigsum_nk[:,k] = np.sum(function_vbinlog(p_est[k,:],Z), axis=1)
    bigsum_maxk = np.max(bigsum_nk, axis=1)
    for k in range(K):
        eb_pri[:,k] = function_vexp(bigsum_nk[:,k]-bigsum_maxk)
    a_pi = np.ones((n,K))*g_pi
    pi_est = (np.divide((eb_pri*a_pi).T,(np.sum(eb_pri*a_pi,1)).T)).T

    return pi_est

### 3. Define a mapping: random number seed$\mapsto$p estimator

In [18]:
def map_fun(b):
    
    X,Y,Z,a_ik = data_generator(n,p, rho_kj, seed = b)     #Generate simulation data
    pi_est = pi_ik_est(n,p,K,initial_est, p_est, X, Y, Z)  #Compute pi_ik
    
    return [pi_est, a_ik] 

### 4. Constants setting for simulation

In [101]:
n = 5000; p = 5000;  #Dimension
B = 500;             #Replication

In [102]:
#True p_kj
rho_kj = pd.read_csv("new_R500_responce/rho_5000_5000.csv")
#p_kj Estimation
rho_est_n5000_p5000 = np.zeros((B,K,p))
for b in range(B):
    rho_est_n5000_p5000[b,:,:] = pd.read_csv("new_R500_responce/n5000_p5000/rho_est_5000_5000_"+str(b)+".csv")
p_est = np.mean(rho_est_n5000_p5000, axis = 0)
#Initial estimators mean
initial_est = np.mean(pd.read_csv("R500_initial/initial_est_n5000.csv"),axis=0).tolist()

### 5. Calling multiple processes for simulation experiments

In [106]:
tic1 = time()

with mp.Pool(NUM_PROCESS) as pool:                  # Calling the NUM_PROCESS process
    Results1 = pool.map(map_fun, range(B))

toc1 = time()
print(toc1 - tic1)                                  # Total computing time

15811.333384752274


### 6. Obtain Results

In [107]:
pi_est_n5000_p5000 = np.zeros((B,n,K))
a_ik_n5000_p5000 = np.ones((B,n))
for b in range(B):
    pi_est_n5000_p5000[b,:,:] = Results1[b][0]
    a_ik_n5000_p5000[b,:] = Results1[b][1]

In [None]:
for b in range(B):
    pd.DataFrame(pi_est_n5000_p5000[b,:,:]).to_csv("R500_posterior/n5000_p5000/pi_est_5000_5000_"+str(b)+".csv",index=False)
pd.DataFrame(a_ik_n5000_p5000).to_csv("R500_posterior/a_ik_5000_5000.csv",index=False)