In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from benchmarks import *

In [2]:
from joblib import Parallel,delayed

In [3]:
def parallel_unit(i):
    """Run the benchmark estimators on simulated dataset ``i`` and save the results.

    Loads ``./save/simu_data/d<i>.npy`` and, for each of the four methods
    (matching M-N-m, matching M-C-m, IPW, DR), computes one row per outcome
    (``y1``, ``y2``, ``y3``). Each method's rows are written to its own CSV
    in ``./save/benchmarks/<i>/`` with the columns ``tauhat``, ``95CI_lb``,
    ``95CI_ub`` and ``method``. A method whose CSV already exists is skipped,
    so an interrupted run can be resumed without recomputing finished work.

    Parameters
    ----------
    i : int
        Index of the simulated dataset. It is formatted with ``str()`` into
        both the input file name and the output folder name.

    Returns
    -------
    None
        Results are only written to disk.
    """
    # data path
    data_path = './save/simu_data/'

    # load the data
    data_full_path = data_path + 'd' + str(i) + '.npy'
    # NOTE: allow_pickle=True unpickles the file content; only load files
    # produced by a trusted simulation step.
    dat = np.load(data_full_path, allow_pickle=True)

    # unzip the data: .item() unwraps the single stored object, which is
    # then indexed by key (unwrap once instead of once per field)
    content = dat.item()
    x = content['x']
    z = content['z']
    outcomes = [content['y1'], content['y2'], content['y3']]

    save_folder = './save/benchmarks/' + str(i) + '/'

    if not os.path.exists(save_folder):
        print('Creat the folder.')
        # exist_ok guards against another worker creating the folder between
        # the check above and this call.
        os.makedirs(save_folder, exist_ok=True)

    # Every result file shares this layout.
    # NOTE(review): assumes each *_wrapper returns three values in this
    # order (estimate, lower bound, upper bound) -- confirm in `benchmarks`.
    columns = ['tauhat', '95CI_lb', '95CI_ub']

    # Matching (M-N-m)
    if not os.path.exists(save_folder + 'match_mnm.csv'):
        print('Perform Matching(M-N-m).')
        # fit logistic model for propensity scores
        _, ps_score = glm_wrapper(x, z)

        # tau
        tau_match_mnm = np.array(
            [match_wrapper(y, z, ps_score, None, False) for y in outcomes]
        )

        df_match_mnm = pd.DataFrame(tau_match_mnm, columns=columns)
        df_match_mnm['method'] = 'M-N-m'

        df_match_mnm.to_csv(save_folder + 'match_mnm.csv', index=False)

    else:
        print('Skip! Matching(M-N-m) has been performed.')

    # Matching (M-C-m)
    if not os.path.exists(save_folder + 'match_mcm.csv'):
        # Fixed: this branch used to announce M-N-m.
        print('Perform Matching(M-C-m).')
        # fit logistic model for propensity scores
        _, ps_score = glm_wrapper(x, z)

        # tau -- unlike M-N-m, the covariates x are passed and the flag is True
        tau_match_mcm = np.array(
            [match_wrapper(y, z, ps_score, x, True) for y in outcomes]
        )

        df_match_mcm = pd.DataFrame(tau_match_mcm, columns=columns)
        df_match_mcm['method'] = 'M-C-m'

        # Fixed: the M-N-m frame used to be written here, which stored the
        # wrong results (or failed when the M-N-m step had been skipped).
        df_match_mcm.to_csv(save_folder + 'match_mcm.csv', index=False)

    else:
        print('Skip! Matching(M-C-m) has been performed.')

    # IPW
    if not os.path.exists(save_folder + 'ipw.csv'):
        print('Perform IPW.')

        # tau
        tau_ipw1 = [ipw1_wrapper(y, z, x) for y in outcomes]

        df_ipw1 = pd.DataFrame(tau_ipw1, columns=columns)
        df_ipw1['method'] = 'IPW1'

        df_ipw1.to_csv(save_folder + 'ipw.csv', index=False)

    else:
        print('Skip! IPW has been performed.')

    # DR
    if not os.path.exists(save_folder + 'dr.csv'):
        print('Perform DR.')

        # tau
        tau_dr = [dr_wrapper(y, z, x) for y in outcomes]

        df_dr = pd.DataFrame(tau_dr, columns=columns)
        df_dr['method'] = 'DR'

        df_dr.to_csv(save_folder + 'dr.csv', index=False)

    else:
        print('Skip! DR has been performed.')

In [4]:
# Number of dataset indices to process: parallel_unit is called for 0 .. n_data-1.
n_data = 1000
# Number of joblib workers (passed as n_jobs) that run those calls concurrently.
n_kernel = 20

In [5]:
# One delayed parallel_unit call per dataset index; tqdm advances as the
# generator is consumed by the dispatcher.
tasks = (delayed(parallel_unit)(i=idx) for idx in tqdm(range(n_data)))
# Execute the calls on n_kernel workers; the value is the list of return values.
Parallel(n_jobs=n_kernel)(tasks)

100%|██████████| 1000/1000 [00:05<00:00, 170.65it/s]


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,