- 0304: 
    - leverage the hypothesis test to choose the proper allocation (strategy 3)
    - record the ratio of acceptable weights in the simulation
    
- 0312:
    - use new loss (rbf kernel)
    
- 0315:
    - use polynomial kernel for the loss
    
- 0317:
    - use rbf kernel (rough tuned, smaller batch size) with new network structure
    
- 0318:
    - determine the bandwidth of rbf kernel
    
- 0323:
    - change the bandwidth of rbf kernel as 20

- 0324:
    - change the bandwidth of rbf kernel as 10

- 0326:
    - introduce optimization for the bandwidth of rbf kernel
    
- 0426:
    - change the hyper-parameter of branson's test to 0.15

- 0428:
    - update the network script
    
- 0430:
    - update the network script, add weight regularization
    - change the name of S1 and S2
    
- 0506:
    - fix how the ReR response is generated
    
- 0515:
    - standardize the method
- 0521:
    - increase patience
    - change the hyper-parameter term
- 0525:
    - fix a bug on ReR generation
    - add auto-saving for ReR

In [1]:
import os
import itertools
from sklearn.preprocessing import StandardScaler

In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import pandas as pd
from tqdm import tqdm
from scipy.stats import ks_2samp

from datagen import *
from network import *

from joblib import Parallel, delayed

In [3]:
def y_rer_gen(y,z,z_rer,tau=1):
    """Shift the outcome `y` by `tau` times the change in allocation.

    The change in allocation is `z_rer - z`; entries whose allocation is
    unchanged keep their original outcome.
    """
    allocation_shift = z_rer - z
    return y + tau*allocation_shift

In [4]:
def expand_grid(data_dict):
    """Build a dataframe with one row per combination of the given values.

    Each key of `data_dict` becomes a column; the rows enumerate the
    Cartesian product of the value lists, with the last key varying fastest.
    """
    column_names = list(data_dict)
    value_lists = [data_dict[name] for name in column_names]
    combinations = itertools.product(*value_lists)
    return pd.DataFrame.from_records(combinations, columns=column_names)

In [5]:
# Hyper-parameter grid: every value is a list, and expand_grid() turns the
# Cartesian product of these lists into one row per configuration.
# Only 'pa' has more than one candidate value here.
net_params = dict(
    data_path=['./save/simu_data/'],
    batch_size=[512],
    lr=[0.001],
    pa=[0.1, 0.5, 1],
    val_metric=['KS'],
    num_nodes=[512],
    num_iters=[5000],
    num_init_iters=[500],
    x_lambda=[1],
    wt_lambda=[1],
    patience=[15],
    kernel_params=[
        dict(kernel='rbf', gamma=10, degree=2, c=0),
    ],
    random_state=[0],
)

In [6]:
# Root directory for everything this notebook writes: per-dataset checkpoints,
# cached ReR draws, per-dataset estimate CSVs and the final summary CSVs.
save_folder_root = './save/qrwg_rer_fp0521/'

In [7]:
# One row per hyper-parameter configuration (Cartesian product of net_params).
param_df = expand_grid(net_params)

In [8]:
param_df

Unnamed: 0,data_path,batch_size,lr,pa,val_metric,num_nodes,num_iters,num_init_iters,x_lambda,wt_lambda,patience,kernel_params,random_state
0,./save/simu_data/,512,0.001,0.1,KS,512,5000,500,1,1,15,"{'kernel': 'rbf', 'gamma': 10, 'degree': 2, 'c...",0
1,./save/simu_data/,512,0.001,0.5,KS,512,5000,500,1,1,15,"{'kernel': 'rbf', 'gamma': 10, 'degree': 2, 'c...",0
2,./save/simu_data/,512,0.001,1.0,KS,512,5000,500,1,1,15,"{'kernel': 'rbf', 'gamma': 10, 'degree': 2, 'c...",0


## Two Strategies

- Strategy 1 (S1): point estimator (single wt) + CI (draw from single wt)
- Strategy 2 (S2): point estimator (avg wts) + CI (draw from single wt)
- Strategy 3 (S3): point estimator (acceptable wt) + CI (draw from single wt)
- Strategy 4 (S4): point estimator (acceptable wt) + CI (draw from acceptable wt)
<!-- - Strategy 3 (S3): point estimator (avg wt) + CI (draw from single wts) -->

In [9]:
def allocation_test(mdist_obs,mdist_array):
    """Return the add-one proportion of reference values at least `mdist_obs`.

    Computes (#{mdist_array >= mdist_obs} + 1) / (len(mdist_array) + 1), so
    the result is always strictly positive and at most 1.
    """
    n_reference = mdist_array.shape[0]
    n_at_least_obs = np.sum(mdist_array>=mdist_obs)
    return (n_at_least_obs+1)/(n_reference+1)

In [10]:
def _strategy_estimates(outcomes, z, z_rer_mat, wts=None):
    """Return (estimates, intervals) for each outcome vector in `outcomes`.

    All point estimates are computed first (``tau_diff``), then all intervals
    (``ri_ci``), matching the call order of the original hand-unrolled code.
    When `wts` is None, ``tau_diff`` is called without a weight argument.
    """
    if wts is None:
        estimates = [tau_diff(y, z) for y in outcomes]
    else:
        estimates = [tau_diff(y, z, wts) for y in outcomes]
    intervals = [ri_ci(y, z, est, z_rer_mat) for y, est in zip(outcomes, estimates)]
    return estimates, intervals


def _qrwg_strategy_frame(outcomes, z, wts, z_rer_mat, label):
    """Build the per-outcome estimate/CI table of one QRWG strategy."""
    estimates, intervals = _strategy_estimates(outcomes, z, z_rer_mat, wts)
    return pd.DataFrame({
        'tauhat': estimates,
        "95CI_lb": [ci[0] for ci in intervals],
        "95CI_ub": [ci[1] for ci in intervals],
        'type': label
    })


def _acceptance_pvalues(x, z, wts_mat, mdist_array):
    """Run ``allocation_test`` on every weight vector (row) of `wts_mat`."""
    return np.array([allocation_test(maha_dist(x, z, wts).item(), mdist_array)
                     for wts in wts_mat])


def parallel_unit(i,r,scenario,batch_size,
                  lr,pa,
                  num_nodes,num_iters,
                  num_init_iters,
                  x_lambda,
                  wt_lambda,
                  kernel_params,val_metric,
                  patience,random_state,
                  data_path):
    """Process one simulated dataset: fit/reload QRWG, then compare with ReR.

    Steps, in order:
      1. Load ``data_path + scenario + '/d<i>.npy'`` and keep the first
         ``nt*(1+r)`` rows, where ``nt = int(z.sum())``.
      2. Standardize the covariates (the unscaled copy is kept for ``y_gen``).
      3. Train a ``QRWG`` estimator, or rebuild it from
         ``final_checkpoint.pt`` when that file already exists.
      4. Seed numpy and torch with `i`, draw one ReR allocation and 1000
         weight vectors from the estimator.
      5. Load or create the cached ReR draws (``zmat.npy``, ``mdist.npy``).
      6. Compare the covariate mean differences of both methods with a
         two-sample KS test (statistic and p-value averaged over covariates)
         and record the share of weight vectors passing the allocation test.
      7. Write ``qrwg_est.csv`` (strategies S1, S2, S3) and ``rer_est.csv``
         unless they already exist.

    Strategies written to ``qrwg_est.csv``:
      * S1 - the first generated weight vector;
      * S2 - the average of the generated weight vectors;
      * S3 - the first generated weight vector whose allocation-test value
        exceeds the acceptance level (new batches are drawn until one exists).

    Parameters
    ----------
    i : index of the dataset file; also used as the random seed.
    r : multiplier controlling how many rows are kept (``nt*(1+r)``).
    scenario : sub-folder name of the simulation scenario.
    batch_size, lr, pa, num_nodes, num_iters, num_init_iters, x_lambda,
    wt_lambda, kernel_params, val_metric, patience, random_state :
        forwarded to ``QRWG``; `pa` is also forwarded to ``ReR`` and used in
        the output path.
    data_path : root folder of the simulated data.

    Returns
    -------
    tuple
        ``(qrwg_values, rer_values, xmdiff_ks, xmdiff_pval, accept_ratio)``
        where the first two entries are the ``.values`` of the two CSV files
        re-read from disk.
    """
    n_draws = 1000        # number of weight vectors / ReR allocations drawn
    accept_level = 0.15   # allocation-test value a weight vector must exceed

    print('------------- Data:',i,'------------- ')

    # load the data
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # files produced by the project's own data generator.
    data_full_path = data_path+ scenario+'/d' + str(i) + '.npy'
    record = np.load(data_full_path,allow_pickle=True).item()

    # unzip the data
    x = record['x']
    z = record['z']
    y1 = record['y1']
    y2 = record['y2']
    y3 = record['y3']

    # keep the first nt*(1+r) rows only
    nt = int(z.sum())
    n_keep = nt*(1+r)
    x = x[:n_keep]
    z = z[:n_keep]
    y1 = y1[:n_keep]
    y2 = y2[:n_keep]
    y3 = y3[:n_keep]

    sc = StandardScaler()
    xx = x.copy() # original x
    x = sc.fit_transform(x) # standardize the covariates

    save_folder = save_folder_root+scenario+'/'+'r='+str(r)+'/pa='+str(pa)+'/'+str(i)+'/'

    if not os.path.exists(save_folder):
        print('Creat the folder.')
        os.makedirs(save_folder)

    # constructor arguments shared by the "train" and "reload" paths
    shared_kwargs = dict(lr=lr,
                         batch_size=batch_size,
                         patience=patience,
                         num_iters=num_iters,
                         pa=pa,
                         num_nodes=num_nodes,
                         val_metric=val_metric,
                         save_folder=save_folder,
                         kernel_params=kernel_params,
                         verbose=False,
                         random_state=random_state)

    if not os.path.exists(save_folder+'final_checkpoint.pt'):
        print('Train the model from scratch.')

        estimator = QRWG(num_init_iters=num_init_iters,
                         x_lambda=x_lambda,
                         wt_lambda=wt_lambda,
                         **shared_kwargs)

        # train the model from scratch
        estimator.fit(x,z)

    else:
        print('Skip! The model has been trained.')
        # NOTE(review): x_lambda / wt_lambda are not forwarded on this path
        # (as in the original code) - confirm they are irrelevant to predict().
        estimator = QRWG(num_init_iters=1,
                         **shared_kwargs)
        # set the attributes needed before the network can be initialised,
        # then restore the trained generator weights
        estimator.w = z
        estimator.nwts = int(estimator.w.shape[0])
        estimator.nt = int(z.sum())
        estimator.nc = int((1-z).sum())
        estimator._init_network()
        estimator.netG.load_state_dict(torch.load(save_folder+'final_checkpoint.pt'))

    np.random.seed(i)
    torch.manual_seed(i)

    # ReR(...)[0] is used as an allocation vector and ReR(...)[1] as a scalar
    # distance (presumably Mahalanobis - see the helper's definition).
    z_rer = ReR(pa,torch.Tensor(x),np.sum(z))[0].numpy()
    wts_mat_net = estimator.predict(n_draws).numpy()

    # the ReR reference draws are cached on disk so reruns reuse them
    if not os.path.exists(save_folder+'zmat.npy'):
        z_rer_mat = np.array([ReR(pa,torch.Tensor(x),np.sum(z))[0].numpy() for _ in range(n_draws)])
        np.save(save_folder+'zmat.npy',z_rer_mat)
    else:
        z_rer_mat = np.load(save_folder+'zmat.npy')

    if not os.path.exists(save_folder+'mdist.npy'):
        mdist_array = np.array([ReR(pa,torch.Tensor(x),np.sum(z))[1].item() for _ in range(n_draws)])
        np.save(save_folder+'mdist.npy',mdist_array)
    else:
        mdist_array = np.load(save_folder+'mdist.npy')

    # covariate mean differences under both methods, compared covariate by
    # covariate with a two-sample KS test; statistic and p-value are averaged
    mdiff_mat_net = np.array([cov_mdiff(x,z,wts) for wts in wts_mat_net])
    mdiff_mat_rer = np.array([cov_mdiff(x,alloc) for alloc in z_rer_mat])
    xmdiff_ks, xmdiff_pval = np.array([ks_2samp(mdiff_mat_net[:,j],
                                                mdiff_mat_rer[:,j]) for j in range(mdiff_mat_net.shape[1])]).mean(axis=0)

    test_array = _acceptance_pvalues(x,z,wts_mat_net,mdist_array)
    accept_ratio = np.mean(test_array>accept_level)

    if not os.path.exists(save_folder+'qrwg_est.csv'):
        outcomes = (y1,y2,y3)

        # strategy 1: the first generated weight vector
        df_est_s1 = _qrwg_strategy_frame(outcomes,z,wts_mat_net[0],z_rer_mat,'S1')

        # strategy 2: the average of all generated weight vectors
        avg_wts = wts_mat_net.mean(axis=0)
        df_est_s2 = _qrwg_strategy_frame(outcomes,z,avg_wts,z_rer_mat,'S2')

        # strategy 3: the first weight vector passing the allocation test.
        # If the current batch has none, keep drawing fresh batches.  (The
        # original relied on a bare `except:` around an IndexError for this.)
        # NOTE: this loop does not terminate if the generator never produces
        # an acceptable weight vector.
        candidates, candidate_tests = wts_mat_net, test_array
        while not np.any(candidate_tests>accept_level):
            candidates = estimator.predict(n_draws).numpy()
            candidate_tests = _acceptance_pvalues(x,z,candidates,mdist_array)
        # argmax over a boolean array returns the first True position
        wts = candidates[np.argmax(candidate_tests>accept_level)]
        df_est_s3 = _qrwg_strategy_frame(outcomes,z,wts,z_rer_mat,'S3')

        df_est = pd.concat([df_est_s1,df_est_s2,df_est_s3],axis=0)
        df_est.to_csv(save_folder+"qrwg_est.csv",index=False)
    else:
        print('Skip! QRWG has been considered')

    if not os.path.exists(save_folder+'rer_est.csv'):
        # generate the outcomes from the original (unscaled) covariates
        # under the ReR allocation
        y1_rer, y2_rer, y3_rer = y_gen(xx,z_rer)

        est_rer, ci_rer = _strategy_estimates((y1_rer,y2_rer,y3_rer),z_rer,z_rer_mat)

        df_ci = pd.DataFrame({
          'tauhat': est_rer,
          "95rerCI_lb": [ci[0] for ci in ci_rer],
          "95rerCI_ub": [ci[1] for ci in ci_rer]
        })

        df_ci.to_csv(save_folder+"rer_est.csv",index=False)
    else:
        print('Skip! ReR has been considered')

    # results are always re-read from disk so cached and fresh runs agree
    return pd.read_csv(save_folder+"qrwg_est.csv").values, pd.read_csv(save_folder+"rer_est.csv").values, xmdiff_ks, xmdiff_pval, accept_ratio

In [11]:
# i=0
# r=1
# kwargs = dict(param_df.iloc[0,:])
# data_path = kwargs['data_path']
# pa = 0.1
# lr = 1e-3
# batch_size=512
# patience=15
# num_iters=1000
# num_nodes=512
# val_metric='KS'
# random_state=0
# kernel_params = kwargs['kernel_params']
# scenario='scenario1'


In [12]:
# print('------------- Data:',i,'------------- ')

# # load the data
# data_full_path = data_path+ scenario+'/d' + str(i) + '.npy'
# dat = np.load(data_full_path,allow_pickle=True)

# # unzip the data
# x = dat.item()['x']
# z = dat.item()['z']
# y1 = dat.item()['y1']
# y2 = dat.item()['y2']
# y3 = dat.item()['y3']

# nt = int(z.sum())
# x = x[:nt*(1+r)]
# z = z[:nt*(1+r)]
# y1 = y1[:nt*(1+r)]
# y2 = y2[:nt*(1+r)]
# y3 = y3[:nt*(1+r)]

# sc = StandardScaler()
# xx = x.copy() # original x
# x = sc.fit_transform(x) # standardize the covariates

# # only use the mean difference loss
# save_folder = save_folder_root+scenario+'/'+'r='+str(r)+'/pa='+str(pa)+'/'+str(i)+'/'

# if not os.path.exists(save_folder):
#     print('Creat the folder.')
#     os.makedirs(save_folder)

# if not os.path.exists(save_folder+'final_checkpoint.pt'):
#     print('Train the model from scratch.')

#     estimator = QRWG(lr=lr,
#                       batch_size=batch_size,
#                       patience=patience,
#                       num_iters=num_iters,
#                       num_init_iters=num_init_iters,
#                       pa=pa,
#                       x_lambda=x_lambda,
#                       wt_lambda=wt_lambda,
#                       num_nodes=num_nodes,
#                       val_metric=val_metric,
#                       save_folder=save_folder,
#                       kernel_params=kernel_params,
#                       verbose=False,
#                       random_state=random_state)

#     # train the model from scratch
#     estimator.fit(x,z)

# else:
#     print('Skip! The model has been trained.')
#     estimator = QRWG(lr=lr,
#                       batch_size=batch_size,
#                       patience=patience,
#                       num_iters=num_iters,
#                       num_init_iters=1,
#                       pa=pa,
#                       num_nodes=num_nodes,
#                       val_metric=val_metric,
#                       save_folder=save_folder,
#                       kernel_params=kernel_params,
#                       verbose=False,
#                       random_state=random_state)
#     estimator.w = z
#     estimator.nwts = int(estimator.w.shape[0])
#     estimator.nt = int(z.sum())
#     estimator.nc = int((1-z).sum())
#     estimator._init_network()
#     estimator.netG.load_state_dict(torch.load(save_folder+'final_checkpoint.pt'))

In [13]:
# np.random.seed(i)
# torch.manual_seed(i)

# z_rer = ReR(pa,torch.Tensor(x),np.sum(z))[0].numpy()
# wts_mat_net = estimator.predict(1000).numpy()

In [14]:
# if not os.path.exists(save_folder+'zmat.npy'):
#     z_rer_mat = np.array([ReR(pa,torch.Tensor(x),np.sum(z))[0].numpy() for i in range(1000)])
#     #np.save(save_folder+'zmat.npy',z_rer_mat)
# else:
#     z_rer_mat = np.load(save_folder+'zmat.npy')

# if not os.path.exists(save_folder+'mdist.npy'):
#     mdist_array = np.array([ReR(pa,torch.Tensor(x),np.sum(z))[1].item() for i in range(1000)])
#     #np.save(save_folder+'mdist.npy',mdist_array)
# else:
#     mdist_array = np.load(save_folder+'mdist.npy')

In [15]:
# Run configuration: parallel workers, datasets per setting, and the effect
# value used as the reference when computing bias / rmse / coverage.
n_kernel, n_data, tau = 40, 200, 1

# Per-outcome QReR summaries (one list per outcome y1, y2, y3).
result1_df, result2_df, result3_df = [], [], []

# Per-outcome ReR summaries.
result1_rer_df, result2_rer_df, result3_rer_df = [], [], []

# Covariate mean-difference KS summaries and acceptance ratios.
xmdiff_df_lst, accept_ratio_df_lst = [], []

In [16]:
# kwargs

In [17]:
# for i in tqdm(range(200)):
#     print('i',i)
#     parallel_unit(i=i,r=r,scenario=scenario,**kwargs)

In [18]:
def _summarize_replicates(dat_array, tau):
    """Summarize replicated estimates stored as (replicate, row, column).

    Column 0 holds the point estimate, columns 1 and 2 the lower and upper
    interval bounds.  Returns bias, rmse, interval coverage of `tau` and mean
    interval width, each computed across replicates (axis 0).
    """
    tauhat = dat_array[:,:,0]
    lower = dat_array[:,:,1]
    upper = dat_array[:,:,2]
    return {'bias': tauhat.mean(axis=0)-tau,
            # astype(float): the array is object-typed when a string column is present
            'rmse': np.sqrt(np.mean((tauhat-tau)**2,axis=0).astype(float)),
            'covarage_ri': ((lower<=tau)*(upper>=tau)).mean(axis=0),
            'width_ri': (upper - lower).mean(axis=0)}


# Run every (scenario, r, hyper-parameter row) setting over all datasets in
# parallel and append the summaries to the accumulator lists.
for scenario in ['scenario1','scenario2','scenario3']:
    for r in [1,2]:
        print('---------------------',scenario,'r =',r,'---------------------')
        for i_param in range(param_df.shape[0]):
            kwargs = dict(param_df.iloc[i_param,:])
            # look `pa` up by name rather than by column position so the grid
            # can gain or reorder columns without corrupting the summaries
            pa_value = param_df['pa'].iloc[i_param]
            print('----------------- [%d/%d] -----------------\n'%(i_param+1,param_df.shape[0]))
            results_all = Parallel(n_jobs=n_kernel)(delayed(parallel_unit)(i=i,r=r,scenario=scenario,**kwargs) for i in tqdm(range(n_data)))

            # qrwg: rows are ordered strategy-major (S1: y1,y2,y3; S2: ...; S3: ...)
            dat_array = np.array([res[0] for res in results_all])
            result_dict = {**_summarize_replicates(dat_array,tau),
                      'pa':pa_value,
                      'method':['QReR-'+dat_array[0,k,3] for k in range(dat_array.shape[1])],
                      'r':r,
                      'scenario':scenario}
            result_df = pd.DataFrame(result_dict)
            # split by outcome: rows 0,3,6 -> y1; 1,4,7 -> y2; 2,5,8 -> y3
            result1_df.append(result_df.iloc[[0,3,6],:])
            result2_df.append(result_df.iloc[[1,4,7],:])
            result3_df.append(result_df.iloc[[2,5,8],:])

            # rer: one row per outcome
            dat_array = np.array([res[1] for res in results_all])
            result_dict = {**_summarize_replicates(dat_array,tau),
                      'pa':pa_value,
                      'method':'ReR',
                      'r':r,
                      'scenario':scenario}
            result_df = pd.DataFrame(result_dict)
            result1_rer_df.append(result_df.iloc[0,:])
            result2_rer_df.append(result_df.iloc[1,:])
            result3_rer_df.append(result_df.iloc[2,:])

            ## check the xmdiff (mean over datasets of the per-dataset averages)
            xmdiff_dict = {'r':r,
                           'scenario':scenario,
                           'pa':pa_value,
                           'KS':np.array([res[2] for res in results_all]).mean().item(),
                           'p-val':np.array([res[3] for res in results_all]).mean().item()
                      }
            xmdiff_df = pd.DataFrame(xmdiff_dict,index=[0])
            xmdiff_df_lst.append(xmdiff_df)

            accept_ratio_dict = {'r':r,
                           'scenario':scenario,
                           'pa':pa_value,
                           'accept_ratio':np.array([res[4] for res in results_all]).mean().item()}
            accept_ratio_df = pd.DataFrame(accept_ratio_dict,index=[0])
            accept_ratio_df_lst.append(accept_ratio_df)

  0%|          | 0/200 [00:00<?, ?it/s]

--------------------- scenario1 r = 1 ---------------------
----------------- [1/3] -----------------



100%|██████████| 200/200 [00:05<00:00, 39.66it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [2/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 58.07it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [3/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 58.73it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

--------------------- scenario1 r = 2 ---------------------
----------------- [1/3] -----------------



100%|██████████| 200/200 [00:04<00:00, 49.61it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [2/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 53.51it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [3/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 52.77it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

--------------------- scenario2 r = 1 ---------------------
----------------- [1/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 59.36it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [2/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 59.78it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [3/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 59.94it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

--------------------- scenario2 r = 2 ---------------------
----------------- [1/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 50.86it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [2/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 53.60it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [3/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 53.57it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

--------------------- scenario3 r = 1 ---------------------
----------------- [1/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 61.09it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [2/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 61.89it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [3/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 61.75it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

--------------------- scenario3 r = 2 ---------------------
----------------- [1/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 52.37it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [2/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 50.12it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

----------------- [3/3] -----------------



100%|██████████| 200/200 [00:03<00:00, 51.07it/s]


In [19]:
pd.DataFrame(result1_rer_df)

Unnamed: 0,bias,rmse,covarage_ri,width_ri,pa,method,r,scenario
0,0.004139,0.30902,0.955,1.24845,0.1,ReR,1,scenario1
0,0.029207,0.447042,0.94,1.6996,0.5,ReR,1,scenario1
0,-0.023117,0.574763,0.94,2.17375,1.0,ReR,1,scenario1
0,-0.000725,0.274191,0.95,1.0707,0.1,ReR,2,scenario1
0,0.025591,0.370507,0.955,1.45365,0.5,ReR,2,scenario1
0,-0.021953,0.477069,0.95,1.8689,1.0,ReR,2,scenario1
0,0.001218,0.385086,0.95,1.4933,0.1,ReR,1,scenario2
0,0.037899,0.525422,0.945,2.0498,0.5,ReR,1,scenario2
0,-0.037723,0.681188,0.95,2.6274,1.0,ReR,1,scenario2
0,0.018958,0.294759,0.955,1.2219,0.1,ReR,2,scenario2


In [20]:
# Outcome y1: stack each setting's QReR rows on top of the matching ReR row
# (the ReR summaries are stored as Series, hence DataFrame(...).T).
outcome1 = [
    pd.concat([qrer_rows, pd.DataFrame(rer_row).T], axis=0)
    for qrer_rows, rer_row in zip(result1_df, result1_rer_df)
]
outcome1_df = pd.concat(outcome1,axis=0)
outcome1_df['outcome'] = 'Linear'
outcome1_df.set_index(['scenario','r','method'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,bias,rmse,covarage_ri,width_ri,pa,outcome
scenario,r,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
scenario1,1,QReR-S1,0.00829,0.343818,0.92,1.2493,0.1,Linear
scenario1,1,QReR-S2,-0.023528,0.116054,1.0,1.24975,0.1,Linear
scenario1,1,QReR-S3,0.046082,0.271471,0.985,1.24785,0.1,Linear
scenario1,1,ReR,0.004139,0.30902,0.955,1.24845,0.1,Linear
scenario1,1,QReR-S1,-0.00999,0.434436,0.945,1.6978,0.5,Linear
...,...,...,...,...,...,...,...,...
scenario3,2,ReR,0.006406,0.45248,0.95,1.7492,0.5,Linear
scenario3,2,QReR-S1,-0.102648,0.580003,0.95,2.2602,1.0,Linear
scenario3,2,QReR-S2,-0.041134,0.109118,1.0,2.2564,1.0,Linear
scenario3,2,QReR-S3,0.02109,0.504852,0.98,2.25525,1.0,Linear


In [21]:
# Outcome y2: stack each setting's QReR rows on top of the matching ReR row
# (the ReR summaries are stored as Series, hence DataFrame(...).T).
outcome2 = [
    pd.concat([qrer_rows, pd.DataFrame(rer_row).T], axis=0)
    for qrer_rows, rer_row in zip(result2_df, result2_rer_df)
]
outcome2_df = pd.concat(outcome2,axis=0)
outcome2_df['outcome'] = 'NonLinear1'
outcome2_df.set_index(['scenario','r','method'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,bias,rmse,covarage_ri,width_ri,pa,outcome
scenario,r,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
scenario1,1,QReR-S1,0.003662,0.493466,0.925,1.7949,0.1,NonLinear1
scenario1,1,QReR-S2,-0.035671,0.19546,1.0,1.7941,0.1,NonLinear1
scenario1,1,QReR-S3,0.042611,0.418053,0.965,1.793,0.1,NonLinear1
scenario1,1,ReR,0.013678,0.464913,0.965,1.79375,0.1,NonLinear1
scenario1,1,QReR-S1,-0.015116,0.633343,0.96,2.41185,0.5,NonLinear1
...,...,...,...,...,...,...,...,...
scenario3,2,ReR,0.004226,0.631118,0.955,2.47995,0.5,NonLinear1
scenario3,2,QReR-S1,-0.244316,0.831529,0.945,3.1735,1.0,NonLinear1
scenario3,2,QReR-S2,-0.163963,0.249759,1.0,3.16885,1.0,NonLinear1
scenario3,2,QReR-S3,-0.079335,0.703125,0.98,3.16705,1.0,NonLinear1


In [22]:
# Outcome y3: stack each setting's QReR rows on top of the matching ReR row
# (the ReR summaries are stored as Series, hence DataFrame(...).T).
outcome3 = [
    pd.concat([qrer_rows, pd.DataFrame(rer_row).T], axis=0)
    for qrer_rows, rer_row in zip(result3_df, result3_rer_df)
]
outcome3_df = pd.concat(outcome3,axis=0)
outcome3_df['outcome'] = 'NonLinear2'
outcome3_df.set_index(['scenario','r','method'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,bias,rmse,covarage_ri,width_ri,pa,outcome
scenario,r,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
scenario1,1,QReR-S1,0.04195,2.323347,0.92,7.7779,0.1,NonLinear2
scenario1,1,QReR-S2,0.018311,2.051431,0.965,7.7769,0.1,NonLinear2
scenario1,1,QReR-S3,0.174211,2.268272,0.935,7.77755,0.1,NonLinear2
scenario1,1,ReR,0.030233,2.026781,0.955,7.78745,0.1,NonLinear2
scenario1,1,QReR-S1,0.097949,2.416649,0.905,7.9296,0.5,NonLinear2
...,...,...,...,...,...,...,...,...
scenario3,2,ReR,-0.159411,4.750419,0.945,15.49565,0.5,NonLinear2
scenario3,2,QReR-S1,-5.492119,7.078483,0.695,15.49455,1.0,NonLinear2
scenario3,2,QReR-S2,-5.850971,7.002769,0.71,15.48785,1.0,NonLinear2
scenario3,2,QReR-S3,-5.614905,7.203379,0.685,15.4923,1.0,NonLinear2


In [23]:
# Stack the three per-outcome tables into a single long table.
outcome_df = pd.concat([outcome1_df,outcome2_df,outcome3_df])

In [24]:
# Index by the experimental factors so only the metric columns remain.
# NOTE: this rebinds outcome_df, so re-running the cell raises a KeyError
# because the columns have already been moved into the index.
outcome_df = outcome_df.set_index(['r','scenario','outcome','pa','method'])
outcome_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,bias,rmse,covarage_ri,width_ri
r,scenario,outcome,pa,method,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,scenario1,Linear,0.1,QReR-S1,0.00829,0.343818,0.92,1.2493
1,scenario1,Linear,0.1,QReR-S2,-0.023528,0.116054,1.0,1.24975
1,scenario1,Linear,0.1,QReR-S3,0.046082,0.271471,0.985,1.24785
1,scenario1,Linear,0.1,ReR,0.004139,0.30902,0.955,1.24845
1,scenario1,Linear,0.5,QReR-S1,-0.00999,0.434436,0.945,1.6978
...,...,...,...,...,...,...,...,...
2,scenario3,NonLinear2,0.5,ReR,-0.159411,4.750419,0.945,15.49565
2,scenario3,NonLinear2,1.0,QReR-S1,-5.492119,7.078483,0.695,15.49455
2,scenario3,NonLinear2,1.0,QReR-S2,-5.850971,7.002769,0.71,15.48785
2,scenario3,NonLinear2,1.0,QReR-S3,-5.614905,7.203379,0.685,15.4923


In [25]:
# Persist the combined QReR / ReR summary table under the save root.
outcome_df.to_csv(save_folder_root+'qrer_rer_fp.csv')

In [26]:
# One row per (r, scenario, pa) setting with the averaged KS statistic and
# p-value of the covariate mean-difference comparison.
xmdiff_df = pd.concat(xmdiff_df_lst)
xmdiff_df

Unnamed: 0,r,scenario,pa,KS,p-val
0,1,scenario1,0.1,0.083553,0.126815
0,1,scenario1,0.5,0.072987,0.14764
0,1,scenario1,1.0,0.068778,0.150153
0,2,scenario1,0.1,0.080984,0.129807
0,2,scenario1,0.5,0.07206,0.143974
0,2,scenario1,1.0,0.068111,0.147696
0,1,scenario2,0.1,0.082541,0.133801
0,1,scenario2,0.5,0.07503,0.13829
0,1,scenario2,1.0,0.06907,0.155675
0,2,scenario2,0.1,0.080379,0.132278


In [27]:
# Persist the KS summary table under the save root.
xmdiff_df.to_csv(save_folder_root+'xmdiff_qrer_rer_fp.csv')

In [28]:
# One row per (r, scenario, pa) setting with the mean share of generated
# weight vectors that passed the allocation test.
accept_ratio_df = pd.concat(accept_ratio_df_lst)
accept_ratio_df

Unnamed: 0,r,scenario,pa,accept_ratio
0,1,scenario1,0.1,0.60815
0,1,scenario1,0.5,0.683395
0,1,scenario1,1.0,0.81231
0,2,scenario1,0.1,0.62425
0,2,scenario1,0.5,0.688545
0,2,scenario1,1.0,0.812705
0,1,scenario2,0.1,0.607135
0,1,scenario2,0.5,0.677025
0,1,scenario2,1.0,0.81637
0,2,scenario2,0.1,0.615055


In [29]:
# Persist the acceptance-ratio table under the save root.
accept_ratio_df.to_csv(save_folder_root+'acceptence_ratio_fp.csv')