In [3]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from itertools import product
from simulation_utils import *
from test_utils import *

from tqdm.auto import tqdm
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def simulate(n_reps = 3, n_sample=500, n_quant = 128,
                kernel_X = 'constant',sig_X=1,
                kernel_Y = 'constant',sig_Y=1,
                sig_Z=1,beta1=1,
                box_params = {'max_depth':1,'n_estimators':200,'eta':0.1},
                L2_pen=0.0001,n_splits=5,
                dependency = 0):

    (T_plug,T_corrected,T_double,sig_list,sigd_list,p_cox) = ([] for _ in range(6))
    quantiles = []

    data_sampler = cox_sampler(sig_X,sig_Y,sig_Z,dependency,beta1,kernel_X,kernel_Y,n_quant)
    data_sampler.scale_and_set_baseline()

    for _ in tqdm(range(n_reps), leave=False):
        X,Y,Z,tau = data_sampler.sample_all(n_sample)

        g_p, g_c, sig, best_params = compute_gamma(tau,Z,X,box_params,n_quant,L2_pen=L2_pen,cross_validate=True)
        g_d, sig_d = compute_gamma_double(tau,Z,X,best_params, n_quant, n_splits=n_splits,L2_pen=L2_pen)


        T_plug.append(np.linalg.norm(g_p,ord=np.inf))
        T_corrected.append(np.linalg.norm(g_c,ord=np.inf))
        T_double.append(np.linalg.norm(g_d,ord=np.inf))
        sig_list.append(sig[-1])
        sigd_list.append(sig_d[-1])
        p_cox.append(cox_test(X,Z,tau))

    
    df = pd.DataFrame({
        "T_plug":T_plug,
        "T_corrected":T_corrected,
        "T_double":T_double,
        "sigma":sig_list,
        "sigma_double":sigd_list,
        "p_cox":p_cox,
        "n_sample": n_sample*np.ones(n_reps),
        "beta1": beta1*np.ones(n_reps),
        "kernel_X": [kernel_X]*n_reps,
        "kernel_Y": [kernel_Y]*n_reps,
        "alt_param": [dependency]*n_reps
    })
    return df

In [3]:
simulation_data = []

## Simulation settings
kernels = ['constant']
if 1:
    kernel_list = [(k,k) for k in kernels]
else:
    kernel_list = list(product(kernels,kernels))

beta_list = [1]
sample_sizes = [100,500,1000,2000,5000,10000]
dependency_param = [0]
quantiles = [16,128,256]
n_sim = len(kernel_list)*len(beta_list)*len(sample_sizes)*len(dependency_param)*len(quantiles)
param_grid = product(kernel_list,beta_list,sample_sizes,dependency_param,quantiles)

for (k_X,k_Y),beta_1,sample_size,dependency,n_q in tqdm(param_grid, position = 0, leave=True, total=n_sim):
    sim_data = simulate(
        n_reps = 150, n_sample=sample_size,n_quant=n_q,
        sig_X=1, kernel_X=k_X,
        sig_Y=1, kernel_Y=k_Y,
        sig_Z=1,
        beta1=beta_1,
        box_params = {'max_depth':2,'n_estimators':200,'eta':0.1},
        L2_pen=0.0001,n_splits=5,
        dependency=dependency
    )
    #simulation_data.append(sim_data)
full_data = pd.concat(simulation_data)

100%|██████████| 18/18 [00:00<00:00, 99469.66it/s]

16
128
256
16
128
256
16
128
256
16
128
256
16
128
256
16
128
256





ValueError: No objects to concatenate

In [5]:
filename = '/Users/bwq666/Documents/GitHub/nonparametric-cli-test/sim_data/varying_quantiles2.pkl'
if 0:
    with open(filename, 'wb') as f:
        pickle.dump(full_data, f)

In [4]:
def simulate2(n_reps = 100, n_sample=500, n_quant = 128,
                kernel_X = 'constant',sig_X=1,
                kernel_Y = 'constant',sig_Y=1,
                sig_Z=1,beta1=1,
                box_params = {'max_depth':1,'n_estimators':200,'eta':0.1},
                L2_pen=0.0001,n_splits=5,
                dependency = 0,savepaths=False):
    (gam_plug,gam_corrected,gam_double) = ([] for _ in range(3))
    (T_plug,T_corrected,T_double,sigd_list,p_cox) = ([] for _ in range(5))

    data_sampler = cox_sampler(sig_X,sig_Y,sig_Z,dependency,beta1,kernel_X,kernel_Y,n_quant)
    data_sampler.scale_and_set_baseline()

    for _ in tqdm(range(n_reps), leave=False):
        ## Sample data
        X,Y,Z,tau = data_sampler.sample_all(n_sample)

        # Fit tests
        g_p, g_c, sig, best_params = compute_gamma(tau,Z,X,box_params,n_quant,L2_pen=L2_pen,cross_validate=True)
        g_d, sig_d = compute_gamma_double(tau,Z,X,best_params, n_quant, n_splits=n_splits,L2_pen=L2_pen)

        ## save test results
 
        gam_plug.append(g_p[-1])
        gam_corrected.append(g_c[-1])
        gam_double.append(g_d[-1])
        T_plug.append(np.linalg.norm(g_p,ord=np.inf))
        T_corrected.append(np.linalg.norm(g_c,ord=np.inf))
        T_double.append(np.linalg.norm(g_d,ord=np.inf))
        sigd_list.append(sig_d if savepaths else sig_d[-1])
        p_cox.append(cox_test(X,Z,tau))
    
    df = pd.DataFrame({
        "gam_plug":gam_plug,
        "gam_corrected":gam_corrected,
        "gam_double":gam_double,
        "T_plug":T_plug,
        "T_corrected":T_corrected,
        "T_double":T_double,
        "sigma_double":sigd_list,
        "p_cox":p_cox,
        "n_sample": n_sample*np.ones(n_reps),
        "beta1": beta1*np.ones(n_reps),
        "kernel_X": [kernel_X]*n_reps,
        "kernel_Y": [kernel_Y]*n_reps,
        "alt_param": [dependency]*n_reps,
        'n_quant':[n_quant]*n_reps
    })

    return df

In [13]:
simulation_data = []

## Simulation settings
kernels = ['zero', 'constant']
if 1:
    kernel_list = [(k,k) for k in kernels]
else:
    kernel_list = list(product(kernels,kernels))

beta_list = [1]
sample_sizes = [100,500,1000,2000,8000]
dependency_param = [0]
quantiles = [16,128,256]
n_sim = len(kernel_list)*len(beta_list)*len(sample_sizes)*len(dependency_param)*len(quantiles)
param_grid = product(kernel_list,beta_list,sample_sizes,dependency_param,quantiles)

for (k_X,k_Y),beta_1,sample_size,dependency,n_q in tqdm(param_grid, position = 0, leave=True, total=n_sim):
    sim_data = simulate2(
        n_reps = 300, n_sample=sample_size,n_quant=n_q,
        sig_X=1, kernel_X=k_X,
        sig_Y=1, kernel_Y=k_Y,
        sig_Z=1,
        beta1=beta_1,
        box_params = {'max_depth':2,'n_estimators':200,'eta':0.1},
        L2_pen=0.0001,n_splits=5,
        dependency=dependency
    )
    simulation_data.append(sim_data)
full_data = pd.concat(simulation_data)

100%|██████████| 30/30 [14:15:26<00:00, 1710.88s/it]


In [14]:
filename = '/Users/bwq666/Documents/GitHub/nonparametric-cli-test/sim_data/varying_quantiles3.pkl'
if 0:
    with open(filename, 'wb') as f:
        pickle.dump(full_data, f)

In [10]:
full_data

Unnamed: 0,gam_plug,gam_corrected,gam_double,T_plug,T_corrected,T_double,sigma_double,p_cox,n_sample,beta1,kernel_X,kernel_Y,alt_param,n_quant
0,0.136570,0.059101,0.031658,0.175127,0.071437,0.050661,0.409438,0.105370,100.0,1.0,constant,constant,0,16
1,0.150897,0.011543,0.046489,0.170231,0.030799,0.074163,0.464586,0.140351,100.0,1.0,constant,constant,0,16
2,0.033045,-0.020718,-0.027167,0.070493,0.031489,0.041151,0.417215,0.678488,100.0,1.0,constant,constant,0,16
3,0.122908,-0.013045,-0.034438,0.157814,0.020253,0.064582,0.378946,0.940069,100.0,1.0,constant,constant,0,16
4,0.154260,-0.033478,-0.006474,0.174387,0.041551,0.030890,0.381428,0.472016,100.0,1.0,constant,constant,0,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,0.105233,-0.005876,0.003930,0.105900,0.007172,0.006627,0.182016,0.422439,8000.0,1.0,constant,constant,0,256
146,0.101880,-0.001223,-0.002872,0.102220,0.004488,0.005392,0.188870,0.814482,8000.0,1.0,constant,constant,0,256
147,0.108440,-0.001940,0.000328,0.108798,0.003089,0.006889,0.189861,0.823300,8000.0,1.0,constant,constant,0,256
148,0.100164,0.002336,0.001687,0.100575,0.002727,0.002735,0.182344,0.507955,8000.0,1.0,constant,constant,0,256
