In [9]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from itertools import product
from simulation_utils import *
from test_utils import *

from os import chdir

from tqdm.auto import tqdm
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
def simulate(n_reps = 3, n_sample=500, n_quant = 128,
                kernel_X = 'constant',sig_X=5,
                kernel_Y = 'constant',sig_Y=5,
                sig_Z=5,beta1=1,
                box_params = None,
                L2_pen=1,n_splits=5,
                dependency = 0):
    """Run ``n_reps`` replications of one simulation setting and tabulate them.

    One ``cox_sampler`` is built from the sampler arguments and
    ``scale_and_set_baseline()`` is called on it once. Each replication then
    draws ``n_sample`` observations, evaluates ``compute_gamma`` (with
    ``cross_validate=True``) and ``compute_gamma_double`` (using the
    ``best_params`` returned by ``compute_gamma``), and runs ``cox_test`` on
    the same draw.

    Parameters
    ----------
    n_reps : int
        Number of replications, i.e. rows of the returned frame.
    n_sample : int
        Sample size forwarded to ``data_sampler.sample_all``.
    n_quant : int
        Forwarded to ``cox_sampler``, ``compute_gamma`` and
        ``compute_gamma_double``.
    kernel_X, kernel_Y : str
        Kernel names forwarded to ``cox_sampler``.
    sig_X, sig_Y, sig_Z, beta1, dependency
        Forwarded unchanged to ``cox_sampler``.
    box_params : dict or None
        Learner settings forwarded to ``compute_gamma``. ``None`` (the
        default) means ``{'max_depth': 1, 'n_estimators': 200, 'eta': 0.1}``.
        A fresh shallow copy is handed to ``compute_gamma`` on every
        replication, so neither the caller's dict nor the defaults can be
        altered by that helper.
    L2_pen
        Forwarded to ``compute_gamma`` and ``compute_gamma_double``.
    n_splits : int
        Forwarded to ``compute_gamma_double``.

    Returns
    -------
    pandas.DataFrame
        One row per replication with columns ``T_plug``, ``T_corrected`` and
        ``T_double`` (``np.linalg.norm(..., ord=np.inf)`` of the respective
        gamma output), ``sigma`` and ``sigma_double`` (last element of the
        respective sigma output), ``p_cox`` (return value of ``cox_test``),
        and the setting columns ``n_sample``, ``beta1``, ``kernel_X``,
        ``kernel_Y`` and ``alt_param`` (the ``dependency`` argument).
    """
    # A dict literal in the signature would be a single object shared by every
    # call; use a None sentinel and build the defaults per call instead.
    if box_params is None:
        box_params = {'max_depth':1,'n_estimators':200,'eta':0.1}

    (T_plug,T_corrected,T_double,sig_list,sigd_list,p_cox) = ([] for _ in range(6))

    data_sampler = cox_sampler(sig_X,sig_Y,sig_Z,dependency,beta1,kernel_X,kernel_Y,n_quant)
    data_sampler.scale_and_set_baseline()

    for _ in tqdm(range(n_reps), leave=False):
        # The second sampled component is not used by any statistic below.
        X,_Y,Z,tau = data_sampler.sample_all(n_sample)

        # Pass a copy so one replication cannot influence the next through
        # in-place changes to the parameter dict.
        g_p, g_c, sig, best_params = compute_gamma(
            tau,Z,X,dict(box_params),n_quant,L2_pen=L2_pen,cross_validate=True)
        g_d, sig_d = compute_gamma_double(
            tau,Z,X,best_params, n_quant, n_splits=n_splits,L2_pen=L2_pen)

        T_plug.append(np.linalg.norm(g_p,ord=np.inf))
        T_corrected.append(np.linalg.norm(g_c,ord=np.inf))
        T_double.append(np.linalg.norm(g_d,ord=np.inf))
        # Only the final entry of each sigma output is recorded.
        sig_list.append(sig[-1])
        sigd_list.append(sig_d[-1])
        p_cox.append(cox_test(X,Z,tau))

    df = pd.DataFrame({
        "T_plug":T_plug,
        "T_corrected":T_corrected,
        "T_double":T_double,
        "sigma":sig_list,
        "sigma_double":sigd_list,
        "p_cox":p_cox,
        # Multiplying by np.ones stores these two settings as float columns.
        "n_sample": n_sample*np.ones(n_reps),
        "beta1": beta1*np.ones(n_reps),
        "kernel_X": [kernel_X]*n_reps,
        "kernel_Y": [kernel_Y]*n_reps,
        "alt_param": [dependency]*n_reps
    })
    return df

In [11]:
# Single-setting run of `simulate` with 10 replications. The result is bound
# to `sim_data`, a name the parameter-grid loop further down rebinds.
sim_data = simulate(
    n_reps=10,
    n_sample=500,
    n_quant=128,
    kernel_X='constant',
    kernel_Y='constant',
    sig_X=1,
    sig_Y=1,
    sig_Z=1,
    beta1=1,
    dependency=0,
    box_params={'max_depth': 2, 'n_estimators': 200, 'eta': 0.1},
    L2_pen=0.0001,
    n_splits=5,
)



In [14]:
# One DataFrame per grid cell is collected here and concatenated after the loop.
simulation_data = []

## Simulation settings
kernel_list = ['constant', 'gaussian', 'sine']
beta_list = [-1, 1]
sample_sizes = [100, 500, 1000]
dependency_param = [0, 5, 10]

# `product` yields a one-shot iterator without a length, so the number of grid
# cells is computed separately to give the progress bar a total.
n_sim = (
    len(kernel_list)
    * len(beta_list)
    * len(sample_sizes)
    * len(dependency_param)
)
param_grid = product(kernel_list, beta_list, sample_sizes, dependency_param)

for kernel, beta_1, sample_size, dependency in tqdm(
    param_grid, position=0, leave=True, total=n_sim
):
    # The same kernel is used for both X and Y in every grid cell.
    sim_data = simulate(
        n_reps=3,
        n_sample=sample_size,
        kernel_X=kernel,
        kernel_Y=kernel,
        sig_X=1,
        sig_Y=1,
        sig_Z=1,
        beta1=beta_1,
        dependency=dependency,
        box_params={'max_depth': 2, 'n_estimators': 200, 'eta': 0.1},
        L2_pen=0.001,
        n_splits=5,
    )
    simulation_data.append(sim_data)

# Each per-cell frame keeps its own row index, so index labels repeat across
# grid cells in the combined frame.
full_data = pd.concat(simulation_data)

100%|██████████| 54/54 [04:57<00:00,  5.50s/it]


In [16]:
# Write the combined simulation results to disk as a pickle.
# NOTE(review): this is an absolute, machine-specific path — confirm it exists
# on the machine running the notebook.
filename = '/Users/bwq666/Documents/GitHub/nonparametric-cli-test/sim_data/DMLvsCox.pkl'
if True:  # always-on guard; set to False to skip writing when re-running the cell
    with open(filename, 'wb') as pkl_file:
        pickle.dump(full_data, pkl_file)