In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from itertools import product
from simulation_utils import *
from test_utils import *

from os import chdir

from tqdm.auto import tqdm
import pickle

In [2]:
def simulate(n_reps = 3, n_sample=500, n_quant = 128,
                kernel_X = 'constant',sig_X=5,
                kernel_Y = 'constant',sig_Y=5,
                sig_Z=5,beta1=1,
                box_params = {'max_depth':1,'n_estimators':200,'eta':0.1},
                L2_pen=1,n_splits=5,
                dependency = 0):
    """Run ``n_reps`` simulation replicates and collect the gamma estimates.

    A single ``cox_sampler`` is built from the sampler arguments and
    ``scale_and_set_baseline()`` is called on it once. Each replicate then
    draws ``n_sample`` observations, calls ``compute_gamma`` (with
    ``cross_validate=True``) and feeds the ``best_params`` it returns into
    ``compute_gamma_double``.

    Parameters
    ----------
    n_reps : int
        Number of replicates (rows of the returned frame).
    n_sample : int
        Sample size passed to ``data_sampler.sample_all``.
    n_quant : int
        Forwarded to ``cox_sampler``, ``compute_gamma`` and
        ``compute_gamma_double``.
    kernel_X, sig_X, kernel_Y, sig_Y, sig_Z, beta1, dependency
        Forwarded unchanged to ``cox_sampler``; their meaning is defined
        there.
    box_params : dict
        Starting parameters handed to ``compute_gamma``. The dict is never
        modified by this function: every replicate receives its own shallow
        copy, so the shared default cannot be altered between calls.
    L2_pen, n_splits
        Forwarded to the estimators (``n_splits`` only to
        ``compute_gamma_double``).

    Returns
    -------
    pandas.DataFrame
        One row per replicate with columns ``gam_plug``, ``gam_corrected``,
        ``gam_double`` (raw estimator outputs), ``T_plug``, ``T_corrected``,
        ``T_double`` (``np.linalg.norm(..., ord=np.inf)`` of those outputs),
        ``sigma``, ``sigma_double``, and the settings ``n_sample``,
        ``beta1``, ``kernel_X``, ``kernel_Y`` and ``alt_param`` (the value
        of ``dependency``).
    """
    (gam_plug,gam_corrected,gam_double,
        T_plug,T_corrected,T_double,
        sig_list,sigd_list) = ([] for _ in range(8))

    data_sampler = cox_sampler(sig_X,sig_Y,sig_Z,dependency,beta1,kernel_X,kernel_Y,n_quant)
    data_sampler.scale_and_set_baseline()

    for _ in tqdm(range(n_reps), leave=False):
        # Y is drawn together with the other variables but is not used below.
        X,Y,Z,tau = data_sampler.sample_all(n_sample)

        # Hand the estimator a private copy: `box_params` may be the shared
        # default dict, and an in-place change inside compute_gamma would
        # otherwise leak into later replicates and later simulate() calls.
        rep_params = None if box_params is None else dict(box_params)

        g_p, g_c, sig, best_params = compute_gamma(tau,Z,X,rep_params,n_quant,L2_pen=L2_pen,cross_validate=True)
        g_d, sig_d = compute_gamma_double(tau,Z,X,best_params, n_quant, n_splits=n_splits,L2_pen=L2_pen)

        gam_plug.append(g_p)
        gam_corrected.append(g_c)
        gam_double.append(g_d)
        # ord=np.inf: for 1-D input this is the largest absolute entry.
        T_plug.append(np.linalg.norm(g_p,ord=np.inf))
        T_corrected.append(np.linalg.norm(g_c,ord=np.inf))
        T_double.append(np.linalg.norm(g_d,ord=np.inf))
        sig_list.append(sig)
        sigd_list.append(sig_d)

    df = pd.DataFrame({
        "gam_plug":gam_plug,
        "gam_corrected":gam_corrected,
        "gam_double":gam_double,
        "T_plug":T_plug,
        "T_corrected":T_corrected,
        "T_double":T_double,
        "sigma":sig_list,
        "sigma_double":sigd_list,
        # Settings are repeated on every row so frames from different
        # configurations can be concatenated and grouped later.
        "n_sample": n_sample*np.ones(n_reps),
        "beta1": beta1*np.ones(n_reps),
        "kernel_X": [kernel_X]*n_reps,
        "kernel_Y": [kernel_Y]*n_reps,
        "alt_param": [dependency]*n_reps
    })
    return df

In [9]:
# Single simulation configuration; the resulting per-replicate frame is kept
# in `sim_data` for the cells that follow.
sim_data = simulate(
    # replicate count, sample size and n_quant
    n_reps=1000,
    n_sample=500,
    n_quant=128,
    # sampler settings (forwarded to cox_sampler inside simulate)
    kernel_X='constant',
    sig_X=1,
    kernel_Y='constant',
    sig_Y=1,
    sig_Z=1,
    beta1=-1,
    dependency=0,
    # estimator settings
    box_params={'max_depth': 2, 'n_estimators': 200, 'eta': 0.1},
    L2_pen=0.0001,
    n_splits=5,
)

100%|██████████| 1/1 [04:59<00:00, 299.67s/it]


In [8]:
# NOTE(review): absolute, machine-specific location; point this at your own
# checkout before running the cell.
path = '/Users/bwq666/Documents/BoxHed/'

# Explicit switch instead of `if 1:`; set to False to re-run the notebook
# without overwriting the stored results.
save_results = True
if save_results:
    # `path` already ends with '/', so no extra separator is added here.
    with open(path + 'sim_data/' + 'dml_example.pkl', 'wb') as f:
        # Dump the frame produced by simulate() above. The previous name,
        # `full_data`, is not defined anywhere in the visible notebook and
        # raised NameError on a fresh kernel.
        pickle.dump(sim_data, f)