In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

from data.binary import BinaryCauseData
from data.continuous import GaussianData
from algorithm.base_test import TwoSampleConfounderTest

from experiment import fast_experiment, fast2_experiment, save_results
from plot_tools import plot_experiment_results, set_mpl_default_settings

# Apply the plotting defaults provided by plot_tools before any figure is drawn
# (presumably matplotlib rcParams — see plot_tools for what is actually set).
set_mpl_default_settings()


# Experiment: Vary effect size of confounder

In [None]:
# Mode switches:
#   load             -- read previously saved results instead of running the experiment
#   vary_lambda_only -- select the lambda-only variant of the experiment
load = False
vary_lambda_only = False

# Simulator and test under study
SimulateClass = BinaryCauseData
# NOTE(review): despite the `_list` suffix this is a single class, not a list —
# confirm what the experiment runners expect.
TestClass_list = TwoSampleConfounderTest

# Experiment grid
nbr_env = list(range(500, 4500, 500))  # 500, 1000, ..., 4000
nbr_samples = [2]
repetitions = 50
sign_level = 0.05

# Dataset
# Branch on truthiness so this cell selects the same mode as the later
# `if vary_lambda_only:` checks, even when the flag is set to 1 or np.True_
# (an identity test against True would silently fall through to the else
# branch for those values).
if vary_lambda_only:
    # Run experiment where we only vary lambda (experiment in Appendix):
    # one parameter set per lambda value, confounder strength held at 1.
    conf_strength = [1]
    dist_param = [
        {'X': {'a': 0.0, 'b': 1},
         'Y': {'a': 0.0, 'b': 1},
         'T': {'a': 0.0, 'b': 1},
         'X_beta': lam}
        for lam in np.linspace(0, 15.0, 20)
    ]
else:
    # Single parameter set; the confounder strength is swept instead.
    conf_strength = list(np.linspace(0, 15.0, 20))
    dist_param = {'X': {'a': 0.0, 'b': 1},
                  'Y': {'a': 0.0, 'b': 1},
                  'T': {'a': 0.0, 'b': 1}}

# Stamp this run with the current local time as month-day-hour-minute
# (MMDDHHMM); the stamp is used in the names of saved results and figures.
now = datetime.now()
timestamp = format(now, "%m%d%H%M")
print('Timestamp:', timestamp)

In [None]:
if not load:
    # Both runners receive the same positional arguments and the same
    # repetition / significance settings; only the number of processes and
    # the bias flag differ between the two modes.
    if vary_lambda_only:
        run_experiment = fast_experiment
        mode_kwargs = {'nbr_proc': 4}
    else:
        run_experiment = fast2_experiment
        mode_kwargs = {'nbr_proc': 1, 'compute_bias': True}

    experiment_results = run_experiment(
        dist_param, nbr_env, nbr_samples, conf_strength,
        SimulateClass, TestClass_list,
        repetitions=repetitions,
        sign_level=sign_level,
        **mode_kwargs
    )

    # Persist the results under the run's timestamp
    save_results(experiment_results, 'exp_res_bias', timestamp)


# Plot results

In [None]:
if load:
    # Load previously saved results instead of re-running the experiment.
    # The timestamp stays a string: int("05251725") would drop the leading
    # zero, so figures would be written under a different stamp than the one
    # used for the results file (and than the string produced for fresh runs).
    timestamp_str = "05251725"
    timestamp = timestamp_str
    path = f'results/exp_res_bias_{timestamp_str}.csv'

    # NOTE(review): this assignment overrides the timestamped path above, so
    # the example file is what actually gets read — confirm this is intended.
    path = 'results/example1_continuous_gamma.csv'

    experiment_results = pd.read_csv(path)

In [None]:
if not vary_lambda_only:
    # Order the rows by average bias. The sort is done in place, so every
    # later cell that uses experiment_results sees the sorted frame.
    experiment_results.sort_values(by='avg_bias', inplace=True)

    # Restrict to the rows whose nbr_env equals the last entry of the grid,
    # then plot rejection rate against average bias.
    fixed_env_res = experiment_results[experiment_results['nbr_env'] == nbr_env[-1]]
    fixed_env_res.plot.scatter(x='avg_bias', y='reject_rate')

    # Save the scatter plot under the run's timestamp
    path = f'results/figures/bias_{timestamp}.pdf'
    plt.savefig(path, bbox_inches='tight', format='pdf')

In [None]:
# Draw the main results figure and save it under the run's timestamp.
plot_experiment_results(experiment_results, vary_lambda_only=vary_lambda_only)

path = f'results/figures/vary_conf_{timestamp}.pdf'
if vary_lambda_only:
    # Raw string: "\l" is not a valid escape sequence, so the plain literal
    # triggers a DeprecationWarning (SyntaxWarning on Python 3.12+). The label
    # text passed to matplotlib is unchanged.
    plt.ylabel(r'Effect size $\lambda$')
plt.savefig(path, format='pdf', bbox_inches='tight')

In [None]:
# Display the experiment results as this cell's output
experiment_results