In [None]:
import sys
sys.path.append('..')

from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from data.binary import BinaryCauseData
from data.continuous import GaussianData

from algorithm.base_test import TwoSampleConfounderTest, FullTwoSampleConfounderTest
from algorithm.environment import EnvironmentTest

from experiment import fast_experiment, save_results
from plot_tools import set_mpl_default_settings, marker_dict

# Apply the plotting defaults provided by plot_tools before any figure is drawn
# (presumably matplotlib rcParams -- see plot_tools to confirm what is changed).
set_mpl_default_settings()

# Experiment: Comparing our procedure to the $Y \perp E \mid T$ test

In [None]:
# When False, the experiment cells below call fast_experiment to produce results.
load = False

# Simulator class handed to fast_experiment
SimulateClass = BinaryCauseData

# Experiment grid: number of environments, samples per environment,
# repetitions per configuration and the significance level of the tests
nbr_env = [500]
nbr_samples = [100]
repetitions = 50
sign_level = 0.05

# Confounder strengths to simulate; the analysis cells treat 0 as the
# unconfounded case and 1 as the confounded case
conf_strength = [0, 1]

# One parameter set per value of Y's 'b' parameter: 20 evenly spaced values
# in [0, 0.25]. X and T keep fixed parameters throughout the sweep.
dist_param_list = []
for y_b in np.linspace(0, .25, 20):
    dist_param_list.append({
        'X': {'a': 0.0, 'b': 10},
        'Y': {'a': 0.0, 'b': y_b},
        'T': {'a': 0.0, 'b': 1},
    })

# Month-day-hour-minute stamp used to tag the figure files of this run
now = datetime.now()
timestamp = now.strftime("%m%d%H%M")
print('Timestamp:', timestamp)

In [None]:
if not load:
    # Run the experiment with EnvironmentTest over the whole parameter sweep.
    # NOTE(review): when `load` is True nothing visible here assigns
    # env_test_res, so the analysis cells would fail on a fresh kernel --
    # confirm where stored results are meant to be read from.
    env_test_res = fast_experiment(
        dist_param_list,
        nbr_env,
        nbr_samples,
        conf_strength,
        SimulateClass,
        EnvironmentTest,
        repetitions=repetitions,
        sign_level=sign_level,
    )



In [None]:
if not load:
    # Run the same sweep with FullTwoSampleConfounderTest.
    # NOTE(review): when `load` is True nothing visible here assigns
    # conf_test_res, so the analysis cells would fail on a fresh kernel --
    # confirm where stored results are meant to be read from.
    conf_test_res = fast_experiment(
        dist_param_list,
        nbr_env,
        nbr_samples,
        conf_strength,
        SimulateClass,
        FullTwoSampleConfounderTest,
        repetitions=repetitions,
        sign_level=sign_level,
    )

In [None]:
# Order both result tables by Y_b (in place) so the curves are drawn left to right.
env_test_res.sort_values(by='Y_b', inplace=True)
conf_test_res.sort_values(by='Y_b', inplace=True)

# Split each table into the confounded (strength 1) and
# unconfounded (strength 0) runs.
env_test_res_cs1 = env_test_res.loc[env_test_res['confounder_strength'] == 1]
env_test_res_cs0 = env_test_res.loc[env_test_res['confounder_strength'] == 0]

conf_test_res_cs1 = conf_test_res.loc[conf_test_res['confounder_strength'] == 1]
conf_test_res_cs0 = conf_test_res.loc[conf_test_res['confounder_strength'] == 0]



In [None]:

def std(p):
    """Return sqrt(p * (1 - p) / repetitions), the binomial standard error of a rate.

    `p` may be a scalar or a NumPy array of rejection rates; the result has the
    same shape. Assumes each rate was estimated from `repetitions` runs, which is
    the value passed to fast_experiment. The name is kept because the next
    plotting cell calls it as well.
    """
    return np.sqrt(p * (1 - p) / repetitions)


# Half-width of the shaded bands for the unconfounded (strength 0) runs
std_env = std(env_test_res_cs0.reject_rate.values)
std_conf_test = std(conf_test_res_cs0.reject_rate.values)

# Explicit figure/axes so the saved file never depends on pyplot's implicit
# "current figure".
fig, ax = plt.subplots()

# Raw strings keep the LaTeX backslashes literal; the non-raw spellings
# ('\p', '\m', '\s') are invalid escape sequences and warn on recent Pythons.
ax.plot(env_test_res_cs0.Y_b, env_test_res_cs0.reject_rate,
        label=r'$Y \perp E \mid T$', marker=marker_dict['EnvironmentTest'])
ax.fill_between(env_test_res_cs0.Y_b,
                env_test_res_cs0.reject_rate - std_env,
                env_test_res_cs0.reject_rate + std_env,
                alpha=0.5)

ax.plot(conf_test_res_cs0.Y_b, conf_test_res_cs0.reject_rate,
        label=r'$T_j \perp Y_i \mid T_i$ (ours)',
        marker=marker_dict['FullTwoSampleConfounderTest'])
ax.fill_between(conf_test_res_cs0.Y_b,
                conf_test_res_cs0.reject_rate - std_conf_test,
                conf_test_res_cs0.reject_rate + std_conf_test,
                alpha=0.5)

# Without confounding every rejection is a false detection.
ax.set_ylabel('Probability of false detection')
ax.set_xlabel(r'Standard deviation $\sigma_{\theta_Y}$')
# Small margin around [0, 1] so markers at the extremes are not clipped.
ax.set_ylim([-.05, 1.05])
# No legend on this figure (the call is disabled); the confounded-case figure draws one.
#ax.legend()

path = f'results/figures/comparison_cs0_{timestamp}.pdf'
fig.savefig(path, format='pdf', bbox_inches='tight')




In [None]:

# Half-width of the shaded bands for the confounded (strength 1) runs.
# `std` is the standard-error helper defined in the previous plotting cell.
std_env = std(env_test_res_cs1.reject_rate.values)
std_conf_test = std(conf_test_res_cs1.reject_rate.values)

# Explicit figure/axes so the saved file never depends on pyplot's implicit
# "current figure".
fig, ax = plt.subplots()

# Raw strings keep the LaTeX backslashes literal; the non-raw spellings
# ('\p', '\m', '\s') are invalid escape sequences and warn on recent Pythons.
ax.plot(env_test_res_cs1.Y_b, env_test_res_cs1.reject_rate,
        label=r'$Y \perp E \mid T$', marker=marker_dict['EnvironmentTest'])
ax.fill_between(env_test_res_cs1.Y_b,
                env_test_res_cs1.reject_rate - std_env,
                env_test_res_cs1.reject_rate + std_env,
                alpha=0.5)

ax.plot(conf_test_res_cs1.Y_b, conf_test_res_cs1.reject_rate,
        label=r'$T_j \perp Y_i \mid T_i$ (ours)',
        marker=marker_dict['FullTwoSampleConfounderTest'])
ax.fill_between(conf_test_res_cs1.Y_b,
                conf_test_res_cs1.reject_rate - std_conf_test,
                conf_test_res_cs1.reject_rate + std_conf_test,
                alpha=0.5)

# With confounding present a rejection is a correct detection.
ax.set_ylabel('Probability of correct detection')
ax.set_xlabel(r'Standard deviation $\sigma_{\theta_Y}$')
# Small margin around [0, 1] so markers at the extremes are not clipped.
ax.set_ylim([-.05, 1.05])
ax.legend()

path = f'results/figures/comparison_cs1_{timestamp}.pdf'
fig.savefig(path, format='pdf', bbox_inches='tight')