# DATA GENERATING PROCESS (DGP) implementation

In [885]:
import sklearn
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy.matlib as mat
from scipy.stats import truncnorm
import matplotlib.pyplot as plt

# Seed both random number generators used by this notebook.
# `random` drives the covariate selection, while NumPy's global generator
# drives `np.random.randn` and scipy's `truncnorm(...).rvs()`; seeding only
# one of them leaves the generated data non-reproducible.
random.seed(1)
np.random.seed(1)

## Tunable knobs:

- Number of treatments
- Number of confounders
- Degree of positivity
- Degree of heterogeneity
- Bias/Effect ratio

In [886]:
# Load the table of candidate covariates and discard the two columns that
# are not covariates: the saved index ('Unnamed: 0') and the 'treat' column.
ihdp_matrix = pd.read_csv('ihdp.csv').drop(['Unnamed: 0', 'treat'], axis=1)

In [887]:
# Standardize dataset of potential covariates
def standardize(x):
    """Return ``x`` centred on its mean and divided by its standard deviation.

    ``x`` may be anything exposing ``.mean()`` and ``.std()`` (a pandas
    Series or a NumPy array). Note that pandas' ``std`` defaults to the
    sample estimate (ddof=1) whereas NumPy's defaults to the population
    estimate (ddof=0), so the scale differs slightly between the two.
    """
    return (x - x.mean()) / x.std()


# axis=0 passes each column (one covariate across all observations) to
# `standardize`. axis=1 would instead rescale every row across its
# different covariates, which mixes unrelated variables.
ihdp_standar = ihdp_matrix.apply(standardize, axis=0)

In [888]:
# --- Input parameters of the data generating process ---
number_of_treatments = 5    # 3, 5 or 8
number_of_confounders = 20  # 4, 8, 12 or 20
positivity = 0.8
heterogeneity = 0.8
bias_effect_ratio = 0.2
repetitions = 50
sd_t = 1  # standard deviation of the truncated normal distribution of t

# Multiplier vector for t: one standard-normal weight per confounder
a = np.random.standard_normal(number_of_confounders)

In [889]:
# Pick covariates
# Draw WITHOUT replacement so that exactly `number_of_confounders` distinct
# columns are selected; `random.choices` samples with replacement and can
# return the same covariate several times. `random.sample` raises ValueError
# if more covariates are requested than the table has columns.
covariates = random.sample(list(ihdp_matrix.columns), k=number_of_confounders)

In [890]:
# Covariate matrix: the selected standardized columns, in the order listed
# in `covariates`, converted to a plain NumPy array.
W = ihdp_standar.loc[:, covariates].to_numpy()

### Treatment

In [547]:
def get_truncated_normal(mean, sd, low=0, upp=number_of_treatments):
    """Build a frozen normal distribution truncated to ``[low, upp]``.

    ``mean`` and ``sd`` are the location and scale of the underlying normal.
    ``upp`` defaults to the value ``number_of_treatments`` had when this
    function was defined.
    """
    # truncnorm takes its clipping bounds in units of `scale` relative to
    # `loc`, so convert the bounds before handing them over.
    lower_bound = (low - mean) / sd
    upper_bound = (upp - mean) / sd
    return truncnorm(lower_bound, upper_bound, loc=mean, scale=sd)

In [819]:
# Treatment assignment: deterministic mean plus a random draw around it
def treatment(x):
    """Map the score ``x`` to a treatment mean and a sampled treatment level.

    Returns the list ``[t_prime, t]`` where ``t_prime`` rescales ``tanh(x)``
    into ``[0, number_of_treatments]`` and ``t`` is the floor of one draw
    from a truncated normal centred on ``t_prime`` with standard deviation
    ``sd_t``.
    """
    half_range = number_of_treatments / 2
    t_prime = half_range * (np.tanh(x) + 1)
    draw = get_truncated_normal(mean=t_prime, sd=sd_t).rvs()
    t = np.floor(draw)
    return [t_prime, t]

In [820]:
# Collapse the covariates into one score per row using the weights `a`,
# then standardize that score before feeding it to the treatment function.
W_a = W @ a
W_a_standar = standardize(W_a)

In [827]:
# Treatment mean (t_prime) and sampled treatment level (t) for every score
t_prime, t = treatment(W_a_standar)

In [828]:
#pd.value_counts(t_prime).sort_index()

In [829]:
#pd.value_counts(t).sort_index()

In [830]:
#fig, ax = plt.subplots(2)
#ax[0].hist(t_prime, bins=[0,1,2,3,4,5])
#ax[1].hist(t, bins=[0,1,2,3,4,5])
#plt.show()

### Outcome

## Realization characterization parameters:

- Average true ATE
- Average biased ATE
- Average absolute error
- Error/true ATE ratio
- Treatment assignment entropy?
- PEHE?