In [2]:
import numpy as np
import pandas as pd
from itertools import product

In [3]:
# Participant simulation

# we'll create 1000 participants in our experiment

n = 1000

In [4]:
# let's create those participant partitions we discussed above

compliance_partitions = ['always_taker', 'complier', 'defier', 'never_taker']
response_partitions = ['always_better', 'helped', 'hurt', 'never_better']
partition_types = np.array(list(product(compliance_partitions, response_partitions)))

# we can also simulate probabilities that our participants will belong to one of the
# 16 possible behavior combinations

partition_probabilities = np.random.random(16)
partition_probabilities = partition_probabilities / partition_probabilities.sum()

# to be a true set of probabilities, the vector sum needs to be 1
# sometimes this can fail because of precision errors
assert partition_probabilities.sum() == 1 

In [5]:
# drawing participant compliance and response behaviors according to the
# specified distribution

participant_partition = np.random.choice(range(len(partition_types)), n, p=partition_probabilities)
compliance_type, response_type = list(zip(*partition_types[participant_partition]))

# assigning participants to Control and Treatment groups with 50% probability

assignments = np.array(['control', 'treatment'])
participant_assignment = assignments[np.concatenate([np.zeros(n//2), np.ones(n//2)]).astype('int32')]

# compiling all information into our dataframe

df = pd.DataFrame({'assignment': participant_assignment,
                   'compliance_type': compliance_type,
                   'response_type': response_type})

In [6]:
# Simulate outcomes

# depending on assignment and compliance type, did the participant take the treatment?

# if the participant is an always_taker, they'll always take the treatment.
# if they're a complier, they'll take the treatment as long as they're in the treatment condition.
# if they're a defier, they'll only take the treatment if they were in the control condition.
df['took_treatment'] = (df.compliance_type == 'always_taker') \
                       | ( (df.compliance_type == 'complier') & (df.assignment == 'treatment')) \
                       | ( (df.compliance_type == 'defier') & (df.assignment == 'control'))

# depending on whether they took the treatment and their response_type, 
# what was the participant's outcome?

# if the participant is of the always_better type, they'll definitely have a good outcome.
# if the participant is of the 'helped' type, they'll have a good outcome as long as they
# took treatment.
df['good_outcome'] = (df.response_type == 'always_better') \
                     | ( (df.response_type == 'helped') & (df.took_treatment) )


In [7]:
# we can observe the probabilities of each Assignment, Treatment, Outcome
# combinations that would emerge

# get all the probabilities we need: p(z,x,y) for each z,x,y combination

# we can get all states by taking the cartesian product of the different
# binary possibilities: treatment vs. control group, took_treatment=True vs. False, etc.
states = product(['treatment','control'],[False, True], [False, True])

# this is an ugly list comprehension that calculates probabilities for each of the states
# we generated above
p_states = {f"{assignment}/{'treated' if treated == 1 else 'untreated'}/{'good' if outcome == 1 else 'bad'}" : 
                ( (df[df.assignment == assignment].took_treatment == treated)
                   & (df[df.assignment == assignment].good_outcome == outcome)  ).mean()
                for assignment, treated, outcome in states
                }

# display:
pd.DataFrame(p_states, index=['probabilities']).T

Unnamed: 0,probabilities
treatment/untreated/bad,0.278
treatment/untreated/good,0.174
treatment/treated/bad,0.344
treatment/treated/good,0.204
control/untreated/bad,0.336
control/untreated/good,0.182
control/treated/bad,0.316
control/treated/good,0.166


In [8]:
pd.DataFrame(p_states, index=['probabilities']).T.to_csv('experiment_probas.csv')

In [None]:
# intent-to-treat average treatment effect

# sum up all 'good' from the Treatment column, and subtract by all 'good' from the Control


In [None]:
# Intent to treat analysis:

itt_ate = df[df.assignment == 'treatment'].good_outcome.mean() \
          - df[df.assignment == 'control'].good_outcome.mean() 

print("ATE: %6.4f" % itt_ate)

In [None]:
# Causal analysis

# get all the probabilities we need: p(z,x,y) for each z,x,y combination
states = list(product(['treatment','control'],[0,1], [0,1]))

p_states = {f"{assignment}/{'treated' if took == 1 else 'untreated'}/{'good' if outcome ==1 else 'bad'}" : 
                ( (df[df.assignment == assignment].took_treatment == took)
                   & (df[df.assignment == assignment].good_outcome == outcome)  ).mean()
                for assignment, took, outcome in states
                }

In [None]:
p_states

In [None]:
true_ate = (df.response_type == 'helped').mean() - (df.response_type == 'hurt').mean()

In [9]:
# linear programming problem
import pulp
min_problem = pulp.LpProblem("min ATE", pulp.LpMinimize)
max_problem = pulp.LpProblem("max ATE", pulp.LpMaximize)

# our hidden variables are the the probability of being in one of the partitions
partition_names = ['/'.join([compliance, response]) for compliance, response in partition_types]
q = {partition: pulp.LpVariable(partition, lowBound=0) for partition in partition_names}

In [10]:
# since our hidden vars are probabilities the sum of them should all be under 1
min_problem += sum([v for k,v in q.items()]) == 1

In [11]:
# statements
p_treatment_untreated_bad = q['never_taker/never_better'] + q['defier/never_better'] \
                             + q['never_taker/helped'] + q['defier/helped']

p_treatment_untreated_good = q['never_taker/always_better'] + q['defier/always_better'] \
                             + q['never_taker/hurt'] + q['defier/hurt']

p_treatment_treated_bad = q['always_taker/never_better'] + q['complier/never_better'] \
                             + q['always_taker/hurt'] + q['complier/hurt']

p_treatment_treated_good = q['always_taker/always_better'] + q['complier/always_better'] \
                             + q['always_taker/helped'] + q['complier/helped']

p_control_untreated_bad = q['never_taker/never_better'] + q['complier/never_better'] \
                             + q['never_taker/helped'] + q['complier/helped']

p_control_untreated_good = q['never_taker/always_better'] + q['complier/never_better'] \
                             + q['never_taker/hurt'] + q['complier/hurt']

p_control_treated_bad = q['always_taker/never_better'] + q['defier/never_better'] \
                             + q['always_taker/hurt'] + q['defier/hurt']

p_control_treated_good = q['always_taker/always_better'] + q['defier/always_better'] \
                             + q['always_taker/helped'] + q['defier/helped']

In [12]:
# there's a natural mapping from probabilities we see to the hidden variables we have
# we'll spell these out one by one.
# there are probably smarter ways to express this as a vector operation,
# but this is easier to understand
min_problem += p_treatment_untreated_bad == p_states['treatment/untreated/bad']

min_problem += p_treatment_untreated_good == p_states['treatment/untreated/good']

min_problem += p_treatment_treated_bad == p_states['treatment/treated/bad']

min_problem += p_control_untreated_bad == p_states['control/untreated/bad']

min_problem += p_control_untreated_good == p_states['control/untreated/good']

min_problem += p_control_treated_bad == p_states['control/treated/bad']

#min_problem += p_treatment_treated_good == p_states['treatment/treated/good']

#min_problem += p_control_treated_good == p_states['control/treated/good']

In [13]:
min_problem += q['complier/helped'] + q['defier/helped'] + q['always_taker/helped'] + q['never_taker/helped'] \
              - q['complier/hurt'] - q['defier/hurt'] - q['always_taker/hurt'] - q['never_taker/hurt']

In [14]:
pulp.LpStatus[min_problem.solve()]

'Optimal'

In [15]:
q_min = {partition:pulp.value(partition_p) for partition, partition_p in q.items()}
q_min

{'always_taker/always_better': 0.174,
 'always_taker/helped': 0.0,
 'always_taker/hurt': 0.316,
 'always_taker/never_better': 0.0,
 'complier/always_better': 0.0,
 'complier/helped': 0.03,
 'complier/hurt': 0.0,
 'complier/never_better': 0.028,
 'defier/always_better': 0.02,
 'defier/helped': 0.0,
 'defier/hurt': 0.0,
 'defier/never_better': 0.0,
 'never_taker/always_better': 0.0,
 'never_taker/helped': 0.0,
 'never_taker/hurt': 0.154,
 'never_taker/never_better': 0.278}

In [None]:
def apply_constraints(problem):
    
    # our hidden variables are the the probability of being in one of the partitions
    partition_names = ['/'.join([compliance, response]) for compliance, response in partition_types]
    q = {partition: pulp.LpVariable(partition, lowBound=0) for partition in partition_names}
    
    problem += sum([v for k,v in q.items()]) == 1
    
    p_treatment_untreated_bad = q['never_taker/never_better'] + q['defier/never_better'] \
                             + q['never_taker/helped'] + q['defier/helped']

    p_treatment_untreated_good = q['never_taker/always_better'] + q['defier/always_better'] \
                                 + q['never_taker/hurt'] + q['defier/hurt']

    p_treatment_treated_bad = q['always_taker/never_better'] + q['complier/never_better'] \
                                 + q['always_taker/hurt'] + q['complier/hurt']

    p_treatment_treated_good = q['always_taker/always_better'] + q['complier/always_better'] \
                                 + q['always_taker/helped'] + q['complier/helped']

    p_control_untreated_bad = q['never_taker/never_better'] + q['complier/never_better'] \
                                 + q['never_taker/helped'] + q['complier/helped']

    p_control_untreated_good = q['never_taker/always_better'] + q['complier/never_better'] \
                                 + q['never_taker/hurt'] + q['complier/hurt']

    p_control_treated_bad = q['always_taker/never_better'] + q['defier/never_better'] \
                                 + q['always_taker/hurt'] + q['defier/hurt']

    p_control_treated_good = q['always_taker/always_better'] + q['defier/always_better'] \
                                 + q['always_taker/helped'] + q['defier/helped']
    
    problem += p_treatment_untreated_bad == p_states['treatment/untreated/bad']
    problem += p_treatment_untreated_good == p_states['treatment/untreated/good']
    problem += p_treatment_treated_bad == p_states['treatment/treated/bad']
    problem += p_control_untreated_bad == p_states['control/untreated/bad']
    problem += p_control_untreated_good == p_states['control/untreated/good']
    problem += p_control_treated_bad == p_states['control/treated/bad']
    
    problem += q['complier/helped'] + q['defier/helped'] + q['always_taker/helped'] + q['never_taker/helped'] \
              - q['complier/hurt'] - q['defier/hurt'] - q['always_taker/hurt'] - q['never_taker/hurt']
    
    
    status = pulp.LpStatus[problem.solve()]
    
    if status != 'Optimal':
        raise ValueError('Infeasible')
        
    q_solved = {partition:pulp.value(partition_p) for partition, partition_p in q.items()}
    
    return q_solved


In [None]:
q_min = apply_constraints(min_problem)
q_max = apply_constraints(max_problem)

In [None]:
ate = lambda q: q['complier/helped'] + q['defier/helped'] + q['always_taker/helped'] + q['never_taker/helped'] \
              - q['complier/hurt'] - q['defier/hurt'] - q['always_taker/hurt'] - q['never_taker/hurt']

min_ate = ate(q_min)

In [None]:
max_ate = ate(q_max)

In [None]:
min_ate

In [None]:
max_ate