In [1]:
import numpy as np
import pandas as pd
from itertools import product
import pulp

In [3]:
# baking off intent-to-treat vs. analysis of compliance
n = 1000

compliance_partitions = ['always_taker', 'complier', 'defier', 'never_taker']
response_partitions = ['always_better', 'helped', 'hurt', 'never_better']
partition_types = np.array(list(product(compliance_partitions, response_partitions)))

partition_probabilities = np.random.random(16)
partition_probabilities = partition_probabilities / partition_probabilities.sum()

assert partition_probabilities.sum() == 1

In [4]:
# Participant simulation

# drawing participant compliance and response behaviors according to the
# specified distribution
participant_partition = np.random.choice(range(len(partition_types)), n, p=partition_probabilities)
compliance_type, response_type = list(zip(*partition_types[participant_partition]))

# randomly assigning participants to Control and Treatment groups
assignments = np.array(['control', 'treatment'])
participant_assignment = assignments[np.concatenate([np.zeros(n//2), np.ones(n//2)]).astype('int32')]

# compiling all information into our dataframe
df = pd.DataFrame({'assignment': participant_assignment,
                   'compliance_type': compliance_type,
                   'response_type': response_type})

In [5]:
# Simulate outcomes

# Depending on assignment and compliance type, do you take the treatment?
df['took_treatment'] = (df.compliance_type == 'always_taker') \
                       | ( (df.compliance_type == 'complier') & (df.assignment == 'treatment')) \
                       | ( (df.compliance_type == 'defier') & (df.assignment == 'control'))

df.took_treatment = df.took_treatment.astype('int32')

# Depending on whether you took the treatment and your response_type, what happens to you?
df['good_outcome'] = (df.response_type == 'always_better') \
                     | ( (df.response_type == 'helped') & (df.took_treatment) )

df.good_outcome = df.good_outcome.astype('int32')

In [6]:
# Intent to treat analysis:

print(df[df.assignment == 'treatment'].good_outcome.mean())
print(df[df.assignment == 'control'].good_outcome.mean())

itt_ate = df[df.assignment == 'treatment'].good_outcome.mean() \
- df[df.assignment == 'control'].good_outcome.mean() 

print("ATE: %6.4f" % itt_ate)

0.28
0.25
ATE: 0.0300


In [7]:
# Causal analysis

# get all the probabilities we need: p(z,x,y) for each z,x,y combination
states = list(product(['treatment','control'],[0,1], [0,1]))

p_states = {f"{assignment}/{'treated' if took == 1 else 'untreated'}/{'good' if outcome ==1 else 'bad'}" : 
                ( (df[df.assignment == assignment].took_treatment == took)
                   & (df[df.assignment == assignment].good_outcome == outcome)  ).mean()
                for assignment, took, outcome in states
                }

In [8]:
def get_state_probabilities(df):
    # get all the probabilities we need: p(z,x,y) for each z,x,y combination
    states = list(product(['treatment','control'],[0,1], [0,1]))

    p_states = {f"{assignment}/{'treated' if took == 1 else 'untreated'}/{'good' if outcome ==1 else 'bad'}" : 
                ( (df[df.assignment == assignment].took_treatment == took)
                   & (df[df.assignment == assignment].good_outcome == outcome)  ).mean()
                for assignment, took, outcome in states
                }
    
    return p_states
    

In [9]:
p_states

{'treatment/untreated/bad': 0.386,
 'treatment/untreated/good': 0.076,
 'treatment/treated/bad': 0.334,
 'treatment/treated/good': 0.204,
 'control/untreated/bad': 0.442,
 'control/untreated/good': 0.074,
 'control/treated/bad': 0.308,
 'control/treated/good': 0.176}

In [10]:
true_ate = (df.response_type == 'helped').mean() - (df.response_type == 'hurt').mean()

In [22]:
# linear programming problem

min_problem = pulp.LpProblem("min ATE", pulp.LpMinimize)
max_problem = pulp.LpProblem("max ATE", pulp.LpMaximize)

# our hidden variables are the the probability of being in one of the partitions
partition_names = ['/'.join([compliance, response]) for compliance, response in partition_types]
q = {partition: pulp.LpVariable(partition, lowBound=0) for partition in partition_names}

In [12]:
def set_up_variables(partition_types):
    partition_names = ['/'.join([compliance, response]) for compliance, response in partition_types]
    q = {partition: pulp.LpVariable(partition, lowBound=0) for partition in partition_names}
    return q

In [13]:
# since our hidden vars are probabilities the sum of them should all be under 1
min_problem += sum([v for k,v in q.items()]) == 1

In [14]:
# statements
p_treatment_untreated_bad = q['never_taker/never_better'] + q['defier/never_better'] \
                             + q['never_taker/helped'] + q['defier/helped']

p_treatment_untreated_good = q['never_taker/always_better'] + q['defier/always_better'] \
                             + q['never_taker/hurt'] + q['defier/hurt']

p_treatment_treated_bad = q['always_taker/never_better'] + q['complier/never_better'] \
                             + q['always_taker/hurt'] + q['complier/hurt']

p_treatment_treated_good = q['always_taker/always_better'] + q['complier/always_better'] \
                             + q['always_taker/helped'] + q['complier/helped']

p_control_untreated_bad = q['never_taker/never_better'] + q['complier/never_better'] \
                             + q['never_taker/helped'] + q['complier/helped']

p_control_untreated_good = q['never_taker/always_better'] + q['complier/never_better'] \
                             + q['never_taker/hurt'] + q['complier/hurt']

p_control_treated_bad = q['always_taker/never_better'] + q['defier/never_better'] \
                             + q['always_taker/hurt'] + q['defier/hurt']

p_control_treated_good = q['always_taker/always_better'] + q['defier/always_better'] \
                             + q['always_taker/helped'] + q['defier/helped']

In [15]:
# there's a natural mapping from probabilities we see to the hidden variables we have
# we'll spell these out one by one.
# there are probably smarter ways to express this as a vector operation,
# but this is easier to understand
min_problem += p_treatment_untreated_bad == p_states['treatment/untreated/bad']

min_problem += p_treatment_untreated_good == p_states['treatment/untreated/good']

min_problem += p_treatment_treated_bad == p_states['treatment/treated/bad']

min_problem += p_control_untreated_bad == p_states['control/untreated/bad']

min_problem += p_control_untreated_good == p_states['control/untreated/good']

min_problem += p_control_treated_bad == p_states['control/treated/bad']

#min_problem += p_treatment_treated_good == p_states['treatment/treated/good']

#min_problem += p_control_treated_good == p_states['control/treated/good']

In [16]:
min_problem += q['complier/helped'] + q['defier/helped'] + q['always_taker/helped'] + q['never_taker/helped'] \
              - q['complier/hurt'] - q['defier/hurt'] - q['always_taker/hurt'] - q['never_taker/hurt']

In [17]:
pulp.LpStatus[min_problem.solve()]

'Optimal'

In [18]:
q_min = {partition:pulp.value(partition_p) for partition, partition_p in q.items()}


In [19]:
q_min

{'always_taker/always_better': 0.174,
 'always_taker/helped': 0.0,
 'always_taker/hurt': 0.308,
 'always_taker/never_better': 0.0,
 'complier/always_better': 0.0,
 'complier/helped': 0.03,
 'complier/hurt': 0.0,
 'complier/never_better': 0.026,
 'defier/always_better': 0.028,
 'defier/helped': 0.0,
 'defier/hurt': 0.0,
 'defier/never_better': 0.0,
 'never_taker/always_better': 0.0,
 'never_taker/helped': 0.0,
 'never_taker/hurt': 0.048,
 'never_taker/never_better': 0.386}

In [20]:
def apply_constraints(problem):
    
    # our hidden variables are the the probability of being in one of the partitions
    partition_names = ['/'.join([compliance, response]) for compliance, response in partition_types]
    q = {partition: pulp.LpVariable(partition, lowBound=0) for partition in partition_names}
    
    problem += sum([v for k,v in q.items()]) == 1
    
    p_treatment_untreated_bad = q['never_taker/never_better'] + q['defier/never_better'] \
                             + q['never_taker/helped'] + q['defier/helped']

    p_treatment_untreated_good = q['never_taker/always_better'] + q['defier/always_better'] \
                                 + q['never_taker/hurt'] + q['defier/hurt']

    p_treatment_treated_bad = q['always_taker/never_better'] + q['complier/never_better'] \
                                 + q['always_taker/hurt'] + q['complier/hurt']

    p_treatment_treated_good = q['always_taker/always_better'] + q['complier/always_better'] \
                                 + q['always_taker/helped'] + q['complier/helped']

    p_control_untreated_bad = q['never_taker/never_better'] + q['complier/never_better'] \
                                 + q['never_taker/helped'] + q['complier/helped']

    p_control_untreated_good = q['never_taker/always_better'] + q['complier/never_better'] \
                                 + q['never_taker/hurt'] + q['complier/hurt']

    p_control_treated_bad = q['always_taker/never_better'] + q['defier/never_better'] \
                                 + q['always_taker/hurt'] + q['defier/hurt']

    p_control_treated_good = q['always_taker/always_better'] + q['defier/always_better'] \
                                 + q['always_taker/helped'] + q['defier/helped']
    
    problem += p_treatment_untreated_bad == p_states['treatment/untreated/bad']
    problem += p_treatment_untreated_good == p_states['treatment/untreated/good']
    problem += p_treatment_treated_bad == p_states['treatment/treated/bad']
    problem += p_control_untreated_bad == p_states['control/untreated/bad']
    problem += p_control_untreated_good == p_states['control/untreated/good']
    problem += p_control_treated_bad == p_states['control/treated/bad']
    
    problem += q['complier/helped'] + q['defier/helped'] + q['always_taker/helped'] + q['never_taker/helped'] \
              - q['complier/hurt'] - q['defier/hurt'] - q['always_taker/hurt'] - q['never_taker/hurt']
    
    
    status = pulp.LpStatus[problem.solve()]
    
    if status != 'Optimal':
        raise ValueError('Infeasible')
        
    q_solved = {partition:pulp.value(partition_p) for partition, partition_p in q.items()}
    
    return q_solved


In [23]:
q_min = apply_constraints(min_problem)
q_max = apply_constraints(max_problem)

In [24]:
ate = lambda q: q['complier/helped'] + q['defier/helped'] + q['always_taker/helped'] + q['never_taker/helped'] \
              - q['complier/hurt'] - q['defier/hurt'] - q['always_taker/hurt'] - q['never_taker/hurt']

min_ate = ate(q_min)

In [25]:
max_ate = ate(q_max)

In [26]:
min_ate

-0.326

In [27]:
max_ate

0.59