In [2]:
import numpy as np
import pandas as pd
from itertools import product

In [3]:
# Participant simulation

# size of the simulated experiment: the number of participants to generate

n = 1_000

In [4]:
# let's create those participant partitions we discussed above

compliance_partitions = ['always_taker', 'complier', 'defier', 'never_taker']
response_partitions = ['always_better', 'helped', 'hurt', 'never_better']

# every (compliance, response) pairing, one row per pairing: shape (16, 2)
partition_types = np.array(list(product(compliance_partitions, response_partitions)))

# we can also simulate probabilities that our participants will belong to one of the
# possible behavior combinations: draw one random weight per combination, then
# normalize the weights so that they sum to 1

partition_probabilities = np.random.random(len(partition_types))
partition_probabilities = partition_probabilities / partition_probabilities.sum()

# to be a true set of probabilities, the vector sum needs to be 1.
# after floating-point normalization the sum is only 1 up to rounding error, so we
# compare with a tolerance; an exact `== 1` check fails intermittently.
assert np.isclose(partition_probabilities.sum(), 1.0)

In [5]:
# drawing participant compliance and response behaviors according to the
# specified distribution: each participant gets a row index into partition_types

participant_partition = np.random.choice(len(partition_types), size=n, p=partition_probabilities)

# column 0 of partition_types is the compliance type, column 1 the response type.
# slicing columns (rather than unpacking a zip) also works when n == 0.
participant_types = partition_types[participant_partition]
compliance_type = participant_types[:, 0]
response_type = participant_types[:, 1]

# assigning participants to Control and Treatment groups: the first n // 2
# participants go to control and the remaining ones to treatment. The split is by
# position, not by coin flip; participant types above are drawn independently of
# position. Using n - n // 2 for the second group keeps the vector at length n
# even when n is odd.

assignments = np.array(['control', 'treatment'])
participant_assignment = np.repeat(assignments, [n // 2, n - n // 2])

# compiling all information into our dataframe

df = pd.DataFrame({'assignment': participant_assignment,
                   'compliance_type': compliance_type,
                   'response_type': response_type})

In [6]:
# Simulate outcomes

# depending on assignment and compliance type, did the participant take the treatment?

# if the participant is an always_taker, they'll always take the treatment.
# if they're a complier, they'll take the treatment as long as they're in the treatment condition.
# if they're a defier, they'll only take the treatment if they were in the control condition.
# (never_takers match none of the clauses, so they never take the treatment.)
df['took_treatment'] = (
    (df.compliance_type == 'always_taker')
    | ((df.compliance_type == 'complier') & (df.assignment == 'treatment'))
    | ((df.compliance_type == 'defier') & (df.assignment == 'control'))
)

# depending on whether they took the treatment and their response_type,
# what was the participant's outcome?

# if the participant is of the always_better type, they'll definitely have a good outcome.
# if the participant is of the 'helped' type, they'll have a good outcome as long as they
# took treatment.
# if the participant is of the 'hurt' type, they'll have a good outcome only if they
# did NOT take treatment. Without this clause 'hurt' would be indistinguishable
# from 'never_better'.
# (never_better participants match none of the clauses, so their outcome is always bad.)
df['good_outcome'] = (
    (df.response_type == 'always_better')
    | ((df.response_type == 'helped') & df.took_treatment)
    | ((df.response_type == 'hurt') & ~df.took_treatment)
)



In [7]:
# we can observe the probabilities of each Treatment / Outcome combination
# within each Assignment group

# the quantity computed for every (z, x, y) state is the share of participants in
# assignment group z who have took_treatment == x and good_outcome == y,
# i.e. the probabilities within each assignment group sum to 1

# all states are the cartesian product of the binary possibilities:
# treatment vs. control group, took_treatment False vs. True, good_outcome False vs. True
states = product(['treatment', 'control'], [False, True], [False, True])


def _state_label(assignment, treated, outcome):
    """Build the 'assignment/treated-or-untreated/good-or-bad' key for one state."""
    treatment_part = 'treated' if treated else 'untreated'
    outcome_part = 'good' if outcome else 'bad'
    return f"{assignment}/{treatment_part}/{outcome_part}"


def _state_probability(assignment, treated, outcome):
    """Share of the given assignment group whose treatment and outcome flags match."""
    group = df[df.assignment == assignment]
    matches = (group.took_treatment == treated) & (group.good_outcome == outcome)
    return matches.mean()


# one entry per state, in the order the states are generated
p_states = {_state_label(*state): _state_probability(*state) for state in states}

# display:
pd.DataFrame(p_states, index=['probabilities']).T

Unnamed: 0,probabilities
treatment/untreated/bad,0.278
treatment/untreated/good,0.174
treatment/treated/bad,0.344
treatment/treated/good,0.204
control/untreated/bad,0.336
control/untreated/good,0.182
control/treated/bad,0.316
control/treated/good,0.166
