In [1]:
import numpy as np
import pandas as pd
from itertools import product
from sklearn.preprocessing import normalize

#### Participant simulation

First we'll create *n* participants in our experiment.

In [2]:
# number of simulated participants in the experiment
n = 10_000

We'll randomly assign each participant a (Compliance, Response) behavior combination.

In [3]:
# Labels for how a participant's treatment uptake relates to their assignment.
compliance_partitions = ["always_taker", "complier", "defier", "never_taker"]

# Labels for how a participant's outcome relates to taking the treatment.
response_partitions = ["always_better", "helped", "hurt", "never_better"]

# Cross product of the two label sets: one (compliance, response) row per
# combination, with the compliance label varying slowest.
partition_types = np.array(
    [
        (compliance, response)
        for compliance in compliance_partitions
        for response in response_partitions
    ]
)

In [4]:
# We'll simulate the probabilities that a participant belongs to each of
# the 16 possible (compliance, response) behavior combinations.

# These weights are a deliberately contrived example -- trust me, it's
# instructive! They are listed in the same order as the rows of
# `partition_types`: four response types for each compliance type.
# The array is 2-D (a single row) because `normalize` operates on rows.

arbitrary_floats = np.array(
    [
        [
            # always_taker
            0, 0, 0, 0.01,
            # complier
            0, 0.14, 0, 0.16,
            # defier
            0.32, 0, 0.31, 0,
            # never_taker
            0.04, 0.02, 0, 0,
        ]
    ]
)

# L1-normalize the row so the weights sum to one, then drop back to 1-D.
partition_probabilities = normalize(
    arbitrary_floats, norm="l1"
).flatten()

# To be a true set of probabilities, the vector sum needs to be 1.
# Floating-point rounding means the sum may differ from 1 in the last few
# bits, so compare with a tolerance rather than with exact equality.

assert np.isclose(
    partition_probabilities.sum(), 1.0
), "partition probabilities must sum to 1"

In [5]:
# A seeded generator makes the simulated participants -- and every table
# derived from them -- reproducible when the notebook is re-run from a
# fresh kernel.

rng = np.random.default_rng(0)

# drawing participant compliance and response behaviors according to the
# specified distribution: each draw is a row index into `partition_types`

participant_partition = rng.choice(
    len(partition_types),
    size=n,
    p=partition_probabilities,
)

# split the drawn (compliance, response) rows into two parallel tuples

compliance_type, response_type = zip(
    *partition_types[participant_partition]
)

# assigning participants to Control and Treatment groups
# with 50% probability

assignments = ["control", "treatment"]
participant_assignment = rng.choice(assignments, size=n)

# compiling all information into a dataframe
# that simulates the participants

df = pd.DataFrame(
    {
        "assignment": participant_assignment,
        "compliance_type": compliance_type,
        "response_type": response_type,
    }
)

### Simulate whether participants took treatment

Depending on assignment and compliance type, we can simulate whether or not each participant took the treatment.

In [6]:
# Always-takers take the treatment regardless of assignment.
is_always_taker = df["compliance_type"] == "always_taker"

# Compliers take the treatment only when assigned to the treatment condition.
complier_in_treatment = (df["compliance_type"] == "complier") & (
    df["assignment"] == "treatment"
)

# Defiers take the treatment only when assigned to the control condition.
defier_in_control = (df["compliance_type"] == "defier") & (
    df["assignment"] == "control"
)

# Anyone matching none of the cases above did not take the treatment.
df["took_treatment"] = (
    is_always_taker | complier_in_treatment | defier_in_control
)

### Simulate Outcomes

Now we can simulate outcomes from the experiment.

Whether each participant ends up in a Good or Bad state at the experiment's conclusion depends on whether they took the treatment and on their `response_type`.

In [7]:
took = df["took_treatment"]

# 'always_better' participants have a good outcome no matter what.
always_good = df["response_type"] == "always_better"

# 'helped' participants have a good outcome only if they took treatment.
helped_and_treated = (df["response_type"] == "helped") & took

# 'hurt' participants have a good outcome only if they did NOT take
# the treatment!
hurt_and_untreated = (df["response_type"] == "hurt") & ~took

# Every other participant gets a bad outcome, i.e. a False value.
df["good_outcome"] = (
    always_good | helped_and_treated | hurt_and_untreated
)

We can now observe the probabilities of each (Treatment, Outcome) combination that would emerge, conditional on the assignment.

In [8]:
# Helper column of ones: counting it per group gives the group's size.
df["n"] = 1

group_keys = ["assignment", "took_treatment", "good_outcome"]

# Number of participants in each (assignment, took_treatment, good_outcome) cell.
results = df.groupby(group_keys)["n"].count().to_frame()

# Total number of participants in each assignment arm, repeated on every
# row belonging to that arm.
results["assignment_n"] = results.groupby("assignment")["n"].transform(
    "sum"
)

In [9]:
# Share of each assignment arm that falls in every
# (took_treatment, good_outcome) cell.
p_states = (
    results["n"]
    .div(results["assignment_n"])
    .rename("P( X, Y | Z )")
)

# display
p_states.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,"P( X, Y | Z )"
assignment,took_treatment,good_outcome,Unnamed: 3_level_1
control,False,False,0.315285
control,False,True,0.043756
control,True,False,0.323676
control,True,True,0.317283
treatment,False,False,0.01982
treatment,False,True,0.670671
treatment,True,False,0.167968
treatment,True,True,0.141542
