In [1]:
%run init_notebook.py

[31mModule aliases imported by init_notebook.py:
--------------------------------------------
[32mimport[34m arviz [32mas[34m az

[32mimport[34m numpy [32mas[34m np

[32mimport[34m pandas [32mas[34m pd

[32mimport[34m pymc [32mas[34m pm

[32mimport[34m statsmodels.formula.api [32mas[34m smf

[32mimport[34m xarray [32mas[34m xr

[32mfrom[34m matplotlib [32mimport[34m pyplot [32mas[34m plt

[32mfrom[34m scipy [32mimport[34m stats [32mas[34m stats

[32mimport[34m utils [32mas[34m utils

[31mWatermark:
----------
[34mLast updated: 2023-09-26T21:37:29.556267-04:00

Python implementation: CPython
Python version       : 3.10.12
IPython version      : 8.15.0

Compiler    : Clang 15.0.7 
OS          : Darwin
Release     : 22.6.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit

[34mxarray     : 2023.8.0
pymc       : 5.8.0
matplotlib : 3.7.2
arviz      : 0.16.1
numpy      : 1.25.2
scipy      : 1.11.2
statsmodels: 0.14.0
pandas 

# Representing Causality

- The Fork: $X \leftarrow Z \rightarrow Y$
- The Pipe: $X \rightarrow Z \rightarrow Y$
- The Collider: $X \rightarrow Z \leftarrow Y$
- The Descendant: $X \rightarrow Z \rightarrow Y$, $Z \rightarrow A$

![Alt text](image-15.png)

In [3]:
"""Helper functions for displaying elemental confounds"""

def summarize_discrete_counfound_simulation(X, Y, Z):
    """Print the X-Y correlation and cross tabulation, overall and per Z stratum.

    Parameters
    ----------
    X, Y : array-like supporting boolean-mask indexing
        The two variables whose association is summarized.
    Z : array-like
        Conditioning variable. Only the strata ``Z == 0`` and ``Z == 1`` are
        reported.

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    The function name keeps its original spelling so existing calls continue
    to work.
    """
    # Use two fixed decimals here as well, so the marginal correlation is
    # formatted exactly like the stratified ones printed below.
    print(f'Correlation between X and Y: {np.corrcoef(X, Y)[0, 1]:1.2f}')
    print("Cross tabulation:")
    print(utils.crosstab(X, Y, labels=['X', 'Y']))

    for z in [0, 1]:
        # Restrict both variables to the observations in the current stratum.
        in_stratum = Z == z
        X_z = X[in_stratum]
        Y_z = Y[in_stratum]

        print(f'\nCorrelation between X and Y conditioned on Z={z}: {np.corrcoef(X_z, Y_z)[0, 1]:1.2f}')
        print("Cross tabulation:")
        print(utils.crosstab(X_z, Y_z, labels=['X', 'Y']))
        
def fit_linear_models_to_simulated_data(data):
    """Fit the regression ``Y ~ X`` on the full sample and within each Z stratum.

    Parameters
    ----------
    data : DataFrame-like
        Must expose columns ``X``, ``Y`` and ``Z``; rows are selected with
        the boolean mask ``data.Z == z``.

    Returns
    -------
    dict
        Fitted results keyed by ``'unstratified'``, ``'Z=0'`` and ``'Z=1'``
        (in that insertion order).
    """
    formula = "Y ~ X"

    # Whole-sample fit first, ignoring Z.
    fitted = {'unstratified': smf.ols(formula, data=data).fit()}

    # Then one fit per stratum of Z.
    fitted.update(
        (f'Z={level}', smf.ols(formula, data=data[data.Z == level]).fit())
        for level in (0, 1)
    )
    return fitted

def plot_sms_linear_model_fit(model, xs, label, color):
    """Draw the fitted regression line of ``model`` over the grid ``xs``.

    Parameters
    ----------
    model : fitted model
        Must expose ``params.Intercept`` and ``params.X``.
    xs : array-like
        x-coordinates at which the line is evaluated.
    label, color
        Forwarded unchanged to ``utils.plot_line``.

    Returns
    -------
    Whatever ``utils.plot_line`` returns.
    """
    coefs = model.params
    intercept, slope = coefs.Intercept, coefs.X
    fitted_line = intercept + slope * xs
    return utils.plot_line(xs, fitted_line, label=label, color=color)

def plot_continuous_confound_simulation(data, title):
    """Scatter the simulated data per Z stratum and overlay the fitted lines.

    A new 6x6 figure is created. For each of the strata ``Z == 0`` and
    ``Z == 1`` the points and the stratum-specific regression line are drawn
    in a shared color; the whole-sample regression line is added in black.
    Both axes are fixed to ``[-4, 4]``.

    Parameters
    ----------
    data : DataFrame-like
        Must expose columns ``X``, ``Y`` and ``Z``.
    title : str
        Title placed on the figure.
    """
    fitted = fit_linear_models_to_simulated_data(data)
    plt.subplots(figsize=(6, 6))

    grid = np.linspace(-4, 4, 20)

    # The color cycle is deliberately flipped relative to the stratum index:
    # Z=0 is drawn in "C1" and Z=1 in "C0".
    for level, shade in ((0, "C1"), (1, "C0")):
        stratum = data[data.Z == level]
        key = f'Z={level}'
        utils.plot_scatter(stratum.X, stratum.Y, color=shade)
        plot_sms_linear_model_fit(fitted[key], grid, label=key, color=shade)

    # Whole-sample fit, ignoring Z.
    plot_sms_linear_model_fit(fitted['unstratified'], grid, label='total sample', color='black')

    plt.xlabel("X")
    plt.ylabel("Y")

    plt.xlim([-4, 4])
    plt.ylim([-4, 4])
    plt.legend()
    plt.title(title)