## Causal Inference With Python
- 용어정리 


[Pearl, J. (2010). An introduction to causal inference. The International Journal of Biostatistics, 6(2), 1-62.](https://cdn1.sph.harvard.edu/wp-content/uploads/sites/1268/2019/10/ci_hernanrobins_1oct19.pdf)

In [32]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import PolynomialFeatures

def generate_dataset_0(n_samples=500, set_X=None, show_z=False):
    """
    Generate samples from the Causal Model:
    Nodes: (X,Y,Z)
    Edges: (Z -> X, Z -> Y, X -> Y)

    All three variables are binary (0/1) and are drawn as follows:

    * ``P(Z=1) = 0.5``
    * ``P(X=1 | Z=z) = [0.9, 0.1][z]``
    * ``P(Y=1 | X=x, Z=z) = [0.2, 0.4, 0.6, 0.8][x + 2*z]``

    Parameters
    ----------
    n_samples : int, default 500
        Number of rows to generate.
    set_X : array-like of 0/1, optional
        When given, these values are used for X instead of sampling X from
        Z, which removes the Z -> X dependence for this draw.  Must contain
        exactly ``n_samples`` entries.
    show_z : bool, default False
        When true, the returned frame also contains the ``z`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` and ``y`` (plus ``z`` when ``show_z`` is true), one
        row per sample.

    Raises
    ------
    ValueError
        If ``set_X`` is given and its length differs from ``n_samples``.
    """
    # Validate explicitly rather than with ``assert`` so the check still runs
    # under ``python -O``; done before any sampling so a bad call consumes no
    # random state.
    if set_X is not None and len(set_X) != n_samples:
        raise ValueError(
            "set_X must have length n_samples "
            "(got {} values for n_samples={})".format(len(set_X), n_samples)
        )

    p_z = 0.5
    p_x_z = [0.9, 0.1]             # indexed by z
    p_y_xz = [0.2, 0.4, 0.6, 0.8]  # indexed by x + 2*z

    z = np.random.binomial(n=1, p=p_z, size=n_samples)

    if set_X is not None:
        x = set_X
    else:
        # Per-sample success probability for X, looked up from its z value.
        p_x = np.choose(z, p_x_z)
        x = np.random.binomial(n=1, p=p_x, size=n_samples)

    # x + 2*z maps (x, z) = (0,0), (1,0), (0,1), (1,1) to table rows 0..3.
    p_y = np.choose(x + 2 * z, p_y_xz)
    y = np.random.binomial(n=1, p=p_y, size=n_samples)

    if show_z:
        return pd.DataFrame({"x": x, "y": y, "z": z})

    return pd.DataFrame({"x": x, "y": y})



In [27]:
import pandas as pd
import numpy as np

### Z와 X의 조건에 따라 Y의 선택 확률을 임의로 조정해 보자

In [184]:
# Simulation size and the model's probability tables.
n_samples = 100000
p_z = 0.5                      # P(Z=1)
p_x_z = [0.9, 0.1]             # P(X=1 | Z=z), indexed by z = 0, 1
p_y_xz = [0.2, 0.4, 0.6, 0.8]  # P(Y=1 | X, Z) for (x, z) = (0,0), (1,0), (0,1), (1,1)

# Draw Z first; X and Y are sampled conditionally on it below.
z = np.random.binomial(1, p_z, n_samples)

# Per-sample P(X=1 | Z): 0.9 where z == 0, 0.1 where z == 1.
p_x = np.take(p_x_z, z)
x = np.random.binomial(1, p_x, n_samples)

# Per-sample P(Y=1 | X, Z); x + 2*z selects the matching table entry.
p_y = np.take(p_y_xz, x + 2 * z)
y = np.random.binomial(1, p_y, n_samples)
    

In [182]:
# Collect the sampled z, x and y arrays into one DataFrame, one row per sample.
data = pd.DataFrame({"z":z,"x":x, "y":y})

In [183]:
# Count how many rows fall into each (z, x, y) combination, ordered by index.
data.value_counts().sort_index()

z  x  y
0  0  0     4051
      1      996
   1  0    27132
      1    17858
1  0  0    17925
      1    26946
   1  0     1020
      1     4072
dtype: int64