In [1]:
import warnings
warnings.simplefilter("always")

import numpy as np
from esa_2scm.syniv import SynIV
from esa_2scm import ESA2SCM

In [2]:
x_0var = [1,1,1]
x_M2 = [1,1,2,3]
x_M3 = [1,1,2,3,4,5]

In [3]:
print(SynIV.esa(x_0var, M=5))
print()
print(SynIV.dense_rank(x_0var))
print()
print(SynIV.m_split(x_0var, strategy="auto"))

[1 1 1]

[1 1 1]

[1 1 1]


                            Data is excessively concentrated on a single segment to perform meaningful ESA. Using Dense Rank method instead.
                            (Single value accounts for 100.00% of the total dataset while single segment threshold for M=5 is fixed at 20.00%).
                            (This may indicate bias in the dataset, and may happen more commonly if the provided data is discrete and imbalanced). 
                            


In [4]:
print(SynIV.esa(x_M2, M=5))
print()
print(SynIV.dense_rank(x_M2))
print()
print(SynIV.m_split(x_M2, strategy="auto"))

[1 1 2 2]

[1 1 2 3]

[-1 -1  1  1]


                            Data is excessively concentrated on a single segment to perform meaningful ESA. Using M=2 instead of M=5.
                            (Single value accounts for 50.00% of the total dataset while single segment threshold for M=5 is fixed at 20.00%).
                            (This may indicate bias in the dataset, and may happen more commonly if the provided data is discrete and imbalanced). 
                            


In [5]:
print(SynIV.esa(x_M3, M=5))
print()
print(SynIV.dense_rank(x_M3))
print()
print(SynIV.m_split(x_M3, strategy="auto"))

[1 1 2 2 3 3]

[1 1 2 3 4 5]

[-1 -1 -1  1  1  1]


                            Data is excessively concentrated on a single segment to perform meaningful ESA. Using M=3 instead of M=5.
                            (Single value accounts for 33.33% of the total dataset while single segment threshold for M=5 is fixed at 20.00%).
                            (This may indicate bias in the dataset, and may happen more commonly if the provided data is discrete and imbalanced). 
                            


In [6]:
correct_count = 0
wrong_count = 0
total_runs = 100
b_list = []
for _ in range(total_runs):
    np.random.seed(_)
    N = 10000
    shape, scale = 2., 2.
    x2 = np.random.random(size=N)
    e1 = np.random.normal(size=N)
    b12 = 1.8
    x1 = b12 * x2 + e1
    b_list.append(b12)
    model = ESA2SCM(x1, x2)
    model.fit('esa', M=5)
    if model.causal_direction == "x2->x1":
        correct_count += 1
    else: wrong_count += 1

print(f"Detected correctly: {correct_count}/{total_runs}")
print(f"Detected wrongly: {wrong_count}/{total_runs}")
print(f"Accuracy: {correct_count / total_runs * 100:.2f}%")
print(f"mean estimated causal impact coef: {np.mean(b_list)}")
print()
print()
print(model.causal_coef)
print(model.causal_direction)
print(model.score)
print(model.x1)
print(model.x2)
print(model.x_hat)
print(model.z1)
print(model.z2)
print(model.corr_x2_to_slsiv)
print(model.corr_x1_to_slsiv)

model.summary()

Detected correctly: 97/100
Detected wrongly: 3/100
Accuracy: 97.00%
mean estimated causal impact coef: 1.800000000000001


1.7947457098599489
x2->x1
0.1998
[1.34408743 0.86129911 0.88236526 ... 0.20136316 0.67327306 2.26203288]
[0.67227856 0.4880784  0.82549517 ... 0.20172246 0.01562267 0.98991499]
[0.59515583 0.50543579 0.50543579 ... 0.41571574 0.50543579 0.68487588]
[4 3 3 ... 2 3 5]
[4 3 5 ... 1 1 5]
0.9792399061936715
0.9435389805488189


Unnamed: 0,x2->x1,x1->x2
Causal Direction,True,False
Goodness of Fit,0.1998,0.19678
Causal Coefficient,1.794746,0.119574
Corr (2SLS_IV-Explanatory),0.97924,0.943539


In [7]:
N = 10000
shape, scale = 2., 2.
x2 = np.random.gamma(shape, scale, N)
e1 = np.random.normal(size=N)
b12 = 1.8
x1 = b12 * x2 + e1

In [8]:
model = ESA2SCM(x1, x2)
model.fit()
print(model.causal_direction)
print(model.causal_coef)
model.summary()

x2->x1
1.7918289389215107


Unnamed: 0,x2->x1,x1->x2
Causal Direction,True,False
Goodness of Fit,0.52938,0.52621
Causal Coefficient,1.791829,0.53194
Corr (2SLS_IV-Explanatory),0.742845,0.745426


In [9]:
model = ESA2SCM(x1, x2, prior_knowledge="x1->x2")
model.fit()
print(model.causal_direction)
print(model.causal_coef)
model.summary()

x1->x2 (prior knowledge)
0.5361920413706509


Unnamed: 0,x1->x2 (Predetermined)
Causal Direction,x1->x2
Goodness of Fit,0.962203
Causal Coefficient,0.536192


In [10]:
print(model.x1)

[ 3.65951274 14.05187358  4.47800236 ...  4.17280154 10.78761116
  5.64935329]


In [11]:
print(model.z1)

AttributeError: Synthetic IV (z1) is not generated as Prior Knowledge has been set

In [12]:
print(model.corr_x1_to_slsiv)

AttributeError: Synthetic IV to calculate correlation coefficient is not generated as Prior knowledge has been set