We simulate $$Y^e = \beta_1 X_1^e + \beta_2 X_2^e + \epsilon^e,$$ for two environments $e \in \{r,f\}$ (regular and faulty). In the following, we consider Student's t margins and denote by $Student(\sigma , \upsilon)$ a Student's t distribution with scale $\sigma$ and $\upsilon$ degrees of freedom. Note that this distribution is regularly varying with index $\alpha = \upsilon$, and thus $\gamma = 1/\upsilon$. The smaller $\upsilon$ is, the heavier the tails of the distribution are. 
$$
X_1^r \sim Student(2 , 5), \qquad 
X_2^r \sim Student(3 , 6), \qquad 
\epsilon^r \sim Student(5 , 10),
$$
$$
X_1^f \sim Student(5 , 1), \qquad 
X_2^f \sim Student(3 , 1.5), \qquad 
\epsilon^f \sim Student(3 , 10).
$$

This is the code written on the 17th of November for the Student's t distribution.

In [106]:
import itertools
import numpy as np
import linear_regression
from LICP.licp import pirca
from sklearn.linear_model import LinearRegression
import importlib
importlib.reload(linear_regression)
from statistic import statistic_fun
from generation_process import generation 
from linear_regression import linear_regression_fun

 
# Monte Carlo comparison of the residual-tail statistic with PIRCA.
#
# For each of N_REP simulated pairs of environments (regular, faulty):
#   * every non-empty subset of the two predictors is used for a regression in
#     each environment, and the subset is counted as plausible when the
#     statistic computed from the two residual vectors is below CRITICAL_VALUE;
#   * PIRCA is run once with the permutation test and once with the greedy
#     bootstrap search, and what it reports is pooled over replications.

N_REP = 100            # number of simulated replications
CRITICAL_VALUE = 3.84  # presumably the 95% quantile of a chi-square(1) -- TODO confirm

sizes = [10**3, 10**5]  # presumably one sample size per environment -- TODO confirm

# (scale, degrees of freedom) pairs; the values match the Student margins
# stated in the text for X_1, X_2 and epsilon, in that order.
coef_r = [[2, 5], [3, 6], [5, 10]]    # coefficients for the regular environment
coef_f = [[5, 1], [3, 1.5], [3, 10]]  # coefficients for the faulty environment
coef = [coef_r, coef_f]

linear_coef = [[1, 2], [2, 1]]

permutation_plausible = []  # entries of pirca_inst.plausible['4'], pooled over replications
greedy_parents = []         # entries of pirca_inst.parents[t], pooled over replications

model = LinearRegression()

# Candidate predictor sets as column-index arrays. An ordered tuple (rather
# than a set) pins the order to (0,), (1,), (0, 1), which the report below
# relies on: accepted[k] counts the acceptances of subsets[k].
predictors = (0, 1)
subsets = [
    np.array(combo)
    for size in range(1, len(predictors) + 1)
    for combo in itertools.combinations(predictors, size)
]
num_subsets = len(subsets)
accepted = np.zeros(num_subsets, dtype=int)
extr_tail_fraction = [0.95, 0.95]

for i in range(N_REP):
    sample = generation(sizes, coef, linear_coef)
    sample_r = sample[0]
    sample_f = sample[1]

    for k, subset in enumerate(subsets):
        Y_r_hat, resid_r = linear_regression_fun(sample_r, subset)
        Y_f_hat, resid_f = linear_regression_fun(sample_f, subset)
        resid = [resid_r, resid_f]
        value_stat = statistic_fun(resid, extr_tail_fraction)
        if value_stat < CRITICAL_VALUE:
            # Integer count: accumulating 0.01 in floating point drifts and
            # printed values such as 94.99999999999999.
            accepted[k] += 1

    # Columns are exposed to PIRCA as single-column arrays named '1'..'4'.
    # Column '3' is removed (presumably the noise term -- TODO confirm) and
    # '4' is the target.
    data_f = {str(j + 1): sample_f[:, j].reshape(-1, 1) for j in range(4)}
    data_r = {str(j + 1): sample_r[:, j].reshape(-1, 1) for j in range(4)}
    del data_f['3']
    del data_r['3']

    pirca_inst = pirca(model=model)
    pirca_inst.compute_parents([data_f, data_r], targets=['4'], test='permutation')
    permutation_plausible.extend(pirca_inst.plausible['4'])

    pirca_inst = pirca(model=model)
    pirca_inst.compute_parents([data_f, data_r], targets=['4'], test='bootstrap', causal_search='greedy', alpha=0.5)
    for t in pirca_inst.parents:
        greedy_parents.extend(pirca_inst.parents[t])

# Acceptance fractions, kept under the name `score` for any later cell using it.
score = accepted / N_REP


def _as_name_set(entry):
    """Return a reported entry as a frozenset of variable names.

    A bare string is treated as a one-variable set; anything else is taken
    to be an iterable of names. Entries that cannot be converted yield an
    empty set, which never matches a queried set.
    """
    if isinstance(entry, str):
        return frozenset((entry,))
    try:
        return frozenset(entry)
    except TypeError:
        return frozenset()


def _times_reported(entries, *names):
    """Count how many of `entries` equal, as sets, the variables `names`.

    The representation PIRCA uses for a one-variable set is not visible
    here, and `('1')` is just the string '1', not a tuple. Comparing as
    sets counts both '1' and ('1',) as the set {1}.
    """
    wanted = frozenset(names)
    return sum(_as_name_set(entry) == wanted for entry in entries)


out_of = f"times out of {N_REP}"

print("The set {1,2} is plausible ", int(accepted[2]), out_of)
print("The set {1} is plausible ", int(accepted[0]), out_of)
print("The set {2} is plausible ", int(accepted[1]), out_of)

print("The set {1,2} is PIRCA permutation plausible ", _times_reported(permutation_plausible, '1', '2'), out_of)
print("The set {1} is PIRCA permutation plausible ", _times_reported(permutation_plausible, '1'), out_of)
print("The set {2} is PIRCA permutation plausible ", _times_reported(permutation_plausible, '2'), out_of)

print("The set {1,2} is PIRCA greedy parents ", _times_reported(greedy_parents, '1', '2'), out_of)
print("The set {1} is PIRCA greedy parents ", _times_reported(greedy_parents, '1'), out_of)
print("The set {2} is PIRCA greedy parents ", _times_reported(greedy_parents, '2'), out_of)




Model Training Phase
|████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00%
 Computing Plausible Sets
Greedily finding parent sets of size maximal 3
Model Training Phase████████-------------------------------------------------------------------------| 27.78%
|████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00%
 Computing Plausible Sets
Greedily finding parent sets of size maximal 3
Model Training Phase██████████████-------------------------------------------------------------------| 33.33%
|████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00%
 Computing Plausible Sets
Greedily finding parent sets of size maximal 3
Model Training Phase██████████████-------------------------------------------------------------------| 33.33%
|████████████████████████████████████████████████████████████████████████████████████████████████

In [102]:
# Debug probe: print one `True` for every element obtained by iterating
# `parents` on the most recently fitted PIRCA instance.
for _entry in pirca_inst.parents:
    print(True)

### Here is the same experiment with the homogeneous noise assumption fulfilled ($\epsilon^f$ has the same distribution as $\epsilon^r$)

In [107]:
import itertools
import numpy as np
import linear_regression
from LICP.licp import pirca
from sklearn.linear_model import LinearRegression
import importlib
importlib.reload(linear_regression)
from statistic import statistic_fun
from generation_process import generation 
from linear_regression import linear_regression_fun

 
# Monte Carlo comparison of the residual-tail statistic with PIRCA, with the
# same noise coefficients in both environments.
#
# For each of N_REP simulated pairs of environments (regular, faulty):
#   * every non-empty subset of the two predictors is used for a regression in
#     each environment, and the subset is counted as plausible when the
#     statistic computed from the two residual vectors is below CRITICAL_VALUE;
#   * PIRCA is run once with the permutation test (alpha=0.01) and once with
#     the greedy bootstrap search, and what it reports is pooled over
#     replications.

N_REP = 100            # number of simulated replications
CRITICAL_VALUE = 3.84  # presumably the 95% quantile of a chi-square(1) -- TODO confirm

sizes = [10**3, 10**5]  # presumably one sample size per environment -- TODO confirm

# (scale, degrees of freedom) pairs for X_1, X_2 and epsilon, in that order;
# the last pair (the noise) is identical in the two environments.
coef_r = [[2, 5], [3, 6], [5, 10]]    # coefficients for the regular environment
coef_f = [[5, 1], [3, 1.5], [5, 10]]  # coefficients for the faulty environment
coef = [coef_r, coef_f]

linear_coef = [[1, 2], [2, 1]]

permutation_plausible = []  # entries of pirca_inst.plausible['4'], pooled over replications
greedy_parents = []         # entries of pirca_inst.parents[t], pooled over replications

model = LinearRegression()

# Candidate predictor sets as column-index arrays. An ordered tuple (rather
# than a set) pins the order to (0,), (1,), (0, 1), which the report below
# relies on: accepted[k] counts the acceptances of subsets[k].
predictors = (0, 1)
subsets = [
    np.array(combo)
    for size in range(1, len(predictors) + 1)
    for combo in itertools.combinations(predictors, size)
]
num_subsets = len(subsets)
accepted = np.zeros(num_subsets, dtype=int)
extr_tail_fraction = [0.95, 0.95]

for i in range(N_REP):
    sample = generation(sizes, coef, linear_coef)
    sample_r = sample[0]
    sample_f = sample[1]

    for k, subset in enumerate(subsets):
        Y_r_hat, resid_r = linear_regression_fun(sample_r, subset)
        Y_f_hat, resid_f = linear_regression_fun(sample_f, subset)
        resid = [resid_r, resid_f]
        value_stat = statistic_fun(resid, extr_tail_fraction)
        if value_stat < CRITICAL_VALUE:
            # Integer count: accumulating 0.01 in floating point drifts and
            # printed values such as 94.99999999999999.
            accepted[k] += 1

    # Columns are exposed to PIRCA as single-column arrays named '1'..'4'.
    # Column '3' is removed (presumably the noise term -- TODO confirm) and
    # '4' is the target.
    data_f = {str(j + 1): sample_f[:, j].reshape(-1, 1) for j in range(4)}
    data_r = {str(j + 1): sample_r[:, j].reshape(-1, 1) for j in range(4)}
    del data_f['3']
    del data_r['3']

    pirca_inst = pirca(model=model)
    pirca_inst.compute_parents([data_f, data_r], targets=['4'], test='permutation', alpha=0.01)
    permutation_plausible.extend(pirca_inst.plausible['4'])

    pirca_inst = pirca(model=model)
    pirca_inst.compute_parents([data_f, data_r], targets=['4'], test='bootstrap', causal_search='greedy', alpha=0.5)
    for t in pirca_inst.parents:
        greedy_parents.extend(pirca_inst.parents[t])

# Acceptance fractions, kept under the name `score` for any later cell using it.
score = accepted / N_REP


def _as_name_set(entry):
    """Return a reported entry as a frozenset of variable names.

    A bare string is treated as a one-variable set; anything else is taken
    to be an iterable of names. Entries that cannot be converted yield an
    empty set, which never matches a queried set.
    """
    if isinstance(entry, str):
        return frozenset((entry,))
    try:
        return frozenset(entry)
    except TypeError:
        return frozenset()


def _times_reported(entries, *names):
    """Count how many of `entries` equal, as sets, the variables `names`.

    The representation PIRCA uses for a one-variable set is not visible
    here, and `('1')` is just the string '1', not a tuple. Comparing as
    sets counts both '1' and ('1',) as the set {1}.
    """
    wanted = frozenset(names)
    return sum(_as_name_set(entry) == wanted for entry in entries)


out_of = f"times out of {N_REP}"

print("The set {1,2} is plausible ", int(accepted[2]), out_of)
print("The set {1} is plausible ", int(accepted[0]), out_of)
print("The set {2} is plausible ", int(accepted[1]), out_of)

print("The set {1,2} is PIRCA permutation plausible ", _times_reported(permutation_plausible, '1', '2'), out_of)
print("The set {1} is PIRCA permutation plausible ", _times_reported(permutation_plausible, '1'), out_of)
print("The set {2} is PIRCA permutation plausible ", _times_reported(permutation_plausible, '2'), out_of)

print("The set {1,2} is PIRCA greedy parents ", _times_reported(greedy_parents, '1', '2'), out_of)
print("The set {1} is PIRCA greedy parents ", _times_reported(greedy_parents, '1'), out_of)
print("The set {2} is PIRCA greedy parents ", _times_reported(greedy_parents, '2'), out_of)




Model Training Phase
|████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00%
 Computing Plausible Sets
Greedily finding parent sets of size maximal 3
Model Training Phase████████-------------------------------------------------------------------------| 27.78%
|████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00%
 Computing Plausible Sets
Greedily finding parent sets of size maximal 3
Model Training Phase████████-------------------------------------------------------------------------| 27.78%
|████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00%
 Computing Plausible Sets
Greedily finding parent sets of size maximal 3
Model Training Phase████████-------------------------------------------------------------------------| 27.78%
|████████████████████████████████████████████████████████████████████████████████████████████████