# Measures on LUCAS example: lung cancer scenario

This notebook implements the second scenario in the paper.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

from pgmpy.models import BayesianNetwork as BN
from pgmpy.factors.discrete import TabularCPD as CPD
from pgmpy.inference import VariableElimination

from src.SCMMappings_1_1 import Abstraction
from src.measuring import ICEvaluator,IILEvaluator,ISILEvaluator,ISCEvaluator
from src.examples import LUCASmodels as lucas
import src.evaluationsets as es
import src.learning as lng

In [2]:
# Seed NumPy's global generator, which the np.random.choice calls in the
# downstream-task cells draw from.
# NOTE(review): whether the pgmpy simulate() calls also draw from this global
# generator is not visible here - confirm before relying on exact reproducibility.
np.random.seed(0)

In [3]:
def rank_solutions(A,J):
    """Score the candidate solutions of abstraction `A` on `J` and rank them per measure.

    For each of the four evaluators (IC, IIL, ISIL, ISC) this calls
    ``lng.list_all_alphas_and_errors(A, J, evaluator)`` and sorts the returned
    solutions by increasing error.

    Parameters
    ----------
    A : abstraction object, forwarded unchanged to ``lng.list_all_alphas_and_errors``.
    J : evaluation set, forwarded unchanged to ``lng.list_all_alphas_and_errors``.

    Returns
    -------
    alphas : list with one entry per measure, holding the solutions in the
        order the helper returned them (unsorted).
    res : np.ndarray built from the per-measure error lists, one row per measure
        (also unsorted).
    df : pd.DataFrame with, for every measure, a ``*_val`` column (errors in
        ascending order) and a ``*_rank`` column (the solutions in that order).
    """
    # (evaluator, value-column name, solution-column name), in output column order.
    measure_table = [
        (ICEvaluator, 'IC_val', 'IC_rank'),
        (IILEvaluator, 'IIL_val', 'IIL_rank'),
        (ISILEvaluator, 'ISIL_val', 'ISIL_rank'),
        (ISCEvaluator, 'ISC_val', 'ISC_rank'),
    ]

    alphas = []
    error_rows = []
    for evaluator, _, _ in measure_table:
        measure_errors, measure_alphas = lng.list_all_alphas_and_errors(A,J,evaluator)
        alphas.append(measure_alphas)
        error_rows.append(measure_errors)
    res = np.array(error_rows)

    data = {}
    for row, (_, val_col, rank_col) in enumerate(measure_table):
        # argsort with its default algorithm: the relative order of tied
        # errors is not guaranteed.
        order = np.argsort(res[row,:])
        data[val_col] = res[row,:][order]
        data[rank_col] = [alphas[row][idx] for idx in order]
    df = pd.DataFrame(data)

    return alphas,res,df

## LUCAS Model

In [4]:
# Base (low-level) model, built by the project's LUCASmodels module.
M0 = lucas.lucas0()

In [5]:
# Distribution of 'Lung Cancer' in the base model when 'Anxiety' and
# 'Peer Pressure' are intervened upon: joint over the three variables divided
# by the marginal over the two intervened ones.
M0do1 = M0.do(['Anxiety','Peer Pressure'])
infer = VariableElimination(M0do1)
P_FS = infer.query(['Lung Cancer','Anxiety','Peer Pressure'])
P_S = infer.query(['Anxiety','Peer Pressure'])
P_F_given_S = P_FS/P_S
print(P_F_given_S)
# Flatten the two intervened variables into one axis of size 4; per the printed
# factor, rows index 'Lung Cancer' and columns the (Anxiety, Peer Pressure) pairs.
mu1 = P_F_given_S.values.reshape((2,4))

+----------------+------------+------------------+------------------------------------------+
| Lung Cancer    | Anxiety    | Peer Pressure    |   phi(Lung Cancer,Anxiety,Peer Pressure) |
| Lung Cancer(0) | Anxiety(0) | Peer Pressure(0) |                                   0.4379 |
+----------------+------------+------------------+------------------------------------------+
| Lung Cancer(0) | Anxiety(0) | Peer Pressure(1) |                                   0.2709 |
+----------------+------------+------------------+------------------------------------------+
| Lung Cancer(0) | Anxiety(1) | Peer Pressure(0) |                                   0.2058 |
+----------------+------------+------------------+------------------------------------------+
| Lung Cancer(0) | Anxiety(1) | Peer Pressure(1) |                                   0.1808 |
+----------------+------------+------------------+------------------------------------------+
| Lung Cancer(1) | Anxiety(0) | Peer Pressure(0) |          

In [6]:
# Show the 2x4 matrix produced by the reshape in the previous cell.
mu1

array([[0.43788959, 0.27088918, 0.20578805, 0.18076425],
       [0.56211041, 0.72911082, 0.79421195, 0.81923575]])

## Lung Cancer Model

In [7]:
# Abstract (high-level) model: 'Environment' and 'Genetics' are both parents
# of 'Lung Cancer'.
M1 = BN([('Environment','Lung Cancer'),('Genetics','Lung Cancer')])

# Conditional table of 'Lung Cancer' (2 rows) given the 3 x 2 parent
# configurations (6 columns).
nu = np.array([[.8,.45,.55,.4,.45,.3],[.2,.55,.45,.6,.55,.7]])

# Priors of the two parentless variables: (name, cardinality, column vector).
root_priors = [
    ('Environment', 3, np.array([[.7],[.1],[.2]])),
    ('Genetics', 2, np.array([[.3],[.7]])),
]
for var_name, card, prior in root_priors:
    cpd = CPD(variable=var_name,
              variable_card=card,
              values=prior,
              evidence=None,
              evidence_card=None)
    M1.add_cpds(cpd)

cpd = CPD(variable='Lung Cancer',
          variable_card=2,
          values=nu,
          evidence=['Environment','Genetics'],
          evidence_card=[3,2])
M1.add_cpds(cpd)

# Last expression of the cell: displays the result of pgmpy's consistency check.
M1.check_model()

True

In [8]:
# Distribution of 'Lung Cancer' in the abstract model when 'Environment' is
# intervened upon: joint divided by the marginal of the intervened variable.
M1do1 = M1.do(['Environment'])
infer = VariableElimination(M1do1)
P_FS = infer.query(['Lung Cancer','Environment'])
P_S = infer.query(['Environment'])
P_F_given_S = P_FS/P_S
print(P_F_given_S)
# Per the printed factor: rows index 'Lung Cancer', columns index 'Environment'.
nu1 = P_F_given_S.values

+----------------+----------------+--------------------------------+
| Lung Cancer    | Environment    |   phi(Lung Cancer,Environment) |
| Lung Cancer(0) | Environment(0) |                         0.5550 |
+----------------+----------------+--------------------------------+
| Lung Cancer(0) | Environment(1) |                         0.4450 |
+----------------+----------------+--------------------------------+
| Lung Cancer(0) | Environment(2) |                         0.3450 |
+----------------+----------------+--------------------------------+
| Lung Cancer(1) | Environment(0) |                         0.4450 |
+----------------+----------------+--------------------------------+
| Lung Cancer(1) | Environment(1) |                         0.5550 |
+----------------+----------------+--------------------------------+
| Lung Cancer(1) | Environment(2) |                         0.6550 |
+----------------+----------------+--------------------------------+


In [9]:
# Base-model variables that take part in the abstraction.
R = ['Anxiety','Peer Pressure','Genetics','Allergy','Lung Cancer']
# Variable-level map: each base variable in R -> the abstract variable it is
# grouped into ('Anxiety' and 'Peer Pressure' both go to 'Environment';
# 'Genetics' and 'Allergy' both go to 'Genetics').
a = {'Anxiety': 'Environment',
     'Peer Pressure': 'Environment',
    'Genetics': 'Genetics',
    'Allergy': 'Genetics',
    'Lung Cancer': 'Lung Cancer'}
# Abstraction from the base model M0 to the abstract model M1.
# NOTE(review): the value-level maps are not given here; they are presumably
# what the ranking cells below enumerate - confirm against src.learning.
A = Abstraction(M0,M1,R,a)

## Abstraction evaluation

In [10]:
# Evaluation set with a single pair: both parents of 'Lung Cancer' on one
# side, 'Lung Cancer' on the other.
# NOTE(review): each pair is presumably [intervened variables, outcome
# variables] - confirm against src.learning / src.evaluationsets.
Jparental = [[['Environment','Genetics'], ['Lung Cancer']]]
alphas_par,res_par,df_par = rank_solutions(A,Jparental)

In [11]:
# Evaluation set with three pairs: each parent of 'Lung Cancer' on its own,
# plus both parents together.
# NOTE(review): each pair is presumably [intervened variables, outcome
# variables] - confirm against src.learning / src.evaluationsets.
Jcausal = [[['Environment'], ['Lung Cancer']],
          [['Genetics'], ['Lung Cancer']],
          [['Environment','Genetics'],['Lung Cancer']]]
alphas_cau,res_cau,df_cau = rank_solutions(A,Jcausal)

In [12]:
# Evaluation set with a single pair: 'Environment' against 'Lung Cancer' only.
# NOTE(review): each pair is presumably [intervened variables, outcome
# variables] - confirm against src.learning / src.evaluationsets.
Juser = [[['Environment'], ['Lung Cancer']]]
alphas_use,res_use,df_use = rank_solutions(A,Juser)

In [13]:
# Ranking table for Jparental: per measure, a *_val column (errors, ascending)
# and a *_rank column (the solutions in that order).
df_par

Unnamed: 0,IC_val,IC_rank,IIL_val,IIL_rank,ISIL_val,ISIL_rank,ISC_val,ISC_rank
0,0.329364,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.329364,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.220678,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.220678,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [..."
1,0.329364,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.329364,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.220678,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.220678,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [..."
2,0.329364,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.329364,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 0], [...",0.237869,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [...",0.237869,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [..."
3,0.353581,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [...",0.353581,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [...",0.237869,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [...",0.237869,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [..."
4,0.353581,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [...",0.353581,"{'Environment': [[1, 0, 0, 0], [0, 1, 1, 0], [...",0.244987,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 1], [...",0.244987,"{'Environment': [[1, 0, 0, 0], [0, 1, 0, 1], [..."
...,...,...,...,...,...,...,...,...
1003,0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 1, 0, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 1, 0, 0], [..."
1004,0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 1, 0, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 1, 0, 0], [..."
1005,0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [1, 0, 1, 0], [..."
1006,0.619059,"{'Environment': [[0, 0, 0, 1], [0, 1, 0, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [0, 1, 0, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [0, 0, 1, 0], [...",0.619059,"{'Environment': [[0, 0, 0, 1], [0, 0, 1, 0], [..."


In [14]:
# Solution with the lowest ISIL error under Jcausal (row 0 of the sorted table).
df_cau['ISIL_rank'][0]

{'Environment': array([[0, 0, 1, 1],
        [0, 1, 0, 0],
        [1, 0, 0, 0]], dtype=int32),
 'Lung Cancer': array([[0, 1],
        [1, 0]], dtype=int32),
 'Genetics': array([[0, 1, 1, 1],
        [1, 0, 0, 0]], dtype=int32)}

In [15]:
# Value-level maps of the lowest-ISIL-error solution found with Jcausal.
best_Jc = df_cau['ISIL_rank'][0]
aA_Jc = best_Jc['Environment']
aB_Jc = best_Jc['Lung Cancer']

# Compare the abstract mechanism nu1 with the base mechanism mu1 carried
# through the abstraction: pseudo-inverse of the 'Environment' map on the
# right, 'Lung Cancer' map on the left.
path_Jc = aB_Jc@mu1@np.linalg.pinv(aA_Jc)
print('IS paths: \n {0} \n {1}'.format(nu1,path_Jc))

IS paths: 
 [[0.555 0.445 0.345]
 [0.445 0.555 0.655]] 
 [[0.80672385 0.72911082 0.56211041]
 [0.19327615 0.27088918 0.43788959]]


In [16]:
# Solution with the lowest ISIL error under Jparental (row 0 of the sorted table).
df_par['ISIL_rank'][0]

{'Environment': array([[1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 1]], dtype=int32),
 'Lung Cancer': array([[1, 0],
        [0, 1]], dtype=int32),
 'Genetics': array([[0, 1, 0, 0],
        [1, 0, 1, 1]], dtype=int32)}

In [17]:
# Value-level maps of the lowest-ISIL-error solution found with Jparental.
best_Jp = df_par['ISIL_rank'][0]
aA_Jp = best_Jp['Environment']
aB_Jp = best_Jp['Lung Cancer']

# Compare the abstract mechanism nu1 with the base mechanism mu1 carried
# through the abstraction: pseudo-inverse of the 'Environment' map on the
# right, 'Lung Cancer' map on the left.
path_Jp = aB_Jp@mu1@np.linalg.pinv(aA_Jp)
print('IS paths: \n {0} \n {1}'.format(nu1,path_Jp))

IS paths: 
 [[0.555 0.445 0.345]
 [0.445 0.555 0.655]] 
 [[0.43788959 0.27088918 0.19327615]
 [0.56211041 0.72911082 0.80672385]]


In [18]:
# Solution with the lowest ISIL error under Juser (row 0 of the sorted table).
df_use['ISIL_rank'][0]

{'Environment': array([[1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 1]], dtype=int32),
 'Lung Cancer': array([[1, 0],
        [0, 1]], dtype=int32),
 'Genetics': array([[1, 1, 1, 0],
        [0, 0, 0, 1]], dtype=int32)}

In [19]:
# Value-level maps of the lowest-ISIL-error solution found with Juser.
best_Ju = df_use['ISIL_rank'][0]
aA_Ju = best_Ju['Environment']
aB_Ju = best_Ju['Lung Cancer']

# Compare the abstract mechanism nu1 with the base mechanism mu1 carried
# through the abstraction: pseudo-inverse of the 'Environment' map on the
# right, 'Lung Cancer' map on the left.
path_Ju = aB_Ju@mu1@np.linalg.pinv(aA_Ju)
print('IS paths: \n {0} \n {1}'.format(nu1,path_Ju))

IS paths: 
 [[0.555 0.445 0.345]
 [0.445 0.555 0.655]] 
 [[0.43788959 0.27088918 0.19327615]
 [0.56211041 0.72911082 0.80672385]]


## Downstream task: exam

In [20]:
# Number of repetitions; the mean and standard deviation reported by the
# downstream-task and timing cells are taken over these repetitions.
n_simul = 10
# Number of individuals simulated in each repetition.
n_samples = 10000

In [21]:
# Downstream task under the abstract intervention do(Environment = env_value):
# count how many of n_samples simulated individuals end up with abstract
# 'Lung Cancer' = 1, directly in the abstract model M1 (column 0) and in the
# base model M0 driven through each learned abstraction (columns 1-3).
env_value = 0

# (column of `samples`, 'Environment' map, 'Lung Cancer' map) of the
# lowest-ISIL-error solution under Jcausal, Jparental and Juser respectively.
abstractions = [(1, aA_Jc, aB_Jc), (2, aA_Jp, aB_Jp), (3, aA_Ju, aB_Ju)]

# Column `env_value` of the pseudo-inverse of an 'Environment' map is used as
# the distribution over the 4 joint (Anxiety, Peer Pressure) settings. It does
# not change between repetitions, so it is computed once. Rounding strips the
# floating-point noise pinv can leave behind (e.g. tiny negative entries),
# which np.random.choice would reject.
base_probs = {col: np.around(np.linalg.pinv(aA)[:, env_value], decimals=10)
              for col, aA, _ in abstractions}

samples = np.zeros((n_simul,4))

for i in range(n_simul):
    # Reference: simulate the intervention directly in the abstract model.
    sample_M1 = M1.simulate(n_samples=n_samples, do={'Environment': env_value}, show_progress=False)
    samples[i,0] = sample_M1['Lung Cancer'].sum()

    for col, aA, aB in abstractions:
        # Draw, for every individual, the base-level setting that realises
        # the abstract intervention.
        samples_AnxPP = np.random.choice(4, size=n_samples, p=base_probs[col])

        # Number of base-level 'Lung Cancer' = 1 outcomes over all settings.
        base_ones = 0
        for j in range(4):
            n_interv = np.sum(samples_AnxPP==j)
            if n_interv > 0:  # skip settings nobody was assigned to
                # Joint index j encodes (Anxiety, Peer Pressure) as (j // 2, j % 2).
                anxiety, peer_pressure = divmod(j, 2)
                sample_M0 = M0.simulate(n_samples=n_interv,
                                        do={'Anxiety': anxiety, 'Peer Pressure': peer_pressure},
                                        show_progress=False)
                base_ones = base_ones + sample_M0['Lung Cancer'].sum()

        # Translate base outcomes into abstract ones with the 'Lung Cancer'
        # map (rows: abstract value, columns: base value) instead of
        # hard-coding which abstraction swaps the two labels.
        samples[i,col] = aB[1,0]*(n_samples - base_ones) + aB[1,1]*base_ones

print('Abstracted model: {0} +- {1}'.format(np.mean(samples[:,0]), np.std(samples[:,0])))
print('J_cau abstraction: {0} +- {1}'.format(np.mean(samples[:,1]), np.std(samples[:,1])))
print('J_par abstraction: {0} +- {1}'.format(np.mean(samples[:,2]), np.std(samples[:,2])))
print('J_use abstraction: {0} +- {1}'.format(np.mean(samples[:,3]), np.std(samples[:,3])))


Abstracted model: 4447.5 +- 31.572931444514303
J_cau abstraction: 1936.8 +- 32.276307099790706
J_par abstraction: 5630.0 +- 52.434721320895754
J_use abstraction: 5574.9 +- 45.33310048959811


In [22]:
# Downstream task under the abstract intervention do(Environment = env_value):
# count how many of n_samples simulated individuals end up with abstract
# 'Lung Cancer' = 1, directly in the abstract model M1 (column 0) and in the
# base model M0 driven through each learned abstraction (columns 1-3).
env_value = 1

# (column of `samples`, 'Environment' map, 'Lung Cancer' map) of the
# lowest-ISIL-error solution under Jcausal, Jparental and Juser respectively.
abstractions = [(1, aA_Jc, aB_Jc), (2, aA_Jp, aB_Jp), (3, aA_Ju, aB_Ju)]

# Column `env_value` of the pseudo-inverse of an 'Environment' map is used as
# the distribution over the 4 joint (Anxiety, Peer Pressure) settings. It does
# not change between repetitions, so it is computed once. Rounding strips the
# floating-point noise pinv can leave behind (e.g. tiny negative entries),
# which np.random.choice would reject.
base_probs = {col: np.around(np.linalg.pinv(aA)[:, env_value], decimals=10)
              for col, aA, _ in abstractions}

samples = np.zeros((n_simul,4))

for i in range(n_simul):
    # Reference: simulate the intervention directly in the abstract model.
    sample_M1 = M1.simulate(n_samples=n_samples, do={'Environment': env_value}, show_progress=False)
    samples[i,0] = sample_M1['Lung Cancer'].sum()

    for col, aA, aB in abstractions:
        # Draw, for every individual, the base-level setting that realises
        # the abstract intervention.
        samples_AnxPP = np.random.choice(4, size=n_samples, p=base_probs[col])

        # Number of base-level 'Lung Cancer' = 1 outcomes over all settings.
        base_ones = 0
        for j in range(4):
            n_interv = np.sum(samples_AnxPP==j)
            if n_interv > 0:  # skip settings nobody was assigned to
                # Joint index j encodes (Anxiety, Peer Pressure) as (j // 2, j % 2).
                anxiety, peer_pressure = divmod(j, 2)
                sample_M0 = M0.simulate(n_samples=n_interv,
                                        do={'Anxiety': anxiety, 'Peer Pressure': peer_pressure},
                                        show_progress=False)
                base_ones = base_ones + sample_M0['Lung Cancer'].sum()

        # Translate base outcomes into abstract ones with the 'Lung Cancer'
        # map (rows: abstract value, columns: base value) instead of
        # hard-coding which abstraction swaps the two labels.
        samples[i,col] = aB[1,0]*(n_samples - base_ones) + aB[1,1]*base_ones

print('Abstracted model: {0} +- {1}'.format(np.mean(samples[:,0]), np.std(samples[:,0])))
print('J_cau abstraction: {0} +- {1}'.format(np.mean(samples[:,1]), np.std(samples[:,1])))
print('J_par abstraction: {0} +- {1}'.format(np.mean(samples[:,2]), np.std(samples[:,2])))
print('J_use abstraction: {0} +- {1}'.format(np.mean(samples[:,3]), np.std(samples[:,3])))


Abstracted model: 5549.3 +- 34.287169611970015
J_cau abstraction: 2706.2 +- 46.893069850458716
J_par abstraction: 7302.4 +- 50.8865404601256
J_use abstraction: 7296.7 +- 51.544252831911336


In [23]:
# Downstream task under the abstract intervention do(Environment = env_value):
# count how many of n_samples simulated individuals end up with abstract
# 'Lung Cancer' = 1, directly in the abstract model M1 (column 0) and in the
# base model M0 driven through each learned abstraction (columns 1-3).
env_value = 2

# (column of `samples`, 'Environment' map, 'Lung Cancer' map) of the
# lowest-ISIL-error solution under Jcausal, Jparental and Juser respectively.
abstractions = [(1, aA_Jc, aB_Jc), (2, aA_Jp, aB_Jp), (3, aA_Ju, aB_Ju)]

# Column `env_value` of the pseudo-inverse of an 'Environment' map is used as
# the distribution over the 4 joint (Anxiety, Peer Pressure) settings. It does
# not change between repetitions, so it is computed once. Rounding strips the
# floating-point noise pinv can leave behind (e.g. tiny negative entries),
# which np.random.choice would reject; it is applied to every abstraction
# rather than to J_cau only.
base_probs = {col: np.around(np.linalg.pinv(aA)[:, env_value], decimals=10)
              for col, aA, _ in abstractions}

samples = np.zeros((n_simul,4))

for i in range(n_simul):
    # Reference: simulate the intervention directly in the abstract model.
    sample_M1 = M1.simulate(n_samples=n_samples, do={'Environment': env_value}, show_progress=False)
    samples[i,0] = sample_M1['Lung Cancer'].sum()

    for col, aA, aB in abstractions:
        # Draw, for every individual, the base-level setting that realises
        # the abstract intervention.
        samples_AnxPP = np.random.choice(4, size=n_samples, p=base_probs[col])

        # Number of base-level 'Lung Cancer' = 1 outcomes over all settings.
        base_ones = 0
        for j in range(4):
            n_interv = np.sum(samples_AnxPP==j)
            if n_interv > 0:  # skip settings nobody was assigned to
                # Joint index j encodes (Anxiety, Peer Pressure) as (j // 2, j % 2).
                anxiety, peer_pressure = divmod(j, 2)
                sample_M0 = M0.simulate(n_samples=n_interv,
                                        do={'Anxiety': anxiety, 'Peer Pressure': peer_pressure},
                                        show_progress=False)
                base_ones = base_ones + sample_M0['Lung Cancer'].sum()

        # Translate base outcomes into abstract ones with the 'Lung Cancer'
        # map (rows: abstract value, columns: base value) instead of
        # hard-coding which abstraction swaps the two labels.
        samples[i,col] = aB[1,0]*(n_samples - base_ones) + aB[1,1]*base_ones

print('Abstracted model: {0} +- {1}'.format(np.mean(samples[:,0]), np.std(samples[:,0])))
print('J_cau abstraction: {0} +- {1}'.format(np.mean(samples[:,1]), np.std(samples[:,1])))
print('J_par abstraction: {0} +- {1}'.format(np.mean(samples[:,2]), np.std(samples[:,2])))
print('J_use abstraction: {0} +- {1}'.format(np.mean(samples[:,3]), np.std(samples[:,3])))


Abstracted model: 6551.8 +- 44.57532949962344
J_cau abstraction: 4384.6 +- 53.868729333445394
J_par abstraction: 8073.2 +- 31.031596800680433
J_use abstraction: 8063.9 +- 35.1580716194731


## Timing 

In [24]:
# Time the full ranking with the Jparental evaluation set, n_simul times.
ts_par = []
for _ in range(n_simul):
    # Rebuilt on every repetition, outside the timed region.
    Jparental = [[['Environment','Genetics'], ['Lung Cancer']]]
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    t0 = time.perf_counter()
    alphas_par,res_par,df_par = rank_solutions(A,Jparental)
    ts_par.append(time.perf_counter() - t0)
print('J_par time: {0} +- {1}'.format(np.mean(ts_par), np.std(ts_par)))

J_par time: 8.806398344039916 +- 0.027615619112968986


In [25]:
# Time the full ranking with the Jcausal evaluation set, n_simul times.
ts_cau = []
for _ in range(n_simul):
    # Rebuilt on every repetition, outside the timed region.
    Jcausal = [[['Environment'], ['Lung Cancer']],
               [['Genetics'], ['Lung Cancer']],
               [['Environment','Genetics'],['Lung Cancer']]]
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    t0 = time.perf_counter()
    alphas_cau,res_cau,df_cau = rank_solutions(A,Jcausal)
    ts_cau.append(time.perf_counter() - t0)
print('J_cau time: {0} +- {1}'.format(np.mean(ts_cau), np.std(ts_cau)))

J_cau time: 23.921667623519898 +- 0.1328557116751109


In [26]:
# Time the full ranking with the Juser evaluation set, n_simul times.
ts_use = []
for _ in range(n_simul):
    # Rebuilt on every repetition, outside the timed region.
    Juser = [[['Environment'], ['Lung Cancer']]]
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    t0 = time.perf_counter()
    alphas_use,res_use,df_use = rank_solutions(A,Juser)
    ts_use.append(time.perf_counter() - t0)
# Label corrected: this cell times Juser, not Jcausal.
print('J_use time: {0} +- {1}'.format(np.mean(ts_use), np.std(ts_use)))

J_cau time: 8.089930653572083 +- 0.09501055856625364
