# Measures on LUCAS example: health scenario

This notebook implements the first scenario in the paper.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pgmpy.models import BayesianNetwork as BN
from pgmpy.factors.discrete import TabularCPD as CPD
from pgmpy.inference import VariableElimination

from src.SCMMappings_1_1 import Abstraction
from src.measuring import ICEvaluator,IILEvaluator,ISILEvaluator,ISCEvaluator
from src.examples import LUCASmodels as lucas
import src.evaluationsets as es
import src.learning as lng

In [2]:
np.random.seed(0)

In [3]:
def rank_solutions(A):
    measures = [ICEvaluator,IILEvaluator,ISILEvaluator,ISCEvaluator]
    J = es.get_causal_sets_in_M1_with_directed_path_in_M1_and_M0(A.M0,A.M1,A.a)
    
    alphas = []
    results = []
    for m in measures:
        c_errors,c_alphas = lng.list_all_alphas_and_errors(A,J,m)
        alphas.append(c_alphas) 
        results.append(c_errors)
    res = np.array(results) 
    
    sortings = [np.argsort(res[i,:]) for i in range (len(measures))]
    sortedvals = [res[i,:][sortings[i]] for i in range (len(measures))]
    sortedsols = []
    for i in range (len(measures)):
        sortedsols.append([alphas[i][sortings[i][j]] for j in range(len(sortings[i]))])
        
    data = {
        'IC_val' : sortedvals[0],
        'IC_rank' : sortedsols[0],
        'IIL_val' : sortedvals[1],
        'IIL_rank' : sortedsols[1],
        'ISIL_val' : sortedvals[2],
        'ISIL_rank' : sortedsols[2],
        'ISC_val' : sortedvals[3],
        'ISC_rank' : sortedsols[3],
    }
    df = pd.DataFrame(data)

    return alphas,res,df

## LUCAS Model

In [4]:
M0 = lucas.lucas0()

In [5]:
M0do = M0.do(['Smoking'])
infer = VariableElimination(M0do)
P_FS = infer.query(['Smoking','Coughing','Fatigue'])
P_S = infer.query(['Smoking'])
print(P_FS/P_S)

+------------+-------------+------------+---------------------------------+
| Smoking    | Coughing    | Fatigue    |   phi(Smoking,Coughing,Fatigue) |
| Smoking(0) | Coughing(0) | Fatigue(0) |                          0.3240 |
+------------+-------------+------------+---------------------------------+
| Smoking(0) | Coughing(0) | Fatigue(1) |                          0.1933 |
+------------+-------------+------------+---------------------------------+
| Smoking(0) | Coughing(1) | Fatigue(0) |                          0.0696 |
+------------+-------------+------------+---------------------------------+
| Smoking(0) | Coughing(1) | Fatigue(1) |                          0.4131 |
+------------+-------------+------------+---------------------------------+
| Smoking(1) | Coughing(0) | Fatigue(0) |                          0.1205 |
+------------+-------------+------------+---------------------------------+
| Smoking(1) | Coughing(0) | Fatigue(1) |                          0.1101 |
+-----------

In [6]:
mu = (P_FS/P_S).values.reshape((2,4)).T

## Health Model

In [7]:
def healthM(nu):

    M = BN([('Smoking','Health')])

    cpd = CPD(variable='Smoking',
             variable_card = 2,
             values = [[.5],[.5]],
             evidence = None,
             evidence_card = None)
    M.add_cpds(cpd)

    cpd = CPD(variable='Health',
             variable_card = 2,
             values = nu,
             evidence = ['Smoking'],
             evidence_card = [2])
    M.add_cpds(cpd)
    if M.check_model(): return M
    
def healthA(nu):
    M1 = healthM(nu)
    R = ['Smoking','Coughing','Fatigue']
    a = {'Smoking': 'Smoking',
         'Coughing': 'Health',
        'Fatigue': 'Health'}
    return Abstraction(M0,M1,R,a)

In [8]:
nus = [np.array([[.3,.2],[.7,.8]]),
       np.array([[.4,.2],[.6,.8]]),
       np.array([[.5,.2],[.5,.8]])]
       
alphas_nu = []; res_nu = []; df_nu = []
for nu in nus:
    A = healthA(nu)
    alphas,res,df = rank_solutions(A)
    alphas_nu.append(alphas)
    res_nu.append(res)
    df_nu.append(df)

## Abstraction learning

In [9]:
df_nu[0].head()

Unnamed: 0,IC_val,IC_rank,IIL_val,IIL_rank,ISIL_val,ISIL_rank,ISC_val,ISC_rank
0,0.029201,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.2424,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.029201,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.2424,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1..."
1,0.069582,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.242953,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.069582,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.242953,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1..."
2,0.076978,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.251244,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 1...",0.076978,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.251244,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 1..."
3,0.087037,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 1...",0.252111,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.087037,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 1...",0.252111,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0..."
4,0.088358,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.256987,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[1, 0...",0.088358,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.256987,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[1, 0..."


In [10]:
df_nu[0]['IC_rank'][0]

{'Smoking': array([[1, 0],
        [0, 1]], dtype=int32),
 'Health': array([[0, 1, 1, 0],
        [1, 0, 0, 1]], dtype=int32)}

In [11]:
df_nu[1].head()

Unnamed: 0,IC_val,IC_rank,IIL_val,IIL_rank,ISIL_val,ISIL_rank,ISC_val,ISC_rank
0,0.004652,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.192412,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.004652,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.192412,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1..."
1,0.076978,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.232522,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 0...",0.076978,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.232522,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 0..."
2,0.083328,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.242953,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.083328,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.242953,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1..."
3,0.103276,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.252111,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.103276,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.252111,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0..."
4,0.132535,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.274988,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.132535,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.274988,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1..."


In [12]:
df_nu[2].head()

Unnamed: 0,IC_val,IC_rank,IIL_val,IIL_rank,ISIL_val,ISIL_rank,ISC_val,ISC_rank
0,0.026314,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.160249,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.026314,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.160249,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1..."
1,0.075769,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.232522,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 0...",0.075769,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.232522,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 0..."
2,0.093007,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.242953,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1...",0.093007,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.242953,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[0, 1..."
3,0.126812,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.252111,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.126812,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0...",0.252111,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 0..."
4,0.164729,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 0...",0.274988,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1...",0.164729,"{'Smoking': [[0, 1], [1, 0]], 'Health': [[0, 0...",0.274988,"{'Smoking': [[1, 0], [0, 1]], 'Health': [[1, 1..."


### Best IC 

In [13]:
df_nu[1]['IC_val'][0]

0.004652475014112519

In [14]:
df_nu[2]['IIL_rank'][0]

{'Smoking': array([[1, 0],
        [0, 1]], dtype=int32),
 'Health': array([[1, 1, 1, 0],
        [0, 0, 0, 1]], dtype=int32)}

In [15]:
nu_IC = nus[1]
bestIC = df_nu[1]['IC_rank'][0]
aA_IC = bestIC['Smoking']
aB_IC = bestIC['Health']

In [16]:
print('IC paths: \n {0} \n {1}'.format(aB_IC@mu,nu_IC@aA_IC))

IC paths: 
 [[0.39356216 0.20453014]
 [0.60643784 0.79546986]] 
 [[0.4 0.2]
 [0.6 0.8]]


In [17]:
print('IIL paths: \n {0} \n {1}'.format(mu,np.linalg.pinv(aB_IC)@nu_IC@aA_IC))

IIL paths: 
 [[0.32399659 0.1204845 ]
 [0.19330332 0.11009622]
 [0.06956557 0.08404563]
 [0.41313452 0.68537365]] 
 [[0.2 0.1]
 [0.3 0.4]
 [0.2 0.1]
 [0.3 0.4]]


### Best IIL

In [18]:
df_nu[2]['IIL_val'][0]

0.16024852410132379

In [19]:
df_nu[2]['IIL_rank'][0]

{'Smoking': array([[1, 0],
        [0, 1]], dtype=int32),
 'Health': array([[1, 1, 1, 0],
        [0, 0, 0, 1]], dtype=int32)}

In [20]:
nu_IIL = nus[2]
bestIIL = df_nu[2]['IIL_rank'][0]
aA_IIL = bestIIL['Smoking']
aB_IIL = bestIIL['Health']

In [21]:
print('IC paths: \n {0} \n {1}'.format(aB_IIL@mu,nu_IIL@aA_IIL))

IC paths: 
 [[0.58686548 0.31462635]
 [0.41313452 0.68537365]] 
 [[0.5 0.2]
 [0.5 0.8]]


In [22]:
print('IIL paths: \n {0} \n {1}'.format(mu,np.linalg.pinv(aB_IIL)@nu_IIL@aA_IIL))

IIL paths: 
 [[0.32399659 0.1204845 ]
 [0.19330332 0.11009622]
 [0.06956557 0.08404563]
 [0.41313452 0.68537365]] 
 [[0.16666667 0.06666667]
 [0.16666667 0.06666667]
 [0.16666667 0.06666667]
 [0.5        0.8       ]]


## Downstream task: exam

In [23]:
n_simul = 10
n_samples = 10000
M1_IC = healthM(nu_IC)
M1_IIL = healthM(nu_IIL)

In [24]:
samples = np.zeros((n_simul,4))

for i in range(n_simul):
    sample_M0_do0 = A.M0.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    BF_samples = sample_M0_do0[['Coughing','Fatigue']].values
    BF_samples_int = [x[0]*2**1 + x[1]*2**0 for x in BF_samples]
    H_samples = [np.argmax(aB_IC[:,x]) for x in BF_samples_int]
    samples[i,0] = np.sum(H_samples)
    
    sample_M1_do0 = M1_IC.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    samples[i,1] = sample_M1_do0['Health'].sum()
    
    sample_M0_do0 = A.M0.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    BF_samples = sample_M0_do0[['Coughing','Fatigue']].values
    BF_samples_int = [x[0]*2**1 + x[1]*2**0 for x in BF_samples]
    H_samples = [np.argmax(aB_IIL[:,x]) for x in BF_samples_int]
    samples[i,2] = np.sum(H_samples)

    sample_M1_do0 = M1_IIL.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    samples[i,3] = sample_M1_do0['Health'].sum()

print('Base model: {0} +- {1}'.format(np.mean(samples[:,0]), np.std(samples[:,0])))
print('IC_opt abstraction: {0} +- {1}'.format(np.mean(samples[:,1]), np.std(samples[:,1])))

print('Base model: {0} +- {1}'.format(np.mean(samples[:,2]), np.std(samples[:,2])))
print('IIL_opt abstraction: {0} +- {1}'.format(np.mean(samples[:,3]), np.std(samples[:,3])))  

Base model: 6075.2 +- 34.466795615490575
IC_opt abstraction: 6001.9 +- 32.485227411855995
Base model: 4132.8 +- 63.13445968724212
IIL_opt abstraction: 4981.5 +- 56.789523681749614


In [25]:
samples = np.zeros((n_simul,4))

for i in range(n_simul):
    sample_M0_do0 = A.M0.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    BF_samples = sample_M0_do0[['Coughing','Fatigue']].values
    BF_samples_int = [x[0]*2**1 + x[1]*2**0 for x in BF_samples]
    H_samples = [np.argmax(aB_IC[:,x]) for x in BF_samples_int]
    samples[i,0] = np.sum(H_samples)
    
    sample_M1_do0 = M1_IC.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    samples[i,1] = sample_M1_do0['Health'].sum()
    
    sample_M0_do0 = A.M0.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    BF_samples = sample_M0_do0[['Coughing','Fatigue']].values
    BF_samples_int = [x[0]*2**1 + x[1]*2**0 for x in BF_samples]
    H_samples = [np.argmax(aB_IIL[:,x]) for x in BF_samples_int]
    samples[i,2] = np.sum(H_samples)

    sample_M1_do0 = M1_IIL.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    samples[i,3] = sample_M1_do0['Health'].sum()

print('Base model: {0} +- {1}'.format(np.mean(samples[:,0]), np.std(samples[:,0])))
print('IC_opt abstraction: {0} +- {1}'.format(np.mean(samples[:,1]), np.std(samples[:,1])))

print('Base model: {0} +- {1}'.format(np.mean(samples[:,2]), np.std(samples[:,2])))
print('IIL_opt abstraction: {0} +- {1}'.format(np.mean(samples[:,3]), np.std(samples[:,3])))  

Base model: 7968.2 +- 33.36405251164792
IC_opt abstraction: 7994.8 +- 42.64223258695539
Base model: 6814.8 +- 48.45988031351295
IIL_opt abstraction: 7986.6 +- 43.79771683546986


## Downstream task: car accident

In [26]:
stats = np.zeros((n_simul,3))

for s in range(n_simul):
    sample_M0_do0 = A.M0.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    stats[s,0] = np.sum(sample_M0_do0['Car Accident'].values)

    # Simulate M1 (IC optimized) and record the number of Hea=0 and Hea=1
    sample_M1_do0 = M1_IC.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    H_samples = sample_M1_do0['Health'].values
    n_H1_samples = np.sum(H_samples)
    n_H0_samples = n_samples - np.sum(H_samples)

    # For each Hea=0 and Hea=1, compute a possible combination of (Cou,Fat)
    n_CF_samples = np.zeros(4)

    CF_samples = np.random.choice(4,size=n_H0_samples,p=np.linalg.pinv(aB_IC)[:,0])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)
    #print(np.linalg.pinv(aB_IC),n_CF_samples)

    CF_samples = np.random.choice(4,size=n_H1_samples,p=np.linalg.pinv(aB_IC)[:,1])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)

    # Simulate (M0) assuming that (Cou,Fat) have been computed in M1 (IC optimized)
    CA_samples = np.array([])
    for i in range(4):
        do_Cou = int(np.binary_repr(i,width=2)[1])
        do_Fat = int(np.binary_repr(i,width=2)[0])
        #print(np.binary_repr(i,width=2),do_Cou,do_Fat)
        sample_M01_do0 = A.M0.simulate(n_samples=int(n_CF_samples[i]), do={'Smoking':0,'Coughing':do_Cou,'Fatigue':do_Fat}, show_progress=False)
        CA_samples = np.concatenate((CA_samples, sample_M01_do0['Car Accident'].values))
    stats[s,1] = np.sum(sample_M01_do0['Car Accident'].values)
        
    # Simulate M1 (IIL optimized) and record the number of Hea=0 and Hea=1
    sample_M1_do0 = M1_IIL.simulate(n_samples=n_samples, do={'Smoking':0}, show_progress=False)
    H_samples = sample_M1_do0['Health'].values
    n_H1_samples = np.sum(H_samples)
    n_H0_samples = n_samples - np.sum(H_samples)

    # For each Hea=0 and Hea=1, compute a possible combination of (Cou,Fat)
    n_CF_samples = np.zeros(4)

    CF_samples = np.random.choice(4,size=n_H0_samples,p=np.linalg.pinv(aB_IIL)[:,0])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)

    CF_samples = np.random.choice(4,size=n_H1_samples,p=np.linalg.pinv(aB_IIL)[:,1])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)

    # Simulate (M0) assuming that (Cou,Fat) have been computed in M1 (IIL optimized)
    CA_samples = np.array([])
    for i in range(4):
        do_Cou = int(np.binary_repr(i,width=2)[1])
        do_Fat = int(np.binary_repr(i,width=2)[0])
        sample_M02_do0 = A.M0.simulate(n_samples=int(n_CF_samples[i]), do={'Smoking':0,'Coughing':do_Cou,'Fatigue':do_Fat}, show_progress=False)
        CA_samples = np.concatenate((CA_samples, sample_M02_do0['Car Accident'].values))
    
    stats[s,2] = np.sum(sample_M02_do0['Car Accident'].values)

In [27]:
for i in range(3):
    print('{0} +- {1}'.format(np.mean(stats[:,i]),np.std(stats[:,i])))

6785.9 +- 37.52719014261526
2562.2 +- 56.82393861745242
4266.5 +- 53.917065943910565


In [28]:
stats = np.zeros((n_simul,3))

for s in range(n_simul):
    sample_M0_do0 = A.M0.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    stats[s,0] = np.sum(sample_M0_do0['Car Accident'].values)

    # Simulate M1 (IC optimized) and record the number of Hea=0 and Hea=1
    sample_M1_do0 = M1_IC.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    H_samples = sample_M1_do0['Health'].values
    n_H1_samples = np.sum(H_samples)
    n_H0_samples = n_samples - np.sum(H_samples)

    # For each Hea=0 and Hea=1, compute a possible combination of (Cou,Fat)
    n_CF_samples = np.zeros(4)

    CF_samples = np.random.choice(4,size=n_H0_samples,p=np.linalg.pinv(aB_IC)[:,0])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)
    #print(np.linalg.pinv(aB_IC),n_CF_samples)

    CF_samples = np.random.choice(4,size=n_H1_samples,p=np.linalg.pinv(aB_IC)[:,1])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)

    # Simulate (M0) assuming that (Cou,Fat) have been computed in M1 (IC optimized)
    CA_samples = np.array([])
    for i in range(4):
        do_Cou = int(np.binary_repr(i,width=2)[1])
        do_Fat = int(np.binary_repr(i,width=2)[0])
        #print(np.binary_repr(i,width=2),do_Cou,do_Fat)
        sample_M01_do0 = A.M0.simulate(n_samples=int(n_CF_samples[i]), do={'Smoking':1,'Coughing':do_Cou,'Fatigue':do_Fat}, show_progress=False)
        CA_samples = np.concatenate((CA_samples, sample_M01_do0['Car Accident'].values))
    stats[s,1] = np.sum(sample_M01_do0['Car Accident'].values)
        
    # Simulate M1 (IIL optimized) and record the number of Hea=0 and Hea=1
    sample_M1_do0 = M1_IIL.simulate(n_samples=n_samples, do={'Smoking':1}, show_progress=False)
    H_samples = sample_M1_do0['Health'].values
    n_H1_samples = np.sum(H_samples)
    n_H0_samples = n_samples - np.sum(H_samples)

    # For each Hea=0 and Hea=1, compute a possible combination of (Cou,Fat)
    n_CF_samples = np.zeros(4)

    CF_samples = np.random.choice(4,size=n_H0_samples,p=np.linalg.pinv(aB_IIL)[:,0])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)

    CF_samples = np.random.choice(4,size=n_H1_samples,p=np.linalg.pinv(aB_IIL)[:,1])
    for i in range(4):
        n_CF_samples[i] += np.sum(CF_samples==i)

    # Simulate (M0) assuming that (Cou,Fat) have been computed in M1 (IIL optimized)
    CA_samples = np.array([])
    for i in range(4):
        do_Cou = int(np.binary_repr(i,width=2)[1])
        do_Fat = int(np.binary_repr(i,width=2)[0])
        sample_M02_do0 = A.M0.simulate(n_samples=int(n_CF_samples[i]), do={'Smoking':1,'Coughing':do_Cou,'Fatigue':do_Fat}, show_progress=False)
        CA_samples = np.concatenate((CA_samples, sample_M02_do0['Car Accident'].values))
    
    stats[s,2] = np.sum(sample_M02_do0['Car Accident'].values)

In [29]:
for i in range(3):
    print('{0} +- {1}'.format(np.mean(stats[:,i]),np.std(stats[:,i])))

7656.9 +- 51.66323644527121
3411.9 +- 47.94048393581358
6801.0 +- 51.69912958648337
