# Bayes Network 

# Alarm Network Example with python

In [1]:
import pandas as pd
import numpy as np

In [2]:
B = pd.DataFrame([[1, 0.001],[0, 0.999]], columns=["B","P(B)"])

In [3]:
B

Unnamed: 0,B,P(B)
0,1,0.001
1,0,0.999


In [4]:
E = pd.DataFrame([[1, 0.002],[0, 0.998]], columns=["E","P(E)"])

In [5]:
E

Unnamed: 0,E,P(E)
0,1,0.002
1,0,0.998


In [6]:
JA = pd.DataFrame([[1, 1, 0.9],[1,0,0.1],[0,1,0.05],[0,0,0.95]], columns=["A","J","P(J|A)"])

In [7]:
JA

Unnamed: 0,A,J,P(J|A)
0,1,1,0.9
1,1,0,0.1
2,0,1,0.05
3,0,0,0.95


In [8]:
MA = pd.DataFrame([[1, 1, 0.7],[1,0,0.3],[0,1,0.01],[0,0,0.99]], columns=["A","M","P(M|A)"])

In [9]:
MA

Unnamed: 0,A,M,P(M|A)
0,1,1,0.7
1,1,0,0.3
2,0,1,0.01
3,0,0,0.99


In [10]:
BEA = pd.DataFrame([[1, 1, 1, 0.95],[1,1,0, 0.05],[1,0,1,0.94],[1,0,0,0.06],[0,1,1,0.29],[0,1,0,0.71],[0,0,1,0.001],[0,0,0,0.999]], columns=["B","E","A","P(A|B,E)"])

In [11]:
BEA

Unnamed: 0,B,E,A,"P(A|B,E)"
0,1,1,1,0.95
1,1,1,0,0.05
2,1,0,1,0.94
3,1,0,0,0.06
4,0,1,1,0.29
5,0,1,0,0.71
6,0,0,1,0.001
7,0,0,0,0.999


### Q1. Time complexity of JPT
Can you think about what is the time complexity of calculating the joint probability table (JPT) as shown below? (Hint: for loops)

In [12]:
def mult(l):
    m=1
    for el in l:
        m*= np.array(el)
    return m[0]

alljoint=[]
for b in [1,0]:
    for e in [1,0]:
        for a in [1,0]:
            for j in [1,0]:
                for m in [1,0]:
                    p = mult([B[B["B"]==b]["P(B)"],E[E["E"]==e]["P(E)"],BEA[(BEA["B"]==b)&(BEA["E"]==e)&(BEA["A"]==a)]["P(A|B,E)"],JA[(JA["J"]==j)&(JA["A"]==a)]["P(J|A)"],MA[(MA["M"]==m)&(MA["A"]==a)]["P(M|A)"]])
                    alljoint.append([b,e,a,j,m,p])

In [13]:
JPT = pd.DataFrame(alljoint,columns=['B','E','A','J','M','P'])

In [14]:
JPT.sum(axis=0) #note: joint probability should sum to 1

B    16.0
E    16.0
A    16.0
J    16.0
M    16.0
P     1.0
dtype: float64

In [15]:
JPT

Unnamed: 0,B,E,A,J,M,P
0,1,1,1,1,1,1.197e-06
1,1,1,1,1,0,5.13e-07
2,1,1,1,0,1,1.33e-07
3,1,1,1,0,0,5.7e-08
4,1,1,0,1,1,5e-11
5,1,1,0,1,0,4.95e-09
6,1,1,0,0,1,9.5e-10
7,1,1,0,0,0,9.405e-08
8,1,0,1,1,1,0.0005910156
9,1,0,1,1,0,0.0002532924


In [16]:
def pcall(s): #this fuction pulls a joint probabilty (from the JPT) of given variables
    b,e,a,j,m = s
    return np.array(JPT[(JPT['B']==int(b))&(JPT['E']==int(e))&(JPT['A']==int(a))&(JPT['J']==int(j))&(JPT['M']==int(m))]['P'])[0]

In [17]:
pcall('00111') #cheking textbook example: 3rd ed. p.514 P(-b,-e,+a,+j,+m)

0.0006281112599999999

In [18]:
0.9*0.7*0.001*0.999*0.998

0.0006281112599999999

## Inference by Enumeration
Now let's compare table-based method (using JPT) vs. Bayesnet method (using smaller tables) 

$P(B| +j,+m) \propto P(B,+j,+m)$

$= \sum_{e,a}P(B, e, a, +j, +m)~~~$   (JPT)

$= \sum_{e,a}P(B)P(e)P(a|B,e)P(+j|a)P(+m|a)~~~~~$   (Bayes)

Note that capital letter means still a random variable (have not decided) and the lower letter means the value has been assigned. Enumerating on a variable means to go over all the domain of that variable.

In [19]:
# JPT table enumeration (manual process)
# step 1: subset the condition
# step 2: sum out the hidden variables
# step 3: normalize

In [20]:
#step1
JPT[(JPT["J"]==1)&(JPT["M"]==1)]

Unnamed: 0,B,E,A,J,M,P
0,1,1,1,1,1,1.197e-06
4,1,1,0,1,1,5e-11
8,1,0,1,1,1,0.0005910156
12,1,0,0,1,1,2.994e-08
16,0,1,1,1,1,0.0003650346
20,0,1,0,1,1,7.0929e-07
24,0,0,1,1,1,0.0006281113
28,0,0,0,1,1,0.0004980025


In [21]:
#step2
#which is query variable, and which are hidden variables?
#ans: B is the query variable and E and A are hidden variables
#so, let's sum over E and A
evidence = JPT[(JPT["J"]==1)&(JPT["M"]==1)]
g = evidence.groupby("B")

In [22]:
g.sum()

Unnamed: 0_level_0,E,A,J,M,P
B,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2,2,4,4,0.001492
1,2,2,4,4,0.000592


In [23]:
#step3
Z = g.sum()["P"].sum()
g.sum()["P"]/Z

B
0    0.715828
1    0.284172
Name: P, dtype: float64

In [24]:
#JPT table enumeration (using for loops)
#we enumerate over e, and a with fixed J=+j, M=+m 
#B is the query variable, so we want to calculate for the each domain value of B

def stringfy(l):
    s = ''
    for el in l:
        s+=str(el)
    return s    
    
j=1
m=1
Pb_jm=[]
z=0
pb_jm=[]
for b in [0,1]:
    zea=0
    for e in [0,1]:
        za=0
        for a in [0,1]:
            s = stringfy([b,e,a,j,m])
            p = pcall(s)
            za+=p
        zea+=za #collecting the sum over e and a
    pb_jm.append(zea)    
    z+=zea # z is the final normalization constant, as the conditional prob of one query variable given a condition should sum up to 1. 
    
result = pd.DataFrame(np.array(pb_jm).transpose()/z,columns=["P"],index=[0,1])    


In [25]:
result #we checked that it's the same as manual process shown above

Unnamed: 0,P
0,0.715828
1,0.284172


Now, let's try Baysnet with enumeration   
$\sum_{e,a}P(B)P(e)P(a|B,e)P(+j|a)P(+m|a)~~~~~$ 

In [26]:
j=1
m=1
pb_jm_Bayes=[]
z=0
for b in [0,1]:
    zea=0
    for e in [0,1]:
        za=0
        for a in [0,1]:
            p = mult([B[B["B"]==b]["P(B)"],E[E["E"]==e]["P(E)"],BEA[(BEA["B"]==b)&(BEA["E"]==e)&(BEA["A"]==a)]["P(A|B,E)"],JA[(JA["J"]==j)&(JA["A"]==a)]["P(J|A)"],MA[(MA["M"]==m)&(MA["A"]==a)]["P(M|A)"]])
            za+=p
        zea+=za #collecting the sum over e and a
    pb_jm_Bayes.append(zea)    
    z+=zea  

result = pd.DataFrame(np.array(pb_jm_Bayes).transpose()/z,columns=["P"],index=[0,1])    

In [27]:
result

Unnamed: 0,P
0,0.715828
1,0.284172


### Sanity check
Just to make sure, let's compare with hand calculation of the Bayesnet approach.    
$\sum_{e,a}P(B)P(e)P(a|B,e)P(+j|a)P(+m|a)~~~~~$

$=P(B)P(+e)P(+a|B,+e)P(+j|+a)P(+m|+a) \\
+P(B)P(+e)P(-a|B,+e)P(+j|-a)P(+m|-a) \\
+P(B)P(-e)P(+a|B,-e)P(+j|+a)P(+m|+a) \\
+P(B)P(-e)P(-a|B,-e)P(+j|-a)P(+m|-a)$

In [28]:
def PBayes(b,e,a,j,m):
    return mult([B[B["B"]==b]["P(B)"],E[E["E"]==e]["P(E)"],BEA[(BEA["B"]==b)&(BEA["E"]==e)&(BEA["A"]==a)]["P(A|B,E)"],JA[(JA["J"]==j)&(JA["A"]==a)]["P(J|A)"],MA[(MA["M"]==m)&(MA["A"]==a)]["P(M|A)"]])

def calc(b):
    #hidden vars = e, a (to be enumerated)
    #query var = b (to be queried)
    #evidences j=1, m=1
    return PBayes(b,1,1,j=1,m=1)+PBayes(b,1,0,j=1,m=1)+PBayes(b,0,1,j=1,m=1)+PBayes(b,0,0,j=1,m=1)
    

In [29]:
z = calc(0)+calc(1) #we still need normalization
result = pd.DataFrame(np.array([calc(0),calc(1)]).transpose()/z,columns=["P"],index=[0,1]) 
result

Unnamed: 0,P
0,0.715828
1,0.284172


### Time it!
Now let's time the two methods and compare the efficiency.

In [30]:
import time

def JPT_enumeration():
    alljoint=[]
    for b in [1,0]:
        for e in [1,0]:
            for a in [1,0]:
                for j in [1,0]:
                    for m in [1,0]:
                        p = mult([B[B["B"]==b]["P(B)"],E[E["E"]==e]["P(E)"],BEA[(BEA["B"]==b)&(BEA["E"]==e)&(BEA["A"]==a)]["P(A|B,E)"],JA[(JA["J"]==j)&(JA["A"]==a)]["P(J|A)"],MA[(MA["M"]==m)&(MA["A"]==a)]["P(M|A)"]])
                        alljoint.append([b,e,a,j,m,p])
    
    j=1
    m=1
    Pb_jm=[]
    z=0
    pb_jm=[]
    for b in [0,1]:
        zea=0
        for e in [0,1]:
            za=0
            for a in [0,1]:
                s = stringfy([b,e,a,j,m])
                p = pcall(s)
                za+=p
            zea+=za #collecting the sum over e and a
        pb_jm.append(zea)    
        z+=zea 
    return np.array(pb_jm).transpose()/z

def Bayes_enumeration():
    j=1
    m=1
    pb_jm_Bayes=[]
    z=0
    for b in [0,1]:
        zea=0
        for e in [0,1]:
            za=0
            for a in [0,1]:
                p = mult([B[B["B"]==b]["P(B)"],E[E["E"]==e]["P(E)"],BEA[(BEA["B"]==b)&(BEA["E"]==e)&(BEA["A"]==a)]["P(A|B,E)"],JA[(JA["J"]==j)&(JA["A"]==a)]["P(J|A)"],MA[(MA["M"]==m)&(MA["A"]==a)]["P(M|A)"]])
                za+=p
            zea+=za #collecting the sum over e and a
        pb_jm_Bayes.append(zea)    
        z+=zea  
    return np.array(pb_jm_Bayes).transpose()/z    

In [31]:
t1 = time.time()
for _ in range(100):
    JPT_enumeration()
t2 = time.time()
print(t2-t1) 

14.658199071884155


In [32]:
t1 = time.time()
for _ in range(100):
    Bayes_enumeration()
t2 = time.time()
print(t2-t1)

3.4269232749938965


## Conclusion on JTP enumeration vs. Bayesnet enumeration
We got the same results for enumerating the joint prob table (JPT) and enumerating the small tables using the Bayes net approach. We still need the normalization trick for both methods but how the individual joint probability is different: The first approach first calculate all joint probability (we need nested for loops for all the variables) then enumerate, whereas the second approach enumerates only for the hidden variables thus, has less for loops. In this way, we can save some calculations/time.  

#### Note on the timing test
It takes longer time to make the JPT table. Note that reading off from the JPT table itself takes less time than reading off numbers from the multiple tables- perhaps because of reading time overhead per each table. However, the JPT creating part is a part of the enumeration method.

# Variable Elimination
Variable elimination eliminate a variable after the join operation.
Below are known tables.

In [33]:
B

Unnamed: 0,B,P(B)
0,1,0.001
1,0,0.999


In [34]:
E

Unnamed: 0,E,P(E)
0,1,0.002
1,0,0.998


In [35]:
BEA

Unnamed: 0,B,E,A,"P(A|B,E)"
0,1,1,1,0.95
1,1,1,0,0.05
2,1,0,1,0.94
3,1,0,0,0.06
4,0,1,1,0.29
5,0,1,0,0.71
6,0,0,1,0.001
7,0,0,0,0.999


In [36]:
JA

Unnamed: 0,A,J,P(J|A)
0,1,1,0.9
1,1,0,0.1
2,0,1,0.05
3,0,0,0.95


In [37]:
MA

Unnamed: 0,A,M,P(M|A)
0,1,1,0.7
1,1,0,0.3
2,0,1,0.01
3,0,0,0.99


In [38]:
#step0.
j=1
m=1
PjA = JA[JA["J"]==j][["A","P(J|A)"]]
PmA = MA[MA["M"]==m][["A","P(M|A)"]]

In [39]:
PjA

Unnamed: 0,A,P(J|A)
0,1,0.9
2,0,0.05


In [40]:
PmA

Unnamed: 0,A,P(M|A)
0,1,0.7
2,0,0.01


In [41]:
#step 1: Choose A to eliminate
#step 1a: join the tables P(A|BE), P(j|A), P(m|A) to get P(j,m,A|B,E)
PjmA_BE = BEA.merge(PjA, on=('A')).merge(PmA, on=('A'))
PjmA_BE["P(j,m,A|B,E)"] = PjmA_BE["P(A|B,E)"]*PjmA_BE["P(J|A)"]*PjmA_BE["P(M|A)"]

In [42]:
PjmA_BE

Unnamed: 0,B,E,A,"P(A|B,E)",P(J|A),P(M|A),"P(j,m,A|B,E)"
0,1,1,1,0.95,0.9,0.7,0.5985
1,1,0,1,0.94,0.9,0.7,0.5922
2,0,1,1,0.29,0.9,0.7,0.1827
3,0,0,1,0.001,0.9,0.7,0.00063
4,1,1,0,0.05,0.05,0.01,2.5e-05
5,1,0,0,0.06,0.05,0.01,3e-05
6,0,1,0,0.71,0.05,0.01,0.000355
7,0,0,0,0.999,0.05,0.01,0.0005


In [43]:
#step 1b: sum out A to get P(j,m|B,E)
Pjm_BE = PjmA_BE.groupby(["B","E"],as_index=False).sum().drop(["A","P(A|B,E)","P(J|A)","P(M|A)"],axis=1).rename(columns={"P(j,m,A|B,E)":"P(j,m|B,E)"})
Pjm_BE

Unnamed: 0,B,E,"P(j,m|B,E)"
0,0,0,0.00113
1,0,1,0.183055
2,1,0,0.59223
3,1,1,0.598525


In [44]:
#step2: choose E to eliminate 
#2a: join the tables P(E)and  P(j,m|B,E) to get P(j,m,E|B)
PjmE_B = Pjm_BE.merge(E, on=('E'))
PjmE_B["P(j,m,E|B)"] = PjmE_B["P(j,m|B,E)"]*PjmE_B["P(E)"]
PjmE_B

Unnamed: 0,B,E,"P(j,m|B,E)",P(E),"P(j,m,E|B)"
0,0,0,0.00113,0.998,0.001127
1,1,0,0.59223,0.998,0.591046
2,0,1,0.183055,0.002,0.000366
3,1,1,0.598525,0.002,0.001197


In [45]:
#2b: sum out E to get P(j,m|B)
Pjm_B = PjmE_B.groupby(["B"],as_index=False).sum().drop(["E","P(j,m|B,E)","P(E)"],axis=1).rename(columns={"P(j,m,E|B)":"P(j,m|B)"})
Pjm_B

Unnamed: 0,B,"P(j,m|B)"
0,0,0.001493
1,1,0.592243


In [46]:
#step3: finish with B
#3a: join the tables P(B)and  P(j,m|B) to get P(j,m,B)
PjmB = Pjm_B.merge(B, on=('B'))
PjmB["P(j,m,B)"] = PjmB["P(j,m|B)"]*PjmB["P(B)"]
PjmB

Unnamed: 0,B,"P(j,m|B)",P(B),"P(j,m,B)"
0,0,0.001493,0.999,0.001492
1,1,0.592243,0.001,0.000592


In [47]:
#3b: normalize
PB_jm = PjmB[["B"]] 
PB_jm["P(B|j,m)"] = PjmB["P(j,m,B)"]/PjmB["P(j,m,B)"].sum()
PB_jm

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,B,"P(B|j,m)"
0,0,0.715828
1,1,0.284172


### Time it!
The moment of truth...
The claim is that this should be faster than enumeration..

In [48]:
def VE():
    j=1
    m=1
    PjA = JA[JA["J"]==j][["A","P(J|A)"]]
    PmA = MA[MA["M"]==m][["A","P(M|A)"]]
    
    PjmA_BE = BEA.merge(PjA, on=('A')).merge(PmA, on=('A'))
    PjmA_BE["P(j,m,A|B,E)"] = PjmA_BE["P(A|B,E)"]*PjmA_BE["P(J|A)"]*PjmA_BE["P(M|A)"]
    Pjm_BE = PjmA_BE.groupby(["B","E"],as_index=False).sum().drop(["A","P(A|B,E)","P(J|A)","P(M|A)"],axis=1).rename(columns={"P(j,m,A|B,E)":"P(j,m|B,E)"})

    PjmE_B = Pjm_BE.merge(E, on=('E'))
    PjmE_B["P(j,m,E|B)"] = PjmE_B["P(j,m|B,E)"]*PjmE_B["P(E)"]
    Pjm_B = PjmE_B.groupby(["B"],as_index=False).sum().drop(["E","P(j,m|B,E)","P(E)"],axis=1).rename(columns={"P(j,m,E|B)":"P(j,m|B)"})

    PjmB = Pjm_B.merge(B, on=('B'))
    PjmB["P(j,m,B)"] = PjmB["P(j,m|B)"]*PjmB["P(B)"]
    PB_jm = PjmB[["B"]] 
    PB_jm["P(B|j,m)"] = PjmB["P(j,m,B)"]/PjmB["P(j,m,B)"].sum()
    return PB_jm["P(B|j,m)"]


In [49]:
t1=time.time()
for _ in range(100):
    VE()
t2=time.time()
print(t2-t1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


4.192529678344727


Due to the pandas data frame readin inefficiency, the operations were slower than expected (I've got 4-8s in my computer).
So let's try something else.

In [50]:
#initialize just in case things have been modified
B = pd.DataFrame([[1, 0.001],[0, 0.999]], columns=["B","P(B)"])
E = pd.DataFrame([[1, 0.002],[0, 0.998]], columns=["E","P(E)"])
JA = pd.DataFrame([[1, 1, 0.9],[1,0,0.1],[0,1,0.05],[0,0,0.95]], columns=["A","J","P(J|A)"])
MA = pd.DataFrame([[1, 1, 0.7],[1,0,0.3],[0,1,0.01],[0,0,0.99]], columns=["A","M","P(M|A)"])
BEA = pd.DataFrame([[1, 1, 1, 0.95],[1,1,0, 0.05],[1,0,1,0.94],[1,0,0,0.06],[0,1,1,0.29],[0,1,0,0.71],[0,0,1,0.001],[0,0,0,0.999]], columns=["B","E","A","P(A|B,E)"])

In [51]:
#change the data structure to dictionary
dB = dict(zip(B["B"],B["P(B)"]))
dE = dict(zip(E["E"],E["P(E)"]))
dPjA = dict(zip(PjA['A'],PjA["P(J|A)"]))
dPmA = dict(zip(PmA['A'],PmA["P(M|A)"]))
dPA_BE = dict(zip(list(zip(BEA["B"],BEA["E"],BEA["A"])),np.array(BEA["P(A|B,E)"])))

In [54]:
from collections import defaultdict
def VE2():
    #step 1: Choose A to eliminate
    #step 1a: join the tables P(A|BE), P(j|A), P(m|A) to get P(j,m,A|B,E)
    dPjmA_BE=dict()
    for k,v in dPA_BE.items():
        #print(k, v*dPjA[k[2]]*dPmA[k[2]])
        dPjmA_BE.update({k:v*dPjA[k[2]]*dPmA[k[2]]})
    #step 1b: sum out A to get P(j,m|B,E)
    dPjm_BE = defaultdict(float)
    for key, val in dPjmA_BE.items():
        dPjm_BE[key[:2]]+=val

    #step2: choose E to eliminate 
    #2a: join the tables P(E)and  P(j,m|B,E) to get P(j,m,E|B)
    dPjmE_B=dict()
    for k,v in dPjm_BE.items():
        dPjmE_B.update({k:v*dE[k[1]]})
    #2b: sum out E to get P(j,m|B)    
    dPjm_B = defaultdict(float)
    for key, val in dPjmE_B.items():
        dPjm_B[key[:1]]+=val

    #step3: finish with B
    #3a: join the tables P(B)and  P(j,m|B) to get P(j,m,B)
    dPjmB=dict()
    for k,v in dPjm_B.items():
        dPjmB.update({k[0]:v*dB[k[0]]})
    #3b: normalize  
    z = sum(list(dPjmB.values()))
    return {1:dPjmB[1]/z,0:dPjmB[0]/z}

In [55]:
VE2() #the results are consistent

{1: 0.2841718353643929, 0: 0.7158281646356071}

In [56]:
t1=time.time()
for _ in range(100):
    VE2()
t2=time.time()
print(t2-t1)

0.0013129711151123047


OMG???
It was way too fast!    
I'm not sure if it's fair, as other methods still use pandas dataframe which may have slow reading or operation.    
You can check apple to apple (by convering dataframes to dictionaries) if interested.    
Let me know what you find- computation speed for JPT enumeration vs. BayesNet enumeration vs. Variable elimination methods!