In [166]:
#Import libraries
import numpy as np
from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianModel
from IPython.display import Image

In [40]:
![alt text](BayesianNetwork.png "Title")

/bin/sh: 1: Syntax error: "(" unexpected


### Create the Bayesian Network

In [4]:
#Create the Bayesian Network model
student_model = BayesianModel([('Difficulty', 'Grade'),
                              ('Intelligence', 'Grade'),
                              ('Intelligence', 'SAT'),
                              ('Grade', 'Letter')])

#### Define all the probabilities

In [116]:
difficulty_cpd = TabularCPD(
    variable = 'Difficulty',
    variable_card = 2,
    values = [[.6, .4]])

In [117]:
intelligence_cpd = TabularCPD(
    variable = 'Intelligence',
    variable_card = 2,
    values = [[.7, .3]])

In [208]:
sat_cpd = TabularCPD(
    variable = 'SAT',
    variable_card = 2,
    values = [[.95, .2],
             [0.05, .8]],
    evidence = ['Intelligence'],
    evidence_card = [2])    

In [161]:
grade_cpd = TabularCPD(
    variable = 'Grade', 
    variable_card = 3,
    values = [[.3, .05, .9, .5],
             [.4, .25, .08, .3],
             [.3, .7, .02, .2]],
    evidence = ['Difficulty', 'Intelligence'],
    evidence_card = [2,2])

In [120]:
letter_cpd = TabularCPD(
    variable = 'Letter',
    variable_card = 2,
    values = [[0.1, 0.4, 0.99],
             [0.9, 0.6, 0.01]],
    evidence = ['Grade'],
    evidence_card = [3])

### Relation the model with the probabilities

In [209]:
student_model.add_cpds(difficulty_cpd, intelligence_cpd, sat_cpd, grade_cpd, letter_cpd)



### The structure

In [11]:
student_model.get_cpds()

[<TabularCPD representing P(Difficulty:2) at 0x7fa51a7968d0>,
 <TabularCPD representing P(Intelligence:2) at 0x7fa4e557f128>,
 <TabularCPD representing P(SAT:2 | Intelligence:2) at 0x7fa4e557f470>,
 <TabularCPD representing P(Grade:3 | Difficulty:2, Intelligence:2) at 0x7fa4e557f748>,
 <TabularCPD representing P(Letter:2 | Grade:3) at 0x7fa4e557f5c0>]

### Fine active trail nodes

In [12]:
student_model.active_trail_nodes('Grade')

{'Grade': {'Difficulty', 'Grade', 'Intelligence', 'Letter', 'SAT'}}

In [13]:
student_model.active_trail_nodes('SAT')

{'SAT': {'Grade', 'Intelligence', 'Letter', 'SAT'}}

### Find local independencies

In [14]:
student_model.local_independencies('SAT')

(SAT _|_ Difficulty, Letter, Grade | Intelligence)

In [15]:
student_model.local_independencies('Grade')

(Grade _|_ SAT, Letter | Intelligence, Difficulty)

In [16]:
student_model.local_independencies('Letter')

(Letter _|_ SAT, Intelligence, Difficulty | Grade)

### Get all independencies

In [17]:
student_model.get_independencies()

(SAT _|_ Difficulty)
(SAT _|_ Grade, Letter, Difficulty | Intelligence)
(SAT _|_ Letter | Grade)
(SAT _|_ Letter, Difficulty | Intelligence, Grade)
(SAT _|_ Grade, Difficulty | Letter, Intelligence)
(SAT _|_ Letter, Grade | Intelligence, Difficulty)
(SAT _|_ Letter | Difficulty, Grade)
(SAT _|_ Difficulty | Letter, Intelligence, Grade)
(SAT _|_ Letter | Difficulty, Intelligence, Grade)
(SAT _|_ Grade | Letter, Intelligence, Difficulty)
(Intelligence _|_ Difficulty)
(Intelligence _|_ Difficulty | SAT)
(Intelligence _|_ Letter | Grade)
(Intelligence _|_ Letter | SAT, Grade)
(Intelligence _|_ Letter | Difficulty, Grade)
(Intelligence _|_ Letter | SAT, Difficulty, Grade)
(Grade _|_ SAT | Intelligence)
(Grade _|_ SAT | Letter, Intelligence)
(Grade _|_ SAT | Intelligence, Difficulty)
(Grade _|_ SAT | Letter, Intelligence, Difficulty)
(Difficulty _|_ SAT, Intelligence)
(Difficulty _|_ Intelligence | SAT)
(Difficulty _|_ SAT | Intelligence)
(Difficulty _|_ Letter | Grade)
(Difficulty _|_ Lette

## Variable Elimination

In [173]:
#First value: Bayesian network model
#Second value: Element to delete
#Third value: Element to result
def JoinDistribution(model, arr_del, evidence, result):
    
     #Reshape result element
    dim_result = 1
    _len_result = student_model.get_cpds(result).values.shape
    for n in range(len(_len_result)):
        dim_result *= _len_result[n]
    _result = student_model.get_cpds(result).values.reshape((dim_result,1))
        
    #Iterations
    for k in range(len(arr_del)):
        
        dim_del = 1
        #Reshape delete element
        _len_del = student_model.get_cpds(arr_del[k]).values.shape
        for m in range(len(_len_del)):
            dim_del *=_len_del[m]
        _del = student_model.get_cpds(arr_del[k]).values.reshape((dim_del,1))
        
        #del
        for i in range(dim_del):
            if i%2 == 0:
                active = True
            else:
                active = False
                
            count = k
            for j in range(dim_result):
                if active:
                    _result[j] = _result[j]*_del[i]
                
                if count == 0:
                    if active:
                        active = False
                    else:
                        active = True
                    count = k
                else:
                    count = count-1
                    
            #print("-->",_result)
            
        #Do Join Distribucion
            
    return Reduction(_result, arr_del, evidence)

In [141]:
#First value: Bayesian network model
#Second value: Element to delete
#Third value: Element to result
def JoinDistribution(model, arr_del, result):
    
     #Reshape result element
    dim_result = 1
    _len_result = student_model.get_cpds(result).values.shape
    for n in range(len(_len_result)):
        dim_result *= _len_result[n]
    _result = student_model.get_cpds(result).values.reshape((dim_result,1))
        
    #Iterations
    for k in range(len(arr_del)):
        
        dim_del = 1
        #Reshape delete element
        _len_del = student_model.get_cpds(arr_del[k]).values.shape
        for m in range(len(_len_del)):
            dim_del *=_len_del[m]
        _del = student_model.get_cpds(arr_del[k]).values.reshape((dim_del,1))
        
        #del
        for i in range(dim_del):
            if i%2 == 0:
                active = True
            else:
                active = False
                
            count = k
            for j in range(dim_result):
                if active:
                    _result[j] = _result[j]*_del[i]
                
                if count == 0:
                    if active:
                        active = False
                    else:
                        active = True
                    count = k
                else:
                    count = count-1
                    
            #print("-->",_result)
            
        #Do Join Distribucion
            
    return _result

In [146]:
def Marginalization(_result, cardinalidad):
    _ret = np.zeros((cardinalidad,1))
    
    longitud = len(_result)
    _ret_pos = 0
    
    for i in range(len(_result)):
        _ret[_ret_pos] += _result[i]
        
        if((i+1)%(longitud/cardinalidad) == 0):
            _ret_pos +=1
            
    return _ret       
    

In [207]:
def Reduction(_result, arr_del, evidence):
    val = evidence.get(arr_del[0])
    longitud = len(_result)
    
    _arr = np.zeros((int(longitud/2),1))
    
    for i in range(longitud):
        if i%2 == val:
            _arr[int(i/2)] = _result[i]
            
    return Renormalization(_arr)

In [191]:
def Renormalization(_arr):
    suma = np.sum(_arr)
    return _arr/suma
    

In [43]:
x = student_model.get_cpds("SAT").values.shape

In [131]:
student_model.get_cpds("Grade").values

array([[[0.0756 , 0.0056 ],
        [0.0972 , 0.024  ]],

       [[0.1008 , 0.028  ],
        [0.00864, 0.0144 ]],

       [[0.0756 , 0.0784 ],
        [0.00216, 0.0096 ]]])

## Making Inferences

### P(Grade)

In [144]:
#First value: The model
#Second value: list of variable dependencies
#Third value: the inference
#Example: P(Grade). Grade depend to Difficulty and Intelligence
arr = JoinDistribution(student_model, ["Difficulty", "Intelligence"], "Grade")

In [148]:
arr

array([[0.126 ],
       [0.014 ],
       [0.162 ],
       [0.06  ],
       [0.168 ],
       [0.07  ],
       [0.0144],
       [0.036 ],
       [0.126 ],
       [0.196 ],
       [0.0036],
       [0.024 ]])

In [149]:
Marginalization(arr, student_model.get_cardinality("Grade"))

array([[0.362 ],
       [0.2884],
       [0.3496]])

### P(SAT)

In [150]:
#Third value: Conserve this value
arr = JoinDistribution(student_model, ["Intelligence"], "SAT")

In [151]:
arr

array([[0.665],
       [0.06 ],
       [0.035],
       [0.24 ]])

In [152]:
Marginalization(arr, student_model.get_cardinality("SAT"))

array([[0.725],
       [0.275]])

In [157]:
#Third value: Conserve this value
arr = JoinDistribution(student_model, ["Grade", "Difficulty", "Intelligence"], "Letter")

In [158]:
arr

array([[2.1772800e-06],
       [4.4100000e-05],
       [1.4370048e-05],
       [2.8350000e-05],
       [5.5987200e-06],
       [4.7250000e-07]])

### P(SAT) evidence Intelligence = 0

In [205]:
#Third value: Conserve this value
arr = JoinDistribution(student_model, ["Intelligence"], {'Intelligence':0},"SAT")

In [206]:
arr

array([[0.95],
       [0.05]])

### P(SAT) evidence Intelligence = 1

In [210]:
#Third value: Conserve this value
arr = JoinDistribution(student_model, ["Intelligence"], {'Intelligence':1},"SAT")

In [211]:
arr

array([[0.2],
       [0.8]])

## Prueba

In [43]:
# BUT Practice does tell us something about genetics IF we also 
# know something about olympic trials performance
prob_good_genes_if_no_practice_good_olympic_trials = olympic_infer.query(
                                        variables = ['Genetics'], 
                                        evidence = {'Practice':1,
                                                   'OlympicTrials':2})
print(prob_good_genes_if_no_practice_good_olympic_trials['Genetics'])



╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.3846 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.6154 │
╘════════════╧═════════════════╛
