In [56]:
#https://www.youtube.com/watch?v=DEHqIxX1Kq4
#https://github.com/AileenNielsen/PyGotham_2016_Probabilistic_Graphical_Models

from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianNetwork

# Setting up your model

### First, set the structure

In [57]:
# Network structure as directed (parent, child) edges:
# Genetics and Practice both feed OlympicTrials, which in turn feeds Offer.
olympic_model = BayesianNetwork(
    [
        ("Genetics", "OlympicTrials"),
        ("Practice", "OlympicTrials"),
        ("OlympicTrials", "Offer"),
    ]
)

### Then set up the relationships (the CPDs)

In [58]:
# Prior over Genetics (a root node, so no evidence): P(state 0) = 0.2, P(state 1) = 0.8.
# The query cells further down name state 0 "good genes" and state 1 "bad genes".
genetics_cpd = TabularCPD(
    variable="Genetics",
    variable_card=2,
    values=[[0.2], [0.8]],
)

In [59]:
# Prior over Practice (a root node, so no evidence): P(state 0) = 0.7, P(state 1) = 0.3.
# The query cells further down name state 0 "did practice" and state 1 "no practice".
practice_cpd = TabularCPD(
    variable="Practice",
    variable_card=2,
    values=[[0.7], [0.3]],
)

In [60]:
# P(Offer | OlympicTrials).
# Rows are the two Offer states; columns are OlympicTrials states 0, 1, 2.
# Each column sums to 1, and the chance of Offer state 1 rises with the
# OlympicTrials state (0.05 -> 0.2 -> 0.5).
offer_cpd = TabularCPD(
    variable="Offer",
    variable_card=2,
    values=[
        [0.95, 0.8, 0.5],
        [0.05, 0.2, 0.5],
    ],
    evidence=["OlympicTrials"],
    evidence_card=[3],
)

In [61]:
# P(OlympicTrials | Genetics, Practice).
# Rows are the three OlympicTrials states. Columns enumerate the evidence
# combinations with the last evidence variable (Practice) varying fastest:
#   (Genetics 0, Practice 0), (Genetics 0, Practice 1),
#   (Genetics 1, Practice 0), (Genetics 1, Practice 1)
# Each column sums to 1.
olympic_trials_cpd = TabularCPD(
    variable="OlympicTrials",
    variable_card=3,
    values=[
        [0.5, 0.8, 0.8, 0.9],
        [0.3, 0.15, 0.1, 0.08],
        [0.2, 0.05, 0.1, 0.02],
    ],
    evidence=["Genetics", "Practice"],
    evidence_card=[2, 2],
)

### Add the relationships to your model

In [62]:
# Attach one CPD per node to the network.
olympic_model.add_cpds(genetics_cpd, practice_cpd, offer_cpd, olympic_trials_cpd)

# Validate the network before running any inference: check_model() verifies
# that the CPDs are consistent with the graph and that their probabilities
# sum to 1, raising an error otherwise. Using assert keeps the cell silent
# when the model is valid.
assert olympic_model.check_model()

### Examine the structure of your graph

In [63]:
# Display the CPDs currently attached to the model (one per node is expected).
olympic_model.get_cpds()

[<TabularCPD representing P(Genetics:2) at 0x7e9aca560610>,
 <TabularCPD representing P(Practice:2) at 0x7e9aca562e00>,
 <TabularCPD representing P(Offer:2 | OlympicTrials:3) at 0x7e9aca562ef0>,
 <TabularCPD representing P(OlympicTrials:3 | Genetics:2, Practice:2) at 0x7e9aca560250>]

### Find active trail nodes

In [64]:
# Nodes connected to Genetics by an active trail when nothing is observed.
# Practice is absent from the recorded result: the only path to it runs
# through the unobserved common child OlympicTrials.
olympic_model.active_trail_nodes('Genetics')

{'Genetics': {'Genetics', 'Offer', 'OlympicTrials'}}

In [65]:
# Nodes connected to OlympicTrials by an active trail when nothing is observed.
# The recorded result contains every node in the network.
olympic_model.active_trail_nodes('OlympicTrials')

{'OlympicTrials': {'Genetics', 'Offer', 'OlympicTrials', 'Practice'}}

### Find local independencies

In [66]:
# Local independencies of Genetics; the recorded result is (Genetics ⟂ Practice).
olympic_model.local_independencies('Genetics')

(Genetics ⟂ Practice)

In [67]:
# Local independencies of OlympicTrials. The recorded output is empty:
# every other node is either a parent (Genetics, Practice) or a
# descendant (Offer) of OlympicTrials.
olympic_model.local_independencies('OlympicTrials')



### Get all independencies

In [68]:
# List every independence assertion implied by the graph structure.
olympic_model.get_independencies()

(Practice ⟂ Genetics)
(Practice ⟂ Offer | OlympicTrials)
(Practice ⟂ Offer | Genetics, OlympicTrials)
(Genetics ⟂ Practice)
(Genetics ⟂ Offer | OlympicTrials)
(Genetics ⟂ Offer | Practice, OlympicTrials)
(Offer ⟂ Practice, Genetics | OlympicTrials)
(Offer ⟂ Genetics | Practice, OlympicTrials)
(Offer ⟂ Practice | Genetics, OlympicTrials)

# Making inferences

### We can get probability distributions that are not explicitly spelled out in our graphs

In [69]:
from pgmpy.inference import VariableElimination

In [70]:
# Build a variable-elimination inference object over the model; every query
# and map_query call below goes through it.
olympic_infer = VariableElimination(olympic_model)

In [71]:
# Marginal P(Offer) with no evidence: the parents are summed out.
prob_offer = olympic_infer.query(variables=["Offer"])
print(prob_offer)

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8898 |
+----------+--------------+
| Offer(1) |       0.1102 |
+----------+--------------+


### We can also get joint distributions, as well as conditional probability distributions that take into account what we already know

In [72]:
# Joint distribution P(Offer, Genetics). No evidence is supplied, so this is
# not a conditional query: the table holds one probability per
# (Offer, Genetics) combination and the entries sum to 1.
# The variable is named for what it holds instead of reusing
# `prob_offer_good_genes`, which a later cell assigns to a true conditional
# P(Offer | Genetics = 0).
joint_offer_genetics = olympic_infer.query(variables=["Offer", "Genetics"])
print(joint_offer_genetics)


+----------+-------------+-----------------------+
| Offer    | Genetics    |   phi(Offer,Genetics) |
| Offer(0) | Genetics(0) |                0.1684 |
+----------+-------------+-----------------------+
| Offer(0) | Genetics(1) |                0.7214 |
+----------+-------------+-----------------------+
| Offer(1) | Genetics(0) |                0.0316 |
+----------+-------------+-----------------------+
| Offer(1) | Genetics(1) |                0.0786 |
+----------+-------------+-----------------------+


In [73]:
# Joint distribution P(Offer, OlympicTrials). No evidence is supplied, so the
# table holds one probability per (Offer, OlympicTrials) combination.
# The variable is named for what it holds; the previous name
# `prob_offer_good_genes` described neither Genetics evidence nor this result.
joint_offer_olympic_trials = olympic_infer.query(
    variables=["Offer", "OlympicTrials"]
)
print(joint_offer_olympic_trials)


+----------+------------------+----------------------------+
| Offer    | OlympicTrials    |   phi(Offer,OlympicTrials) |
| Offer(0) | OlympicTrials(0) |                     0.7429 |
+----------+------------------+----------------------------+
| Offer(0) | OlympicTrials(1) |                     0.1010 |
+----------+------------------+----------------------------+
| Offer(0) | OlympicTrials(2) |                     0.0459 |
+----------+------------------+----------------------------+
| Offer(1) | OlympicTrials(0) |                     0.0391 |
+----------+------------------+----------------------------+
| Offer(1) | OlympicTrials(1) |                     0.0252 |
+----------+------------------+----------------------------+
| Offer(1) | OlympicTrials(2) |                     0.0459 |
+----------+------------------+----------------------------+


In [74]:
# Conditional P(Offer | Genetics = 1), i.e. given "bad genes".
prob_offer_bad_genes = olympic_infer.query(
    variables=["Offer"],
    evidence={"Genetics": 1},
)
print(prob_offer_bad_genes)

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.9017 |
+----------+--------------+
| Offer(1) |       0.0983 |
+----------+--------------+


In [75]:
# Conditional P(Offer | Genetics = 0), i.e. given "good genes".
prob_offer_good_genes = olympic_infer.query(
    variables=["Offer"],
    evidence={"Genetics": 0},
)
print(prob_offer_good_genes)

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8420 |
+----------+--------------+
| Offer(1) |       0.1580 |
+----------+--------------+


In [76]:
# Conditional P(Offer | Genetics = 0, Practice = 0): good genes and did practice.
prob_offer_good_genes_did_practice = olympic_infer.query(
    variables=["Offer"],
    evidence={"Genetics": 0, "Practice": 0},
)
print(prob_offer_good_genes_did_practice)

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8150 |
+----------+--------------+
| Offer(1) |       0.1850 |
+----------+--------------+


### You can also go upstream logically. For example, evidence about performance at the Olympic Trials affects the probabilities of Genetics.

In [77]:
# Reason from effect back to cause: P(Genetics | OlympicTrials = 2).
# The recorded result moves Genetics state 0 from its prior of 0.2 up to 0.3377.
prob_good_genes_if_amazing_olympic_trials = olympic_infer.query(
    variables=["Genetics"],
    evidence={"OlympicTrials": 2},
)
print(prob_good_genes_if_amazing_olympic_trials)

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.3377 |
+-------------+-----------------+
| Genetics(1) |          0.6623 |
+-------------+-----------------+


### Some variables are only informative about other variables given 'third' variables

In [78]:
# On its own, Practice carries no information about Genetics:
# P(Genetics | Practice = 1) comes back equal to the Genetics prior (0.2 / 0.8).
prob_good_genes_if_no_practice = olympic_infer.query(
    variables=["Genetics"],
    evidence={"Practice": 1},
)
print(prob_good_genes_if_no_practice)

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.2000 |
+-------------+-----------------+
| Genetics(1) |          0.8000 |
+-------------+-----------------+


In [79]:
# Once the Olympic Trials result is also known, Practice DOES become
# informative about Genetics: observing the common child OlympicTrials
# couples its two parents.
prob_good_genes_if_no_practice_good_olympic_trials = olympic_infer.query(
    variables=["Genetics"],
    evidence={"Practice": 1, "OlympicTrials": 2},
)
print(prob_good_genes_if_no_practice_good_olympic_trials)

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.3846 |
+-------------+-----------------+
| Genetics(1) |          0.6154 |
+-------------+-----------------+


### We can find out the most probable state for a variable

In [80]:
# Most probable state of Genetics with no evidence; the recorded result is state 1.
olympic_infer.map_query(variables = ['Genetics'])

0it [00:00, ?it/s]

0it [00:00, ?it/s]

{'Genetics': 1}

In [81]:
# Most probable state of Offer with no evidence; the recorded result is state 0.
olympic_infer.map_query(variables = ['Offer'])

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

{'Offer': 0}

In [82]:
# Most probable state of OlympicTrials with no evidence; the recorded result is state 0.
olympic_infer.map_query(variables = ['OlympicTrials'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'OlympicTrials': 0}