In [78]:
from pgmpy.factors import TabularCPD
from pgmpy.models import BayesianModel

# Setting up your model

### First, set the structure

In [79]:
# Each pair is a directed edge (parent, child): Genetics and Practice both
# feed into OlympicTrials, and OlympicTrials is the only parent of Offer.
olympic_model = BayesianModel(
    [
        ('Genetics', 'OlympicTrials'),
        ('Practice', 'OlympicTrials'),
        ('OlympicTrials', 'Offer'),
    ]
)

### Then set up the relationships (the CPDs)

In [80]:
# Prior over Genetics (a root node, so no evidence).
# The inference cells further down treat state 0 as "good genes" (P = 0.2)
# and state 1 as "bad genes" (P = 0.8).
# The table is written with one row per state, i.e. shape (variable_card, 1),
# which is the layout TabularCPD documents for a variable without evidence.
genetics_cpd = TabularCPD(
    variable='Genetics',
    variable_card=2,
    values=[[.2],
            [.8]])

In [81]:
# Prior over Practice (a root node, so no evidence).
# The inference cells further down treat state 0 as "did practice" (P = 0.7)
# and state 1 as "no practice" (P = 0.3).
# The table is written with one row per state, i.e. shape (variable_card, 1),
# which is the layout TabularCPD documents for a variable without evidence.
practice_cpd = TabularCPD(
    variable='Practice',
    variable_card=2,
    values=[[.7],
            [.3]])

In [82]:
# P(Offer | OlympicTrials).
# Rows are the two Offer states; columns are OlympicTrials states 0, 1, 2.
# Every column sums to 1. P(Offer_1) rises from 0.05 to 0.5 as the trials
# state increases -- presumably Offer_1 means "receives an offer"; confirm.
offer_cpd = TabularCPD(
    variable='Offer',
    variable_card=2,
    values=[[.95, .8, .5],
            [.05, .2, .5]],
    evidence=['OlympicTrials'],
    # One cardinality per evidence variable, given as a list -- the same
    # form olympic_trials_cpd uses for its two parents.
    evidence_card=[3])

In [83]:
# P(OlympicTrials | Genetics, Practice).
# Rows are OlympicTrials states 0, 1, 2. Columns enumerate the parent
# combinations with Practice varying fastest:
#   (G=0, P=0), (G=0, P=1), (G=1, P=0), (G=1, P=1)
# Every column sums to 1.
olympic_trials_cpd = TabularCPD(
    variable='OlympicTrials',
    variable_card=3,
    evidence=['Genetics', 'Practice'],
    evidence_card=[2, 2],
    values=[
        [0.50, 0.80, 0.80, 0.90],
        [0.30, 0.15, 0.10, 0.08],
        [0.20, 0.05, 0.10, 0.02],
    ],
)

### Add the relationships to your model

In [85]:
# Attach all four CPDs to the graph, then validate right away so that a
# missing CPD, a parent/evidence mismatch, or a column that does not sum to 1
# is reported here rather than later when inference is set up.
olympic_model.add_cpds(genetics_cpd, practice_cpd, offer_cpd, olympic_trials_cpd)
assert olympic_model.check_model(), "olympic_model has missing or inconsistent CPDs"



### Examine the structure of your graph

In [86]:
# List every CPD currently attached to the model (one per node is expected).
olympic_model.get_cpds()

[<TabularCPD representing P(Genetics:2) at 0x1169632e8>,
 <TabularCPD representing P(Practice:2) at 0x1169636a0>,
 <TabularCPD representing P(Offer:2 | OlympicTrials:3) at 0x116963940>,
 <TabularCPD representing P(OlympicTrials:3 | Genetics:2, Practice:2) at 0x116963ef0>]

### Find active trail nodes

In [87]:
# Nodes connected to Genetics by an active trail; no observed nodes are passed.
olympic_model.active_trail_nodes('Genetics')

{'Genetics', 'Offer', 'OlympicTrials'}

In [88]:
# Same question for OlympicTrials; no observed nodes are passed.
olympic_model.active_trail_nodes('OlympicTrials')

{'Genetics', 'Offer', 'OlympicTrials', 'Practice'}

### Find local independencies

In [89]:
# Local independencies of the Genetics node.
olympic_model.local_independencies('Genetics')

(Genetics _|_ Practice)

In [90]:
# Local independencies of OlympicTrials; the recorded output for this cell is empty.
olympic_model.local_independencies('OlympicTrials')



### Get all independencies

In [91]:
# Every independence assertion pgmpy derives for this model.
olympic_model.get_independencies()

(Genetics _|_ Practice)
(Genetics _|_ Offer | OlympicTrials)
(Genetics _|_ Offer | Practice, OlympicTrials)
(Practice _|_ Genetics)
(Practice _|_ Offer | OlympicTrials)
(Practice _|_ Offer | Genetics, OlympicTrials)
(Offer _|_ Genetics, Practice | OlympicTrials)
(Offer _|_ Practice | Genetics, OlympicTrials)
(Offer _|_ Genetics | Practice, OlympicTrials)

# Making inferences

### We can get probability distributions that are not explicitly spelled out in our graphs

In [92]:
from pgmpy.inference import VariableElimination

In [93]:
# Inference engine over the finished model; every query below goes through it.
olympic_infer = VariableElimination(olympic_model)

In [94]:
# Distribution over Offer with no evidence supplied. The query result is
# indexed by variable name to get the factor to print.
prob_offer = olympic_infer.query(variables=['Offer'])
print(prob_offer['Offer'])

╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8898 │
├─────────┼──────────────┤
│ Offer_1 │       0.1102 │
╘═════════╧══════════════╛


### We can also get conditional probability distributions that take into account what we already know

In [None]:
# No evidence is passed here, so nothing is conditioned on good genes: the
# query returns one factor per requested variable (Offer and Genetics).
# The result is named for what it holds instead of reusing
# `prob_offer_good_genes`, which a later cell assigns to the query that
# really does condition on Genetics = 0.
marginals_offer_genetics = olympic_infer.query(variables=['Offer', 'Genetics'])
print(marginals_offer_genetics['Genetics'])
print(marginals_offer_genetics['Offer'])

In [None]:
# No evidence is passed here and Genetics is not queried at all: the result
# holds one factor per requested variable (Offer and OlympicTrials).
# The result is named for what it holds instead of reusing
# `prob_offer_good_genes`, which a later cell assigns to the query that
# really does condition on Genetics = 0.
marginals_offer_trials = olympic_infer.query(variables=['Offer', 'OlympicTrials'])
print(marginals_offer_trials['OlympicTrials'])
print(marginals_offer_trials['Offer'])

In [95]:
# P(Offer | Genetics = 1): fix Genetics at state 1 (the "bad genes" state)
# and print the resulting distribution over Offer.
prob_offer_bad_genes = olympic_infer.query(variables=['Offer'],
                                           evidence={'Genetics': 1})
print(prob_offer_bad_genes['Offer'])

╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.9017 │
├─────────┼──────────────┤
│ Offer_1 │       0.0983 │
╘═════════╧══════════════╛


In [96]:
# P(Offer | Genetics = 0): fix Genetics at state 0 (the "good genes" state)
# and print the resulting distribution over Offer.
prob_offer_good_genes = olympic_infer.query(variables=['Offer'],
                                            evidence={'Genetics': 0})
print(prob_offer_good_genes['Offer'])

╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8420 │
├─────────┼──────────────┤
│ Offer_1 │       0.1580 │
╘═════════╧══════════════╛


In [97]:
# P(Offer | Genetics = 0, Practice = 0): both parents of OlympicTrials are
# observed, at the "good genes" and "did practice" states.
prob_offer_good_genes_did_practice = olympic_infer.query(
    variables=['Offer'],
    evidence={'Genetics': 0, 'Practice': 0},
)
print(prob_offer_good_genes_did_practice['Offer'])

╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8150 │
├─────────┼──────────────┤
│ Offer_1 │       0.1850 │
╘═════════╧══════════════╛


### You can also go upstream logically. For example, evidence about performance at the Olympic Trials affects the probabilities of Genetics.

In [98]:
# P(Genetics | OlympicTrials = 2): reason from the child back to a parent by
# observing the top OlympicTrials state.
prob_good_genes_if_amazing_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'OlympicTrials': 2},
)
print(prob_good_genes_if_amazing_olympic_trials['Genetics'])

╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.3377 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.6623 │
╘════════════╧═════════════════╛


### Some variables are only informative about other variables given 'third' variables

In [99]:
# On its own, Practice carries no information about Genetics: conditioning on
# Practice = 1 leaves the Genetics distribution at its prior (0.2 / 0.8).
prob_good_genes_if_no_practice = olympic_infer.query(
    variables=['Genetics'],
    evidence={'Practice': 1},
)
print(prob_good_genes_if_no_practice['Genetics'])

╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.2000 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.8000 │
╘════════════╧═════════════════╛


In [100]:
# Once OlympicTrials is also observed, Practice DOES become informative about
# Genetics: the two parents are coupled through their common child.
prob_good_genes_if_no_practice_good_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'Practice': 1, 'OlympicTrials': 2},
)
print(prob_good_genes_if_no_practice_good_olympic_trials['Genetics'])

╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.3846 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.6154 │
╘════════════╧═════════════════╛


### We can find out the most probable state for a variable

In [101]:
# Most probable state of Genetics with no evidence supplied.
olympic_infer.map_query(variables = ['Genetics'])

{'Genetics': 1}

In [102]:
# Most probable state of Offer with no evidence supplied.
olympic_infer.map_query(variables = ['Offer'])

{'Offer': 0}

In [103]:
# Most probable state of OlympicTrials with no evidence supplied.
olympic_infer.map_query(variables = ['OlympicTrials'])

{'OlympicTrials': 0}