# Representing independencies using pgmpy

## IndependenceAssertion

Represents a conditional independence assertion or a (marginal) independence assertion.

Each assertion has 3 attributes: event1, event2, event3.

For the assertion $U \perp X, Y | Z$ the attributes are: `event1` is U, `event2` is [X, Y] and `event3` is [Z].

In [1]:
from pgmpy.independencies import IndependenceAssertion
from pgmpy.independencies import Independencies

In [2]:
# Marginal independence: X is independent of Y, with no conditioning set.
assertion1 = IndependenceAssertion(event1='X', event2='Y')
assertion1

(X _|_ Y)

In [3]:
# Conditional independence: X is independent of Y given Z.
assertion2 = IndependenceAssertion(event1='X', event2='Y', event3='Z')
assertion2

(X _|_ Y | Z)

## Independencies


Base class for independencies.

The `Independencies` class represents a set of conditional independence assertions (e.g. "X is independent of Y given Z", where X, Y and Z are random variables) or independence assertions (e.g. "X is independent of Y", where X and Y are random variables).

Initialize the `Independencies` class with conditional independence assertions or independence assertions.

In [4]:
# Start from an empty collection, then register both assertions on it.
independencies = Independencies() # Empty object
independencies.add_assertions(assertion1, assertion2)
# Last expression of the cell: displays the stored assertions as a list.
independencies.get_assertions()

[(X _|_ Y), (X _|_ Y | Z)]

In [5]:
# The assertions can also be handed straight to the constructor.
independencies = Independencies(assertion1, assertion2)
independencies

(X _|_ Y)
(X _|_ Y | Z)

In [6]:
# Plain lists are accepted as shorthand for assertions: a two-element list is
# shown as (X _|_ Y) and a three-element list as (A _|_ B | C) in this cell's output.
independencies = Independencies(['X', 'Y'], ['A', 'B', 'C'])
independencies

(X _|_ Y)
(A _|_ B | C)

# Representing joint probability distributions

In [7]:
from pgmpy.factors.discrete import JointProbabilityDistribution as Joint

In [8]:
# Joint distribution over two binary variables: all 2 x 2 outcomes are
# equally likely, so each of the four entries is 0.25.
distribution = Joint(
    variables=['coin1', 'coin2'],
    cardinality=[2, 2],
    values=[0.25] * 4,
)
print(distribution)

coin1    coin2      P(coin1,coin2)
-------  -------  ----------------
coin1_0  coin2_0            0.2500
coin1_0  coin2_1            0.2500
coin1_1  coin2_0            0.2500
coin1_1  coin2_1            0.2500


# Representing Conditional Probability Distributions (CPDs)

In [9]:
from pgmpy.factors.discrete.CPD import TabularCPD

In [10]:
# Distribution over the three Quality states with no evidence variables:
# one single-entry row per state, and the rows sum to 1.
quality = TabularCPD(
    variable="Quality",
    variable_card=3,
    values=[[0.3], [0.5], [0.2]],
)
print(quality)
print(quality.cardinality)
print(quality.values)

+-----------+-----+
| Quality_0 | 0.3 |
+-----------+-----+
| Quality_1 | 0.5 |
+-----------+-----+
| Quality_2 | 0.2 |
+-----------+-----+
[3]
[0.3 0.5 0.2]


In [11]:
# Distribution over the two Location states (no evidence variables).
location = TabularCPD(variable="Location", variable_card=2, values=[[0.6], [0.4]])
print(location)

+------------+-----+
| Location_0 | 0.6 |
+------------+-----+
| Location_1 | 0.4 |
+------------+-----+


In [12]:
# P(Cost | Q, L): one row per Cost state and one column per (Q, L) combination.
# Columns are ordered with the last evidence variable (L) varying fastest, as
# the column headers of the printed table show. Each column sums to 1.
# NOTE(review): the evidence names 'Q' and 'L' do not match the "Quality" and
# "Location" variable names used by the other CPDs in this section; presumably
# they denote the same variables -- confirm before combining them in one model.
cost = TabularCPD(variable="Cost", 
                  variable_card=2,
                  values=[[0.8, 0.6, 0.1, 0.6, 0.6, 0.05], [0.2, 0.4, 0.9, 0.4, 0.4, 0.95]],
                  evidence=['Q', 'L'], 
                  evidence_card=[3, 2])
print(cost)

+--------+-----+-----+-----+-----+-----+------+
| Q      | Q_0 | Q_0 | Q_1 | Q_1 | Q_2 | Q_2  |
+--------+-----+-----+-----+-----+-----+------+
| L      | L_0 | L_1 | L_0 | L_1 | L_0 | L_1  |
+--------+-----+-----+-----+-----+-----+------+
| Cost_0 | 0.8 | 0.6 | 0.1 | 0.6 | 0.6 | 0.05 |
+--------+-----+-----+-----+-----+-----+------+
| Cost_1 | 0.2 | 0.4 | 0.9 | 0.4 | 0.4 | 0.95 |
+--------+-----+-----+-----+-----+-----+------+


# Bayesian model representation

![](figs/chap01-bayes-network-exp01.png)

In [13]:
from pgmpy.models import BayesianModel

In [14]:
# Build an empty network, declare the two nodes, then connect them with the
# edge rain -> traffic_jam.
model = BayesianModel()
model.add_nodes_from(['rain', 'traffic_jam'])
model.add_edge('rain', 'traffic_jam')
# Show the resulting graph structure.
print(model.nodes())
print(model.edges())

['traffic_jam', 'rain']
[('rain', 'traffic_jam')]


In [15]:
# add_edge() also adds any endpoint that is not yet in the model: 'accident'
# was never added explicitly, yet it appears in the node list printed below.
model.add_edge('accident', 'traffic_jam')
print(model.nodes())
print(model.edges())

['accident', 'traffic_jam', 'rain']
[('accident', 'traffic_jam'), ('rain', 'traffic_jam')]


In [16]:
# In a Bayesian network every node has an associated CPD.
# 'rain' and 'accident' have no parents in this model, so their CPDs take no evidence.
cpd_rain = TabularCPD(variable='rain', variable_card=2, values=[[0.4], [0.6]])
cpd_accident = TabularCPD(variable='accident', variable_card=2, values=[[0.2], [0.8]])

# 'traffic_jam' has both of them as parents: one column per
# (rain, accident) combination, one row per traffic_jam state.
cpd_traffic_jam = TabularCPD(
    variable='traffic_jam',
    variable_card=2,
    values=[
        [0.9, 0.6, 0.7, 0.1],
        [0.1, 0.4, 0.3, 0.9],
    ],
    evidence=['rain', 'accident'],
    evidence_card=[2, 2],
)

# Associate the CPDs with the model and list what is registered so far.
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x114cebe90>,
 <TabularCPD representing P(accident:2) at 0x114cebe50>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x114cebed0>]

In [17]:
# Extend the network with the remaining variables and their CPDs.

# 'long_queues' has a single parent, 'traffic_jam'.
model.add_node('long_queues')
model.add_edge('traffic_jam', 'long_queues')
cpd_long_queues = TabularCPD(
    variable='long_queues',
    variable_card=2,
    values=[[0.9, 0.2], [0.1, 0.8]],
    evidence=['traffic_jam'],
    evidence_card=[2],
)
model.add_cpds(cpd_long_queues)

# 'late_for_school' has two parents: 'getting_up_late' and 'traffic_jam'.
model.add_nodes_from(['getting_up_late', 'late_for_school'])
model.add_edges_from([
    ('getting_up_late', 'late_for_school'),
    ('traffic_jam', 'late_for_school'),
])
cpd_getting_up_late = TabularCPD(
    variable='getting_up_late',
    variable_card=2,
    values=[[0.6], [0.4]],
)
cpd_late_for_school = TabularCPD(
    variable='late_for_school',
    variable_card=2,
    values=[
        [0.9, 0.45, 0.8, 0.1],
        [0.1, 0.55, 0.2, 0.9],
    ],
    evidence=['getting_up_late', 'traffic_jam'],
    evidence_card=[2, 2],
)
model.add_cpds(cpd_getting_up_late, cpd_late_for_school)

# List every CPD now attached to the model.
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x114cebe90>,
 <TabularCPD representing P(accident:2) at 0x114cebe50>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x114cebed0>,
 <TabularCPD representing P(long_queues:2 | traffic_jam:2) at 0x114e0da90>,
 <TabularCPD representing P(getting_up_late:2) at 0x112d0dcd0>,
 <TabularCPD representing P(late_for_school:2 | getting_up_late:2, traffic_jam:2) at 0x114e0da10>]

In [18]:
# Check that the model structure and all of its associated CPDs are
# consistent with each other; the cell displays True when they are.
model.check_model()

True

In [19]:
# If a wrong CPD has been associated with the model, it can be removed again.
# NOTE(review): after this call 'late_for_school' no longer has a CPD (see the
# list below); presumably a corrected CPD must be added before check_model()
# passes again -- confirm.
model.remove_cpds('late_for_school')
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x114cebe90>,
 <TabularCPD representing P(accident:2) at 0x114cebe50>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x114cebed0>,
 <TabularCPD representing P(long_queues:2 | traffic_jam:2) at 0x114e0da90>,
 <TabularCPD representing P(getting_up_late:2) at 0x112d0dcd0>]

In [20]:
# A trail along which influence can flow is called an active trail.
# 'accident' and 'rain' are connected only through their common child
# 'traffic_jam' (accident -> traffic_jam <- rain); with nothing observed the
# call reports no active trail.
model.is_active_trail('accident', 'rain')

False

In [21]:
# Observing the common child 'traffic_jam' activates the trail
# accident -> traffic_jam <- rain.
model.is_active_trail('accident', 'rain', observed='traffic_jam')

True

In [22]:
# The only trail is getting_up_late -> late_for_school <- traffic_jam <- rain;
# the common child 'late_for_school' is unobserved, so the trail is not active.
model.is_active_trail('getting_up_late', 'rain')

False

In [23]:
# Observing the common child 'late_for_school' activates the trail
# getting_up_late -> late_for_school <- traffic_jam <- rain.
model.is_active_trail('getting_up_late', 'rain', observed='late_for_school')

True