# Representing Independencies using pgmpy

In [19]:
# Importing IndependenceAssertion
# from pgmpy.independencies import IndependenceAssertion

In [20]:
# Import the DAG class; an empty DAG (no nodes, no edges) is created in the next cell
from pgmpy.base import DAG

In [21]:
# Instantiate an empty DAG: no nodes and no edges have been added yet.
G = DAG()

In [22]:
# Grow G by adding a single node named "A".
G.add_node(node="A")

In [23]:
# Add several nodes at once from any container (a list, a set, a tuple, or
# the nodes of another graph).
# NOTE: this has to be a method *call*. Writing `G.add_nodes_from = [...]`
# would merely rebind the attribute on this instance to a list, adding no
# nodes and hiding the method for later use on G.
G.add_nodes_from(nodes=['A', 'B'])

In [24]:
# Grow G by adding edges.
# Add one edge, given as its two endpoints u='A' and v='B':
G.add_edge(u='A', v='B')

In [25]:
# Add several edges at once from a list of (u, v) tuples.
# 'C' is never added as a node explicitly; it only appears here as an edge
# endpoint, yet the membership check for "C" further down reports True.
G.add_edges_from(ebunch=[('A', 'B'), ('B', 'C')])

In [26]:
# Check whether a node is in the graph with the `in` operator.
# Node names are matched exactly: only "A" (upper case) was added, so the
# lower-case "a" is reported as absent.
"a" in G

False

In [27]:
"A" in G

True

In [28]:
"C" in G

True

In [29]:
'D' in G

False

In [30]:
# Length of the graph: the recorded result 3 matches the three nodes
# 'A', 'B' and 'C' present at this point.
len(G)

3

In [31]:
# active_trail_nodes(variables, observed=None, include_latents=False):
# Returns a dictionary with the given variables as keys and, as values,
# all the nodes reachable from each respective variable.

In [32]:
# EXAMPLE 1: active trails in the structure difficulty -> grades <- intelligence.
from pgmpy.base import DAG
student = DAG()
student.add_nodes_from(['difficulty', 'intelligence', 'grades'])
student.add_edges_from([('difficulty', 'grades'), ('intelligence', 'grades')])
# Nothing is observed here; in the recorded result 'difficulty' reaches only
# itself and 'grades' (not 'intelligence').
student.active_trail_nodes('difficulty')

{'difficulty': {'difficulty', 'grades'}}

In [33]:
# Observing 'grades': in the recorded result below, 'difficulty' and
# 'intelligence' now appear in each other's sets, while 'grades' itself is
# not listed.
student.active_trail_nodes(['difficulty', 'intelligence'], observed='grades')


{'difficulty': {'difficulty', 'intelligence'},
 'intelligence': {'difficulty', 'intelligence'}}

In [34]:
# Add a single node to a fresh graph.
# NOTE: this rebinds G, replacing the graph built in the earlier cells.
from pgmpy.base import DAG
G = DAG()
G.add_node(node='J')
# sorted() turns the node view into a plain, ordered list of node names.
sorted(G.nodes())

['J']

In [35]:
# Add multiple nodes to a fresh graph in one call.
from pgmpy.base import DAG
M = DAG()
M.add_nodes_from(nodes=['P', 'Q', 'R'])
# Without sorted(), nodes() displays as a NodeView (see recorded output).
M.nodes()

NodeView(('P', 'Q', 'R'))

In [36]:
# ANCESTRAL GRAPH: restrict the DAG to the requested nodes ['A', 'B'].
# In the recorded result only the edges D -> A and D -> B remain; both edges
# into 'C' are gone.
from pgmpy.base import DAG
dag = DAG([('A', 'C'), ('B', 'C'), ('D', 'A'), ('D', 'B')])
anc_dag = dag.get_ancestral_graph(nodes=['A', 'B'])
anc_dag.edges()

OutEdgeView([('D', 'A'), ('D', 'B')])

In [37]:
# CHILDREN NODES: list the nodes that 'B' has an outgoing edge to.
# 'G' is reached only through 'E', and it is not part of the recorded result.
from pgmpy.base import DAG
g = DAG(ebunch=[('A', 'B'), ('C', 'B'), ('B', 'D'),
                              ('B', 'E'), ('B', 'F'), ('E', 'G')])
g.get_children(node='B')

['D', 'E', 'F']

In [38]:
# Find all the immoralities in the model. A v-structure X -> Z <- Y is an
# immorality if there is no direct edge between X and Y.
# Here 'diff' and 'intel' are both parents of 'grade' and share no edge, so
# the pair ('diff', 'intel') is reported.

from pgmpy.base import DAG
student = DAG()
student.add_edges_from([('diff', 'grade'), ('intel', 'grade'),
                        ('intel', 'SAT'), ('grade', 'letter')])
student.get_immoralities()

{('diff', 'intel')}

In [39]:
# Compute the independencies in the DAG by checking d-separation.
# For the chain X -> Y -> Z the recorded result states that X and Z are
# independent given Y (listed in both directions).
from pgmpy.base import DAG
chain = DAG([('X', 'Y'), ('Y', 'Z')])
chain.get_independencies()

(X ⟂ Z | Y)
(Z ⟂ X | Y)

In [40]:
# Return a list of the leaves of the graph.
# 'C' and 'D' are the only nodes that never appear as the source of an edge.
from pgmpy.base import DAG
graph = DAG([('A', 'B'), ('B', 'C'), ('B', 'D')])
graph.get_leaves()

['C', 'D']

In [41]:
# Get the Markov blanket of node 'y'.
# The recorded result contains y's parents (x, z), its children (w, v) and
# the other parents of those children (u, s).
from pgmpy.base import DAG
# NOTE(review): TabularCPD is imported here but not used in this cell.
from pgmpy.factors.discrete import TabularCPD
G = DAG([('x', 'y'), ('z', 'y'), ('y', 'w'), ('y', 'v'), ('u', 'w'),
                       ('s', 'v'), ('w', 't'), ('w', 'm'), ('v', 'n'), ('v', 'q')])
G.get_markov_blanket('y')

['s', 'x', 'z', 'u', 'w', 'v']

In [42]:
# Return a list of the parents of a node.
# Both edges point into 'grade', so 'diff' and 'intel' are reported.
from pgmpy.base import DAG
G = DAG(ebunch=[('diff', 'grade'), ('intel', 'grade')])
G.get_parents(node='grade')

['diff', 'intel']

In [43]:
# Return a randomly generated DAG with n_nodes nodes, where edge_prob is the
# edge probability.
# The recorded run shows the ten nodes labelled with the integers 0 through 9.

from pgmpy.base import DAG
random_dag = DAG.get_random(n_nodes=10, edge_prob=0.3)
random_dag.nodes()

NodeView((0, 2, 4, 6, 8, 9, 1, 3, 5, 7))

In [44]:
# Return a list of the roots of the graph.
# 'A' and 'E' are the only nodes that never appear as the target of an edge.
from pgmpy.base import DAG
graph = DAG([('A', 'B'), ('B', 'C'), ('B', 'D'), ('E', 'B')])
graph.get_roots()

['A', 'E']

In [45]:
# MORAL GRAPH: moralize a DAG in which 'diff' and 'intel' are both parents
# of 'grade'.
from pgmpy.base import DAG
G = DAG(ebunch=[('diff', 'grade'), ('intel', 'grade')])
moral_graph = G.moralize()
# The recorded result is an EdgeView that, besides the two original
# connections, also contains an edge between 'diff' and 'intel'.
moral_graph.edges()

EdgeView([('diff', 'grade'), ('diff', 'intel'), ('grade', 'intel')])

# GRAPHS

In [46]:
# Convert a DAG into a daft object for plotting (the recorded result is a
# daft.PGM instance).
from pgmpy.base import DAG
dag = DAG([('a', 'b'), ('b', 'c'), ('d', 'c')])
# node_pos gives one 2-tuple per node -- presumably its (x, y) position in
# the drawing; confirm against the pgmpy documentation.
dag.to_daft(node_pos={'a': (0, 0), 'b': (1, 0), 'c': (2, 0), 'd': (1, 1)})

<daft.PGM at 0x1dfaf113978>

In [47]:
# REPRESENTING JOINT PROBABILITY DISTRIBUTION:
# (The class is importable from pgmpy.factors.discrete, as with TabularCPD below.)
# from pgmpy.factors.discrete import JointProbabilityDistribution as Joint
# distribution = Joint(['coin1', 'coin2'],
#                      [2, 2],
#                      [0.25, 0.25, 0.25, 0.25])

In [48]:
# Bayesian Model Representation:
from pgmpy.models import BayesianModel

In [49]:
# Start from an empty Bayesian model; nodes, edges and CPDs are added below.
model = BayesianModel()

In [50]:
# Add two nodes, then connect them with an edge from 'rain' to 'traffic_jam'.
model.add_nodes_from(['rain', 'traffic_jam'])
model.add_edge('rain', 'traffic_jam')

In [51]:
# 'accident' was not added as a node beforehand; it still shows up in
# model.nodes() afterwards (see the recorded node list).
model.add_edge('accident', 'traffic_jam')

In [52]:
# Check the nodes currently in the model.
model.nodes()

NodeView(('rain', 'traffic_jam', 'accident'))

In [53]:
# Check the edges currently in the model, shown as (source, target) pairs.
model.edges()

OutEdgeView([('rain', 'traffic_jam'), ('accident', 'traffic_jam')])

In [54]:
# `from pgmpy.factors import TabularCPD` did not work here; TabularCPD is
# imported from pgmpy.factors.discrete instead.
from pgmpy.factors.discrete import TabularCPD

# CPDs of the two parentless variables: two states each, one column.
cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]])

# CPD of traffic_jam given rain and accident: one row per traffic_jam state
# and one column per combination of the two binary evidence variables
# (2 x 2 = 4 columns); the entries of every column add up to 1.
cpd_traffic_jam = TabularCPD(
    'traffic_jam', 2,
    [[0.9, 0.6, 0.7, 0.1],
     [0.1, 0.4, 0.3, 0.9]],
    evidence=['rain', 'accident'],
    evidence_card=[2, 2])

In [55]:
# Attach the three CPDs to the model, then list the CPDs it now holds.
# The recorded listing shows them in the order they were passed in.
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x1dfb001b438>,
 <TabularCPD representing P(accident:2) at 0x1dfb001b470>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x1dfaf0e30f0>]

In [57]:
# Extend the model with the remaining variables and their CPDs.

# long_queues has a single parent, traffic_jam: one column per parent state.
model.add_node('long_queues')
model.add_edge('traffic_jam', 'long_queues')
cpd_long_queues = TabularCPD(
    variable='long_queues',
    variable_card=2,
    values=[[0.9, 0.2],
            [0.1, 0.8]],
    evidence=['traffic_jam'],
    evidence_card=[2],
)
model.add_cpds(cpd_long_queues)

# late_for_school has two parents: getting_up_late and traffic_jam.
model.add_nodes_from(['getting_up_late', 'late_for_school'])
model.add_edges_from([
    ('getting_up_late', 'late_for_school'),
    ('traffic_jam', 'late_for_school'),
])

# getting_up_late has no parents, so its CPD is a single column.
cpd_getting_up_late = TabularCPD(
    variable='getting_up_late',
    variable_card=2,
    values=[[0.6], [0.4]],
)
# Four columns: one per combination of the two binary parents.
cpd_late_for_school = TabularCPD(
    variable='late_for_school',
    variable_card=2,
    values=[[0.9, 0.45, 0.8, 0.1],
            [0.1, 0.55, 0.2, 0.9]],
    evidence=['getting_up_late', 'traffic_jam'],
    evidence_card=[2, 2],
)
model.add_cpds(cpd_getting_up_late, cpd_late_for_school)

# List every CPD attached to the model so far.
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x1dfb001b438>,
 <TabularCPD representing P(accident:2) at 0x1dfb001b470>,
 <TabularCPD representing P(traffic_jam:2 | rain:2, accident:2) at 0x1dfaf0e30f0>,
 <TabularCPD representing P(long_queues:2 | traffic_jam:2) at 0x1dfb001bb70>,
 <TabularCPD representing P(getting_up_late:2) at 0x1dfb001bbe0>,
 <TabularCPD representing P(late_for_school:2 | getting_up_late:2, traffic_jam:2) at 0x1dfb001bb38>]