In [25]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import pandas as pd
import numpy as np

## Steps to Build Bayesian Model

### Step1: Add nodes & edges

In [29]:
student = BayesianModel()
student.add_nodes_from(['diff', 'intel', 'grades'])
student.add_edges_from([('diff', 'grades'), ('intel', 'grades')])
#or a quick way
student1 = BayesianModel([('diff', 'grades'), ('intel', 'grades')])

### Step2: Add CPD

In [30]:
cpd_diff = TabularCPD('diff',2,np.reshape([[0.6,0.4]],(2,1)))
cpd_intel = TabularCPD('intel',2,np.reshape([[0.7,0.3]],(2,1)));
cpd_grades = TabularCPD('grades', 2, [[0.1, 0.9, 0.2, 0.7],
                                     [0.9, 0.1, 0.8, 0.3]],
                                 ['intel', 'diff'], [2, 2])

student.add_cpds(cpd_diff, cpd_intel, cpd_grades)

copy_student = student.copy()
#get nodes
copy_student.nodes()
#get edges
copy_student.edges()
#get cpds
copy_student.get_cpds()

[<TabularCPD representing P(diff:2) at 0x2ca9b34a280>,
 <TabularCPD representing P(intel:2) at 0x2ca9b34a310>,
 <TabularCPD representing P(grades:2 | intel:2, diff:2) at 0x2ca9b34a340>]

### Alternate Step2: Fit data

In [31]:
data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
model = BayesianModel([('A', 'C'), ('B', 'C')])
model.fit(data)
model.get_cpds()

[<TabularCPD representing P(A:2) at 0x2ca9b34a5e0>,
 <TabularCPD representing P(C:2 | A:2, B:2) at 0x2ca9b32fc40>,
 <TabularCPD representing P(B:2) at 0x2ca9b32f2b0>]

### Step3: Check mode valid

In [32]:
student.check_model()

True

## Uses of Bayesian Model

### Find independencies

In [33]:
copy_student.get_independencies()

(intel ⟂ diff)
(diff ⟂ intel)

In [35]:
chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
chain.get_independencies()

(Z ⟂ X | Y)
(X ⟂ Z | Y)

### Local independencies

In [36]:
chain.local_independencies('X')



### Prediction  
```python
model.predict(predict_data)  
model.predict_probability(predict_data)  
```

In [37]:
values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),columns=['A', 'B', 'C', 'D', 'E'])
train_data = values[:80]
predict_data = values[80:]
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
model.fit(values)
predict_data = predict_data.copy()
predict_data.drop('E', axis=1, inplace=True)
y_pred = model.predict(predict_data)
y_pred



  0%|          | 0/9 [00:00<?, ?it/s]

  return data.merge(data_with_results, how="left").loc[:, missing_variables]


Unnamed: 0,E
0,0
1,1
2,0
3,0
4,1
5,1
6,0
7,0
8,0
9,0


In [38]:
values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),columns=['A', 'B', 'C', 'D', 'E'])
train_data = values[:80]
predict_data = values[80:]
model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
model.fit(values)
predict_data = predict_data.copy()
predict_data.drop('B', axis=1, inplace=True)
y_prob = model.predict_probability(predict_data)
y_prob



Unnamed: 0,B_0,B_1
80,0.469388,0.530612
81,0.554731,0.445269
82,0.469388,0.530612
83,0.469388,0.530612
84,0.754098,0.245902
85,0.754098,0.245902
86,0.489362,0.510638
87,0.754098,0.245902
88,0.469388,0.530612
89,0.754098,0.245902


### Inference

In [41]:
infer = VariableElimination(student)
posterior_diff = infer.query(['diff'], evidence={'grades': 0, 'intel': 1})
print(posterior_diff)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

+---------+-------------+
| diff    |   phi(diff) |
| diff(0) |      0.3000 |
+---------+-------------+
| diff(1) |      0.7000 |
+---------+-------------+
