In [28]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import pandas as pd
import numpy as np

## Steps to Build Bayesian Model

### Step1: Add nodes & edges

In [17]:
# Option 1: start from an empty network, then register the nodes and the
# directed edges explicitly. `diff` and `intel` are both parents of `grades`.
student = BayesianModel()
student.add_nodes_from(["diff", "intel", "grades"])
student.add_edges_from([
    ("diff", "grades"),
    ("intel", "grades"),
])

# Option 2: hand the same edge list straight to the constructor.
# `student1` only illustrates the shortcut; the cells shown in this notebook
# keep working with `student`.
student1 = BayesianModel([
    ("diff", "grades"),
    ("intel", "grades"),
])

### Step2: Add CPDs (conditional probability distributions)

In [18]:
# Priors for the two root nodes. Values are written with one row per state
# (shape: variable_card x 1). This layout is accepted by both older and newer
# pgmpy releases, whereas the single-row form [[0.6, 0.4]] is rejected by
# releases that validate the shape of `values`.
cpd_diff = TabularCPD('diff', 2, [[0.6], [0.4]])
cpd_intel = TabularCPD('intel', 2, [[0.7], [0.3]])

# P(grades | intel, diff): each row is a state of `grades`; the columns
# enumerate the evidence states with the last evidence variable (`diff`)
# varying fastest:
#   (intel=0, diff=0), (intel=0, diff=1), (intel=1, diff=0), (intel=1, diff=1)
# Every column sums to 1.
cpd_grades = TabularCPD('grades', 2,
                        [[0.1, 0.9, 0.2, 0.7],
                         [0.9, 0.1, 0.8, 0.3]],
                        evidence=['intel', 'diff'],
                        evidence_card=[2, 2])

student.add_cpds(cpd_diff, cpd_intel, cpd_grades)

copy_student = student.copy()
# Only the last expression of a cell is rendered, so the node and edge views
# below are evaluated but not displayed; the cell output is the CPD list.
copy_student.nodes()
copy_student.edges()
copy_student.get_cpds()

[<TabularCPD representing P(diff:2) at 0x10a7ff240>,
 <TabularCPD representing P(intel:2) at 0x10a7ff278>,
 <TabularCPD representing P(grades:2 | intel:2, diff:2) at 0x10a7ff2b0>]

### Alternate Step2: Fit data

In [26]:
# Three complete observations of the binary variables A, B and C.
data = pd.DataFrame({'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})

# Structure is still given by hand (A -> C <- B); only the CPDs are learned.
model = BayesianModel([('A', 'C'), ('B', 'C')])

# Maximum-likelihood estimation is what `fit` uses when no estimator is
# given; it is passed explicitly here to make the choice visible.
model.fit(data, MaximumLikelihoodEstimator)

# Show the CPDs that were estimated from `data`.
model.get_cpds()

[<TabularCPD representing P(A:2) at 0x105164a20>,
 <TabularCPD representing P(B:2) at 0x1022367b8>,
 <TabularCPD representing P(C:2 | A:2, B:2) at 0x106404f98>]

### Step3: Check that the model is valid

In [54]:
# Sanity-check the hand-specified network (structure plus the CPDs attached
# to it) before using it; the recorded result for this model is True.
student.check_model()

True

## Uses of Bayesian Model

### Find independencies

In [21]:
# List the independence assertions of the student network, queried on the
# copy of `student`. The recorded output shows that `diff` and `intel` are
# independent of each other.
copy_student.get_independencies()

(diff _|_ intel)
(intel _|_ diff)

In [22]:
# A three-node chain X -> Y -> Z, used to show a conditional independence:
# in the recorded output X and Z are independent once Y is given.
chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
chain.get_independencies()

(X _|_ Z | Y)
(Z _|_ X | Y)

### Local independencies

In [25]:
# Independencies local to node X in the chain X -> Y -> Z.
# NOTE(review): Y and Z are both descendants of X in this chain, so the
# recorded result `(X _|_ Y, Z)` looks suspect -- re-run against the installed
# pgmpy version to confirm what is actually returned.
chain.local_independencies('X')

(X _|_ Y, Z)

### Prediction  
```python
model.predict(predict_data)  
model.predict_probability(predict_data)  
```

In [52]:
# Synthetic binary data set: 100 rows, five variables. A seeded generator is
# used so the data (and therefore the fitted CPDs) are the same on every run.
values = pd.DataFrame(
    np.random.RandomState(0).randint(low=0, high=2, size=(100, 5)),
    columns=['A', 'B', 'C', 'D', 'E'])

# First 80 rows for training; the last 20 rows are held out, with the column
# to be predicted ('E') removed. `drop` returns a new frame, so `values` is
# left untouched.
train_data = values[:80]
predict_data = values[80:].drop('E', axis=1)

model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
# Fit on the training split only; fitting on `values` would let the held-out
# rows leak into the learned CPDs.
model.fit(train_data)

# Predict the missing column for the held-out rows.
# NOTE(review): the output recorded below this cell lists E_0/E_1 probability
# columns, which does not look like the result of `predict` -- re-run the
# cell to refresh it.
y_pred = model.predict(predict_data)
y_pred

Unnamed: 0,E_0,E_1
80,0.5,0.5
81,0.5,0.5
82,0.5,0.5
83,0.5,0.5
84,0.5,0.5
85,0.5,0.5
86,0.5,0.5
87,0.5,0.5
88,0.5,0.5
89,0.5,0.5


In [53]:
# Synthetic binary data set: 100 rows, five variables. A seeded generator is
# used so the data (and therefore the fitted CPDs) are the same on every run.
values = pd.DataFrame(
    np.random.RandomState(0).randint(low=0, high=2, size=(100, 5)),
    columns=['A', 'B', 'C', 'D', 'E'])

# First 80 rows for training; the last 20 rows are held out, with the column
# whose distribution is wanted ('B') removed. `drop` returns a new frame, so
# `values` is left untouched.
train_data = values[:80]
predict_data = values[80:].drop('B', axis=1)

model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
# Fit on the training split only; fitting on `values` would let the held-out
# rows leak into the learned CPDs.
model.fit(train_data)

# Per-row state probabilities for the missing column.
# NOTE(review): the output recorded below this cell lists E_0/E_1 columns
# although 'B' is the column that was removed -- re-run the cell to refresh it.
y_prob = model.predict_probability(predict_data)
y_prob

Unnamed: 0,E_0,E_1
80,0.5,0.5
81,0.5,0.5
82,0.5,0.5
83,0.5,0.5
84,0.5,0.5
85,0.5,0.5
86,0.5,0.5
87,0.5,0.5
88,0.5,0.5
89,0.5,0.5


### Inference

In [56]:
# Inference by variable elimination on the hand-specified `student` network.
infer = VariableElimination(student)
# Posterior over `diff` after observing grades=0 and intel=1.
posterior_diff = infer.query(['diff'], evidence={'grades': 0, 'intel': 1})
# NOTE(review): indexing the result by variable name assumes `query` returns a
# mapping of variable -> factor, as in the pgmpy version that produced the
# recorded output; confirm against the installed version before relying on it.
print(posterior_diff['diff'])

╒════════╤═════════════╕
│ diff   │   phi(diff) │
╞════════╪═════════════╡
│ diff_0 │      0.3000 │
├────────┼─────────────┤
│ diff_1 │      0.7000 │
╘════════╧═════════════╛
