In [1]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

# Define the Bayesian network structure as a list of directed (parent, child) edges.
model = BayesianModel([('D', 'G'), ('I', 'G'), ('G', 'L'), ('I', 'S')])

# Conditional probability distributions (CPDs).
# TabularCPD arguments:
#   variable      - name of the node the CPD belongs to
#   variable_card - cardinality (number of states) of that node
#   values        - 2-D probability table: one row per state of `variable`,
#                   one column per combination of evidence states
#   evidence      - names of the parent nodes the CPD is conditioned on
#   evidence_card - cardinalities of the parents, in the same order as `evidence`

# Root nodes have no evidence, so their table is a single column of shape
# (variable_card, 1). Newer pgmpy releases validate this shape and reject a
# flat row such as [[0.6, 0.4]]; the column form flattens to the same table.
cpd_d = TabularCPD(variable='D', variable_card=2, values=[[0.6], [0.4]])
cpd_i = TabularCPD(variable='I', variable_card=2, values=[[0.7], [0.3]])

# Columns follow the evidence order: (I_0, D_0), (I_0, D_1), (I_1, D_0), (I_1, D_1).
cpd_g = TabularCPD(variable='G', variable_card=3,
                   values=[[0.3, 0.05, 0.9,  0.5],
                           [0.4, 0.25, 0.08, 0.3],
                           [0.3, 0.7,  0.02, 0.2]],
                   evidence=['I', 'D'],
                   evidence_card=[2, 2])

# One column per state of G (G_0, G_1, G_2).
cpd_l = TabularCPD(variable='L', variable_card=2,
                   values=[[0.1, 0.4, 0.99],
                           [0.9, 0.6, 0.01]],
                   evidence=['G'],
                   evidence_card=[3])

# One column per state of I (I_0, I_1).
cpd_s = TabularCPD(variable='S', variable_card=2,
                   values=[[0.95, 0.2],
                           [0.05, 0.8]],
                   evidence=['I'],
                   evidence_card=[2])

# Attach the CPD tables to the directed acyclic graph.
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# Validate the model: checks the network structure and that every CPD is
# correctly defined and sums to 1. Left as the last expression so the result is displayed.
model.check_model()

True

In [2]:
model.get_cpds()  # List every CPD attached to the model (not the model itself)

[<TabularCPD representing P(D:2) at 0x7f20c84f3fd0>,
 <TabularCPD representing P(I:2) at 0x7f20c84f5080>,
 <TabularCPD representing P(G:3 | I:2, D:2) at 0x7f20c84f51d0>,
 <TabularCPD representing P(L:2 | G:3) at 0x7f20c84f5128>,
 <TabularCPD representing P(S:2 | I:2) at 0x7f20c8268668>]

In [3]:
# Fetch the CPD attached to node G and print its probability table.
g_cpd = model.get_cpds(node='G')
print(g_cpd)

+-----+-----+------+------+-----+
| I   | I_0 | I_0  | I_1  | I_1 |
+-----+-----+------+------+-----+
| D   | D_0 | D_1  | D_0  | D_1 |
+-----+-----+------+------+-----+
| G_0 | 0.3 | 0.05 | 0.9  | 0.5 |
+-----+-----+------+------+-----+
| G_1 | 0.4 | 0.25 | 0.08 | 0.3 |
+-----+-----+------+------+-----+
| G_2 | 0.3 | 0.7  | 0.02 | 0.2 |
+-----+-----+------+------+-----+


In [4]:
model.get_cardinality('G')   # Cardinality (number of states) of node G

3

In [5]:
model.local_independencies(['D', 'I', 'S', 'G', 'L'])    # Local independencies (not dependencies) of every node in the network

(D _|_ I, S)
(I _|_ D)
(S _|_ D, G, L | I)
(G _|_ S | D, I)
(L _|_ D, I, S | G)

In [6]:
# Variable elimination: exact inference of the marginal distribution of G.

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)

# Older pgmpy releases return a {variable: factor} dict from query(), while
# newer ones return the factor directly; accept both so the cell runs on either.
marginal_g = infer.query(['G'])
print(marginal_g['G'] if isinstance(marginal_g, dict) else marginal_g)

+-----+----------+
| G   |   phi(G) |
| G_0 |   0.3620 |
+-----+----------+
| G_1 |   0.2884 |
+-----+----------+
| G_2 |   0.3496 |
+-----+----------+


  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]


In [7]:
# Conditional distribution of G given the evidence D = 0 and I = 1.
# query() returns a {variable: factor} dict in older pgmpy releases and the
# factor itself in newer ones; accept both so the cell runs on either.
posterior_g = infer.query(['G'], evidence={'D': 0, 'I': 1})
print(posterior_g['G'] if isinstance(posterior_g, dict) else posterior_g)

+-----+----------+
| G   |   phi(G) |
| G_0 |   0.9000 |
+-----+----------+
| G_1 |   0.0800 |
+-----+----------+
| G_2 |   0.0200 |
+-----+----------+


In [15]:
# Predicting node values for new data is very similar to computing a conditional
# probability: we query the variables to predict given the other features.
# The difference is that we ask for the most probable state of each queried
# variable rather than its probability distribution.
# No evidence is supplied in this call.

infer.map_query(['L','I'])

{'L': 1, 'I': 0}