# My model on `pgmpy` framework

In [1]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
from pgmpy.inference import ApproxInference

In [2]:
# Defining the network structure
model = BayesianNetwork(
    [
        ("debtsToIncomeRatio", "paymentHistory"),
        ("degree", "income"),
        ("income", "assets"),
        ("assets", "futureIncome"),
        ("income", "futureIncome"),
        ("creditTime", "fullPayoutProb"),
        ("age", "fullPayoutProb"),
        ("age", "reliability"),
        ("age", "paymentHistory"),
        ("paymentHistory", "reliability"),
        
        ("fullPayoutProb", "creditWorthiness"),
        ("reliability", "creditWorthiness"),
        ("futureIncome", "creditWorthiness"),
        ("debtsToIncomeRatio", "creditWorthiness"),
    ]
)

In [3]:
all_state_names = {
    "debtsToIncomeRatio": ["high", "low"],
    "paymentHistory": ["good", "bad"],
    "degree": ["medium", "high"],
    "income": ["high", "low"],
    "assets": ["many", "few"],
    "futureIncome": ["high", "low"],
    "creditTime": ["short", "long"],
    "fullPayoutProb": ["high", "low"],
    "age": ["young", "old"],
    "reliability": ["reliable", "unreliable"],
    "creditWorthiness": ["Worth", "Not worth"],
}

## Defining the CPDs

In [4]:
# Defining the CPD for credit_time:
cpd_cre_time = TabularCPD("creditTime", 2, [[0.4], [0.6]], state_names=all_state_names,)

In [5]:
# Defining the CPD for debts_to_income_ratio:
cpd_dti = TabularCPD("debtsToIncomeRatio", 2, [[0.3], [0.7]], state_names=all_state_names,)

In [6]:
# Defining the CPD for degree:
cpd_deg = TabularCPD("degree", 2, [[0.25], [0.75]], state_names=all_state_names,)

In [7]:
# Defining the CPD for income:
cpd_inc = TabularCPD(
    "income", 
     2, 
     [
         [.2, .6],
         [.8, .4]
     ], 
     evidence=['degree'], 
     evidence_card=[2],
     state_names=all_state_names,
)

In [8]:
# Defining the CPD for assets:
cpd_ast = TabularCPD(
    "assets", 
    2, 
    [
        [.9, .4],
        [.1, .6]
    ], 
    evidence=['income'], 
    evidence_card=[2],
    state_names=all_state_names,
)

In [9]:
# Defining the CPD for future_income:
cpd_fin = TabularCPD(
    "futureIncome",
    2,
    [
        [.995, .6, .7, .1],
        [.005, .4, .3, .9],
    ],
    evidence=["assets", "income"],
    evidence_card=[2, 2],
    state_names=all_state_names,
)

In [10]:
# Defining the CPD for age:
cpd_age = TabularCPD("age", 2, [[0.8], [0.2]], state_names=all_state_names,)

In [11]:
# Defining the CPD for payment_history:
cpd_phs = TabularCPD(
    "paymentHistory",
    2,
    [
        [.2, .3, .8, .9],
        [.8, .7, .2, .1],
    ],
    evidence=["debtsToIncomeRatio", "age"],
    evidence_card=[2, 2],
    state_names=all_state_names,
)

In [12]:
# Defining the CPD for reliability:
cpd_rel = TabularCPD(
    "reliability",
    2,
    [
        [.8, .4, .95, .2],
        [.2, .6, .05, .8],
    ],
    evidence=["age", "paymentHistory"],
    evidence_card=[2, 2],
    state_names=all_state_names,
)

In [13]:
# Defining the CPD for full_payout_probability:
cpd_fpp = TabularCPD(
    "fullPayoutProb",
    2,
    [
        [.95, .9, .9, .3],
        [.05, .1, .1, .7],
    ],
    evidence=["creditTime", "age"],
    evidence_card=[2, 2],
    state_names=all_state_names,
)

In [14]:
# Defining the CPDs for credit_worthiness:
cpd_cw = TabularCPD(
    "creditWorthiness",
    2,
    [
        [.7, .6, .35, .5, .999, .9, .75, .55, .5, .05, .2, .0001, .7, .6, .1, .003],
        [.3, .4, .65, .5, .001, .1, .25, .45, .5, .95, .8, .9999, .3, .4, .9, .997],
    ],
    evidence=["futureIncome", "debtsToIncomeRatio", "fullPayoutProb", "reliability"],
    evidence_card=[2, 2, 2, 2],
    state_names=all_state_names,
)

In [15]:
# Associating the CPDs with the network structure.
model.add_cpds(
    cpd_cre_time,
    cpd_dti,
    cpd_deg,
    cpd_inc,
    cpd_ast,
    cpd_fin,
    cpd_age,
    cpd_phs,
    cpd_rel,
    cpd_fpp,
    cpd_cw
)

In [16]:
# Checking the model for various errors
model.check_model()    

True

In [17]:
## Algorithm: describe, try to change! эксперимент с другим алгоритмом, рассказать про алгоритм

## Using the model (exact inference)

In [18]:
infer = VariableElimination(model)
credit_result = infer.query(['creditWorthiness'])
print(credit_result)

+-----------------------------+-------------------------+
| creditWorthiness            |   phi(creditWorthiness) |
| creditWorthiness(Worth)     |                  0.7051 |
+-----------------------------+-------------------------+
| creditWorthiness(Not worth) |                  0.2949 |
+-----------------------------+-------------------------+


In [19]:
credit_result = infer.query(['creditWorthiness'], evidence={"fullPayoutProb": "low"})
print(credit_result)

+-----------------------------+-------------------------+
| creditWorthiness            |   phi(creditWorthiness) |
| creditWorthiness(Worth)     |                  0.4285 |
+-----------------------------+-------------------------+
| creditWorthiness(Not worth) |                  0.5715 |
+-----------------------------+-------------------------+


In [20]:
credit_result = infer.query(['creditWorthiness'], evidence={"fullPayoutProb": "low", "age": "old"})
print(credit_result)

+-----------------------------+-------------------------+
| creditWorthiness            |   phi(creditWorthiness) |
| creditWorthiness(Worth)     |                  0.4359 |
+-----------------------------+-------------------------+
| creditWorthiness(Not worth) |                  0.5641 |
+-----------------------------+-------------------------+


In [21]:
credit_result = infer.query(['creditWorthiness'], evidence={"assets": "many"})
print(credit_result)

+-----------------------------+-------------------------+
| creditWorthiness            |   phi(creditWorthiness) |
| creditWorthiness(Worth)     |                  0.7915 |
+-----------------------------+-------------------------+
| creditWorthiness(Not worth) |                  0.2085 |
+-----------------------------+-------------------------+


In [22]:
credit_result = infer.query(['creditWorthiness'], evidence={"futureIncome": "low", "reliability": "reliable"})
print(credit_result)

+-----------------------------+-------------------------+
| creditWorthiness            |   phi(creditWorthiness) |
| creditWorthiness(Worth)     |                  0.5686 |
+-----------------------------+-------------------------+
| creditWorthiness(Not worth) |                  0.4314 |
+-----------------------------+-------------------------+


In [23]:
credit_result = infer.query(['creditWorthiness'], evidence={"futureIncome": "low", "reliability": "unreliable"})
print(credit_result)

+-----------------------------+-------------------------+
| creditWorthiness            |   phi(creditWorthiness) |
| creditWorthiness(Worth)     |                  0.2966 |
+-----------------------------+-------------------------+
| creditWorthiness(Not worth) |                  0.7034 |
+-----------------------------+-------------------------+


In [24]:
credit_result = infer.query(['creditTime'], evidence={
    "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many"
    }
)
print(credit_result)

+-------------------+-------------------+
| creditTime        |   phi(creditTime) |
| creditTime(short) |            0.3069 |
+-------------------+-------------------+
| creditTime(long)  |            0.6931 |
+-------------------+-------------------+


In [25]:
credit_result = infer.query(['creditTime'], evidence={
    "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many", "degree": "medium"
    }
)
print(credit_result)

+-------------------+-------------------+
| creditTime        |   phi(creditTime) |
| creditTime(short) |            0.3212 |
+-------------------+-------------------+
| creditTime(long)  |            0.6788 |
+-------------------+-------------------+


## Using the model (approx. inference, sampling)

In [26]:
approx_infer = ApproxInference(model)

Default: n_samples=10000

In [39]:
credit_result = approx_infer.query(['creditTime'], evidence={
    "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many", "degree": "medium"
    }
)
print(credit_result)

  0%|          | 0/10000 [00:00<?, ?it/s]

+-------------------+-------------------+
| creditTime        |   phi(creditTime) |
| creditTime(long)  |            0.6794 |
+-------------------+-------------------+
| creditTime(short) |            0.3206 |
+-------------------+-------------------+


In [40]:
credit_result = approx_infer.query(['creditTime'], n_samples=100000, evidence={
    "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many", "degree": "medium"
    }
)
print(credit_result)

  0%|          | 0/100000 [00:00<?, ?it/s]

+-------------------+-------------------+
| creditTime        |   phi(creditTime) |
| creditTime(long)  |            0.6787 |
+-------------------+-------------------+
| creditTime(short) |            0.3213 |
+-------------------+-------------------+


In [29]:
credit_result = approx_infer.query(['creditTime'], n_samples=1000, evidence={
    "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many", "degree": "medium"
    }
)
print(credit_result) # got 0.3490 here

  0%|          | 0/1000 [00:00<?, ?it/s]

+-------------------+-------------------+
| creditTime        |   phi(creditTime) |
| creditTime(long)  |            0.6660 |
+-------------------+-------------------+
| creditTime(short) |            0.3340 |
+-------------------+-------------------+


In [30]:
# results: (0.3235, 0.6765)
#          (0.3156, 0.6844) ...

## Other methods

In [31]:
from pgmpy.sampling import BayesianModelSampling

In [32]:
g_infer = BayesianModelSampling(model)

In [33]:
try:
    credit_result = g_infer.query(['creditTime'], evidence={
        "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many", "degree": "medium"
        }
    )
except AttributeError as ae:
    print(f"no such method: {ae}")

no such method: 'BayesianModelSampling' object has no attribute 'query'


In [34]:
from pgmpy.sampling import GibbsSampling

In [35]:
try:
    b_infer = GibbsSampling(model)
except Exception as e:
    print(f"Error: {e}")

Error: Variable names cannot be same




## Belief Propagation

In [36]:
from pgmpy.inference import BeliefPropagation

In [37]:
believe_infer = BeliefPropagation(model)

In [38]:
credit_result = approx_infer.query(['creditTime'], evidence={
    "creditWorthiness": "Not worth", "paymentHistory": "good", "assets": "many", "degree": "medium"
    }
)
print(credit_result)

  0%|          | 0/10000 [00:00<?, ?it/s]

+-------------------+-------------------+
| creditTime        |   phi(creditTime) |
| creditTime(long)  |            0.6881 |
+-------------------+-------------------+
| creditTime(short) |            0.3119 |
+-------------------+-------------------+
