In [1]:
import networkx as nx
!pip install pgmpy
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
import matplotlib.pyplot as plt
import json
import numpy as np

Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Downloading pgmpy-0.1.26-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pgmpy
Successfully installed pgmpy-0.1.26


In [85]:
# Function to create the Bayesian network
def create_bayesian_network():
    """Assemble the fixed seven-node discrete network and return it.

    Structure: each ``Child_X`` node (X in A, B) has ``Parent1_X`` and
    ``Parent2_X`` as parents, and ``Child_BloodType`` depends on ``Child_A``
    and ``Child_B``. Parent and child nodes have three states (rows labelled
    O / A / B below); ``Child_BloodType`` has four (O / A / B / AB).

    Side effects:
        Prints the per-column sums of the ``Child_BloodType`` table after it
        has been renormalised.

    Returns:
        The populated ``BayesianNetwork``.

    Raises:
        AssertionError: if ``check_model()`` returns a falsy value.
    """
    network = BayesianNetwork()

    # Child node -> its parents, in the order the CPD columns are laid out.
    parents_of = {
        'Child_A': ['Parent1_A', 'Parent2_A'],
        'Child_B': ['Parent1_B', 'Parent2_B'],
        'Child_BloodType': ['Child_A', 'Child_B'],
    }
    root_nodes = ['Parent1_A', 'Parent1_B', 'Parent2_A', 'Parent2_B']

    network.add_nodes_from(root_nodes + ['Child_A', 'Child_B', 'Child_BloodType'])
    network.add_edges_from(
        [(parent, child) for child, parents in parents_of.items() for parent in parents]
    )

    # Every root node shares the same three-state prior.
    root_cpds = [
        TabularCPD(root, 3, [[0.64], [0.28], [0.08]])
        for root in root_nodes
    ]

    # Child_A and Child_B use the same table; one column per
    # (first parent state, second parent state) pair.
    inherited_rows = (
        (0.25, 0.5, 0.0, 0.5, 0.25, 0.0, 0.0, 0.0, 0.0),  # O
        (0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.0, 0.0),    # A
        (0.25, 0.0, 0.5, 0.5, 0.25, 0.5, 0.5, 1.0, 1.0),  # B
    )
    inherited_cpds = [
        TabularCPD(
            variable=child,
            variable_card=3,
            values=[list(row) for row in inherited_rows],
            evidence=list(parents_of[child]),
            evidence_card=[3, 3],
        )
        for child in ('Child_A', 'Child_B')
    ]

    blood_type_cpd = TabularCPD(
        variable='Child_BloodType',
        variable_card=4,
        values=[
            [0.0625, 0.0625, 0.0,    0.0,    0.0,    0.0,    0.0,    0.0,    0.0625],  # O
            [0.5,    0.5,    0.3333, 0.5,    0.5,    0.0,    0.5,    0.0,    0.6875],  # A
            [0.1875, 0.0,    0.3333, 0.0,    0.0,    0.5,    0.0,    0.5,    0.0625],  # B
            [0.25,   0.4375, 0.3333, 0.5,    0.5,    0.5,    0.5,    0.5,    0.1875]   # AB
        ],
        evidence=list(parents_of['Child_BloodType']),
        evidence_card=[3, 3],
    )

    # The third column is written with rounded thirds (3 * 0.3333 = 0.9999),
    # so rescale every column to sum to exactly one.
    column_totals = np.sum(blood_type_cpd.values, axis=0)
    blood_type_cpd.values = blood_type_cpd.values / column_totals

    # Validate CPD integrity
    print("Sum of each column in Child_BloodType CPD:")
    print(np.sum(blood_type_cpd.values, axis=0))

    network.add_cpds(*root_cpds, *inherited_cpds, blood_type_cpd)
    assert network.check_model()
    return network

# Function to infer blood type
def infer_blood_type(model, evidence):
    """Query the posterior over ``Child_BloodType`` given observed node states.

    Args:
        model: Network handed to ``VariableElimination``.
        evidence: Mapping forwarded unchanged as the query's ``evidence``.

    Returns:
        Whatever ``VariableElimination.query`` returns for the single query
        variable ``Child_BloodType``.
    """
    engine = VariableElimination(model)
    return engine.query(variables=['Child_BloodType'], evidence=evidence)

# Function to process the family tree and match expected output
def process_family_tree(data):
    """Infer the ``Child_BloodType`` distribution for one problem description.

    Only ``data['test-results']`` is read. Each entry must provide a
    ``'person'`` and a ``'result'`` key; a result is used as evidence only when
    the person is one of the hard-coded names below and the result is one of
    ``'O'``, ``'A'``, ``'B'``, ``'AB'``. Everything else is ignored. If several
    listed persons have usable results, the last one wins because they all
    write to the same two evidence nodes.

    NOTE(review): no other part of ``data`` is consulted, so the actual family
    structure and any query target in the file play no role here — confirm
    this is intended beyond the three sample problems.

    Args:
        data: Parsed problem description (a mapping).

    Returns:
        dict mapping ``'O'``, ``'A'``, ``'B'``, ``'AB'`` to plain Python
        floats, taken from positions 0-3 of the query result's ``values``.

    Side effects:
        Prints the evidence dict (and whatever ``create_bayesian_network``
        prints while building the model).
    """
    # Persons whose result is applied to the Parent2 nodes. The original code
    # tied them to the sample problems a-11, a-00 and a-07 respectively.
    parent2_persons = {"Linda", "Lyn", "Samantha"}

    # Translate known blood types into allele evidence
    # (state indices for Parent2_A and Parent2_B, not state names).
    blood_type_to_alleles = {
        'O': (0, 0),
        'A': (1, 0),
        'B': (2, 0),
        'AB': (1, 2)
    }

    model = create_bayesian_network()

    # `or []` also covers a key that is present but null.
    entries = data.get('test-results') or []
    test_results = {entry['person']: entry['result'] for entry in entries}

    evidence = {}
    for person, blood_type in test_results.items():
        if person not in parent2_persons or blood_type not in blood_type_to_alleles:
            continue
        evidence['Parent2_A'], evidence['Parent2_B'] = blood_type_to_alleles[blood_type]

    # Print evidence for debugging
    print("Evidence:", evidence)

    # Infer the queried person's blood type
    result = infer_blood_type(model, evidence)

    # Convert to built-in floats so the dict prints as plain numbers under any
    # NumPy version instead of `np.float64(...)`.
    labels = ("O", "A", "B", "AB")
    return {label: float(result.values[index]) for index, label in enumerate(labels)}

# Example usage for multiple problems: load each problem file, run the
# inference on it, and print the resulting blood-type distribution.
# NOTE(review): these are hard-coded absolute paths (they look like a Colab
# upload directory — confirm); the loop raises if any file is missing.
files = ['/content/problem-a-07.json', '/content/problem-a-00.json', '/content/problem-a-11.json']
for file_name in files:
    # Parse the problem description; the file is closed before inference runs.
    with open(file_name, 'r') as file:
        data = json.load(file)
    # Each call builds a fresh network and prints its own debug output first.
    output_distribution = process_family_tree(data)
    print(f"File: {file_name}")
    print("Output Distribution:", output_distribution)


Sum of each column in Child_BloodType CPD:
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
Evidence: {'Parent2_A': 1, 'Parent2_B': 2}
File: /content/problem-a-07.json
Output Distribution: {'O': 0.016275, 'A': 0.32138750000000005, 'B': 0.23396250000000002, 'AB': 0.42837500000000006}
Sum of each column in Child_BloodType CPD:
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
Evidence: {'Parent2_A': 1, 'Parent2_B': 0}
File: /content/problem-a-00.json
Output Distribution: {'O': 0.019275, 'A': 0.38226250000000006, 'B': 0.174525, 'AB': 0.4239375}
Sum of each column in Child_BloodType CPD:
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
Evidence: {'Parent2_A': 2, 'Parent2_B': 0}
File: /content/problem-a-11.json
Output Distribution: {'O': 0.011475, 'A': 0.35902500000000004, 'B': 0.18687500000000004, 'AB': 0.44262500000000005}
