In [11]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

In [12]:
# Node names of the Bayesian network. Each constant is used as a dictionary
# key and as a variable name in the model, so the strings must stay stable.

# Nodes upstream of road density.
PER_UNIT_GDP = "per unit GDP"
POPULATION_DENSITY = "population density"
ROAD_DENSITY = "road density"

# Terrain nodes.
ELEVATION = "elevation"
SLOPE = "slope"

# Nodes upstream of rainfall amount.
RAINFALL_DURATION = "rainfall duration"
RIVER_DENSITY = "river density"
RAINFALL_AMOUNT = "rainfall amount"

# Target node.
FLOOD = "flood"

In [13]:
# Conditional probability tables, keyed by node name.
#
# Layout expected by TabularCPD: one row per state of the node (same order as
# in `table_dictionary`) and one column per combination of parent states
# (parents are given by `edges`). Every column must sum to 1.
#
# NOTE: the numbers are placeholders -- all columns of a table are identical,
# so each node is currently independent of its parents.
values_dictionary = {
    # Root node: 3 states, no parents -> 3 x 1.
    PER_UNIT_GDP: [
        [0.1],
        [0.2],
        [0.7],
    ],
    # Parents: per unit GDP (3) x population density (3) -> 3 x 9.
    ROAD_DENSITY: [
        [0.1] * 9,
        [0.2] * 9,
        [0.7] * 9,
    ],
    # Parent: per unit GDP (3) -> 3 x 3.
    POPULATION_DENSITY: [
        [0.1] * 3,
        [0.2] * 3,
        [0.7] * 3,
    ],
    # Root node: 3 states, no parents -> 3 x 1.
    ELEVATION: [
        [0.1],
        [0.2],
        [0.7],
    ],
    # Parent: elevation (3) -> 2 x 3.
    # Fixed: this table used to have a single column, which does not match
    # the ELEVATION -> SLOPE edge.
    SLOPE: [
        [0.6] * 3,
        [0.4] * 3,
    ],
    # Root node: 3 states, no parents -> 3 x 1.
    RAINFALL_DURATION: [
        [0.1],
        [0.2],
        [0.7],
    ],
    # Root node: 2 states, no parents -> 2 x 1.
    RIVER_DENSITY: [
        [0.3],
        [0.7],
    ],
    # Parents: rainfall duration (3) x river density (2) -> 3 x 6.
    RAINFALL_AMOUNT: [
        [0.1] * 6,
        [0.7] * 6,
        [0.2] * 6,
    ],
    # Parents: road density (3) x slope (2) x rainfall amount (3) -> 2 x 18.
    # Fixed: this table used to have three rows although FLOOD only has the
    # two states 'Yes' / 'No'. The first two placeholder rows were merged
    # (0.1 + 0.2) so that every column still sums to 1.
    FLOOD: [
        [0.3] * 18,  # Yes
        [0.7] * 18,  # No
    ],
}

In [14]:
# State labels of every node. The length of each list is used as the node's
# cardinality, and the labels are passed to TabularCPD as `state_names`.
table_dictionary = {
    # Nodes that share the same three-level scale (a fresh list per node).
    **{
        node: ['High', 'Medium', 'Low']
        for node in (PER_UNIT_GDP, ROAD_DENSITY, POPULATION_DENSITY, ELEVATION)
    },
    # Nodes with their own vocabulary.
    SLOPE: ['Steep', 'Flat'],
    RAINFALL_DURATION: ['Long', 'Medium', 'Short'],
    RIVER_DENSITY: ['Dense', 'Sparse'],
    RAINFALL_AMOUNT: ['Huge', 'Medium', 'Little'],
    FLOOD: ['Yes', 'No'],
}

In [15]:
# Directed edges of the network as (parent, child) pairs, grouped by the
# chain through which they reach FLOOD. The overall order is unchanged.
edges = (
    # per unit GDP / population density -> road density -> flood
    [
        (PER_UNIT_GDP, ROAD_DENSITY),
        (PER_UNIT_GDP, POPULATION_DENSITY),
        (POPULATION_DENSITY, ROAD_DENSITY),
        (ROAD_DENSITY, FLOOD),
    ]
    # elevation -> slope -> flood
    + [
        (ELEVATION, SLOPE),
        (SLOPE, FLOOD),
    ]
    # rainfall duration / river density -> rainfall amount -> flood
    + [
        (RAINFALL_DURATION, RAINFALL_AMOUNT),
        (RIVER_DENSITY, RAINFALL_AMOUNT),
        (RAINFALL_AMOUNT, FLOOD),
    ]
)

In [16]:
# Build the network structure: the edge list alone is enough, the nodes are
# taken from the edge endpoints. CPDs are attached separately.
model = BayesianModel(ebunch=edges)

In [17]:
def pick_dictionary_subset(dictionary, keys):
    """Return a new dict holding only the requested keys of ``dictionary``.

    Keys are visited in the order given by ``keys``; any key that is not
    present in ``dictionary`` is silently skipped. The values are not copied.
    """
    subset = {}
    for key in keys:
        if key in dictionary:
            subset[key] = dictionary[key]
    return subset

In [23]:
# Prior of the root node "per unit GDP". No edge points into this node, so
# the CPD takes no evidence / evidence_card arguments.
cpd_per_unit_gdp = TabularCPD(
    variable=PER_UNIT_GDP,
    variable_card=len(table_dictionary[PER_UNIT_GDP]),
    values=values_dictionary[PER_UNIT_GDP],
    state_names=pick_dictionary_subset(table_dictionary, [PER_UNIT_GDP]),
)

# P(population density | per unit GDP): one column per state of the parent.
# state_names must cover the variable itself and every evidence variable.
cpd_population_density = TabularCPD(
    variable=POPULATION_DENSITY,
    variable_card=len(table_dictionary[POPULATION_DENSITY]),
    values=values_dictionary[POPULATION_DENSITY],
    evidence=[PER_UNIT_GDP],
    evidence_card=[len(table_dictionary[PER_UNIT_GDP])],
    state_names=pick_dictionary_subset(
        table_dictionary,
        [POPULATION_DENSITY, PER_UNIT_GDP],
    ),
)

In [24]:
# Print the CPD as a text table to check the state labels and probabilities
# (rows: population density states, columns: per unit GDP states).
print(cpd_population_density)

+----------------------------+--------------------+----------------------+-------------------+
| per unit GDP               | per unit GDP(High) | per unit GDP(Medium) | per unit GDP(Low) |
+----------------------------+--------------------+----------------------+-------------------+
| population density(High)   | 0.1                | 0.1                  | 0.1               |
+----------------------------+--------------------+----------------------+-------------------+
| population density(Medium) | 0.2                | 0.2                  | 0.2               |
+----------------------------+--------------------+----------------------+-------------------+
| population density(Low)    | 0.7                | 0.7                  | 0.7               |
+----------------------------+--------------------+----------------------+-------------------+
