In [25]:
import os
import json
import pandas as pd
import numpy as np
from time import time

from  pomegranate  import BayesianNetwork
import pygraphviz
import networkx
import tempfile

import seaborn as sns
import matplotlib.image as mpl_image
import matplotlib.pyplot as plt

%matplotlib inline 
sns.set(style="white")

import glob

# Glob pattern for the serialised learnt models (one JSON file per run).
f = "../2*.model*.json"

# glob returns matches in arbitrary order; sort so that the per-model output
# further down is printed in the same order on every run.
models = sorted(glob.glob(f))

# Get the domain expert network
with open("../model/bayesnetwork/network.json", 'rt') as infile:
    net = json.load(infile)

# Reverse lookup built from net['nodes']: member name -> label of the cluster
# (expert-network node) that lists it.
clus = {
    member: cluster
    for cluster, members in net['nodes'].items()
    for member in members
}

In [27]:
def plot_network(model, net, label):
    """Plot a learnt BN model on top of the SME (domain expert) network.

    Every key of ``net['nodes']`` becomes a graph node and every link in
    ``net['edges']`` is drawn as a dotted red edge. Each edge of the learnt
    model is then lifted to cluster level (an end point is replaced by the
    cluster that lists it) and drawn as a solid blue edge. An end point that
    belongs to no cluster is drawn as a node of its own, provided the other
    end of the edge is clustered; edges with neither end in a cluster are
    left out. Identical cluster-level links are drawn only once.

    The picture is written to ``img/<label>.png``; the ``img`` directory is
    created when missing.

    Args:
        model: learnt network exposing ``edge_count()`` and ``edges``, an
            iterable of ``(parent, child)`` pairs whose items carry ``.name``.
        net: dict with ``'nodes'`` (cluster label -> iterable of member
            names) and ``'edges'`` (source label -> iterable of destination
            labels).
        label: file name (without extension) of the PNG to write.
    """
    # Build the member -> cluster lookup from the `net` argument itself so the
    # function does not silently depend on a notebook-level global.
    cluster_of = {
        member: cluster
        for cluster, members in net['nodes'].items()
        for member in members
    }

    G = pygraphviz.AGraph(directed=True, strict=False)
    for cluster in net['nodes']:
        G.add_node(cluster)

    for src, destinations in net['edges'].items():
        for des in destinations:
            G.add_edge(src, des, style='dotted', color='red')

    print("Model with {} edges.".format(model.edge_count()))

    # Ordered, de-duplicated list of learnt links. The graph is non-strict,
    # so a repeated pair would otherwise be rendered as parallel edges.
    data_links = []
    seen = set()
    for parent, child in model.edges:
        parent_clustered = parent.name in cluster_of
        child_clustered = child.name in cluster_of

        # Links between two unclustered variables cannot be related to the
        # expert network and are not drawn.
        if not (parent_clustered or child_clustered):
            continue

        if parent_clustered:
            src = cluster_of[parent.name]
        else:
            src = parent.name
            G.add_node(src)

        if child_clustered:
            des = cluster_of[child.name]
        else:
            des = child.name
            G.add_node(des)

        link = (src, des)
        if link not in seen:
            seen.add(link)
            data_links.append(link)

    for src, des in data_links:
        G.add_edge(src, des, style='solid', penwidth='2', color='blue')

    # Make sure the output directory exists before opening the target file.
    os.makedirs("img", exist_ok=True)
    with open("img/{}.png".format(label), 'wb') as tf:
        G.draw(tf, format='png', prog='dot')

In [42]:
def get_label(m):
    """Build a human-readable plot label from a model file path.

    The file name is expected to look like
    ``<prefix>.model-<algorithm>-<True|False>.json``. Only the base name is
    parsed, so dots or dashes in the directory part of the path do not
    affect the result.

    Args:
        m: path to a serialised model file.

    Returns:
        ``"<Algorithm> Constrained"`` when the flag is ``True``, otherwise
        ``"<Algorithm> Unconstrained"``, with the algorithm title-cased.

    Raises:
        ValueError: if the file name does not follow the expected pattern.
    """
    fields = os.path.basename(m).split('.')
    # fields[1] is the "model-<algorithm>-<flag>" part of the file name.
    parts = fields[1].split('-') if len(fields) > 1 else []
    if len(parts) < 3:
        raise ValueError(
            "Cannot derive a label from model file name: {!r}".format(m))

    algo, g = parts[1], parts[2]
    grap = 'Constrained' if g == 'True' else 'Unconstrained'
    return "{} {}".format(algo.title(), grap)
    
# Render one overlay plot per learnt model file collected in `models`.
for m in models:
    # Read the serialised model as text first, then rebuild the network.
    with open(m, 'rt') as infile:
        mj = infile.read()

    lbl = get_label(m)
    model = BayesianNetwork.from_json(mj)
    plot_network(model, net, lbl)

Model with 25 edges.
Model with 17 edges.
Model with 25 edges.


# The networks

Here are the plots from the structure learning task.


## The Greedy Constrained case

<img src="img/Greedy Constrained.png" width="80%">

## The Exact Constrained case
<img src="img/Exact Constrained.png" width="80%">

This learnt structure doesn't appear to make much sense.

## The Exact Unconstrained case
<img src="img/Exact Unconstrained.png" width="80%">





# Inference tasks

In [48]:
# Reload the raw dataset used in structure learning
# NOTE(review): pandas is already imported as `pd` in the first cell, so this
# re-import is redundant when the notebook is run top to bottom.
import pandas as pd
# NOTE(review): unpickling can execute arbitrary code; only load df.pkl when
# it comes from a trusted source.
df = pd.read_pickle("df.pkl")

In [50]:
# Show the entry indexed by ('MMR', 2017) — presumably (country code, year);
# confirm against the index of df.
df.loc[('MMR', 2017)]

Indicator Code
SP.POP.GROW                                worse
SP.URB.GROW                                worse
NY.GDP.PCAP.PP.CD                           best
SI.POV.GINI                                 good
SL.UEM.TOTL.ZS                              best
TRI.CORR.INDEX                              best
VC.BTL.DETH                                 poor
VDEM.FRD.POL.KILL                           good
UC.FAT.CIV                                 worse
HR.SCR.MEAN                                worse
FSI.STA.LEG                              average
FSI.PUB.SER                                 poor
EMDAT.NAT.OCCURRENCE                        poor
EMDAT.NAT.TOTAL.AFFECTED                   worse
ADESA                                       good
DRC.TOT.DISP                (3159.743, 6702.945]
Name: (MMR, 2017), dtype: object

In [44]:
# Rebuild the greedy / constrained network from its serialised JSON form.
with open('../20200319140828.model-greedy-True.json', 'rt') as infile:
    mj = infile.read()

model = BayesianNetwork.from_json(mj)

In [82]:
# Work on a deep copy so the entry stored in df is left untouched.
tmp = df.loc[('MMR', 2017)].copy(deep=True)
# Blank out DRC.TOT.DISP so it is the one value the model has to infer.
tmp['DRC.TOT.DISP'] = None
# In the result shown below, observed values are echoed back and the None
# entry is replaced by a discrete distribution over the DRC.TOT.DISP bins.
model.predict_proba(tmp.to_list())

array(['worse', 'worse', 'best', 'good', 'best', 'best', 'poor', 'good',
       'worse', 'worse', 'average', 'poor', 'poor', 'worse', 'good',
       {
    "class" :"Distribution",
    "dtype" :"str",
    "name" :"DiscreteDistribution",
    "parameters" :[
        {
            "(-0.001, 0.0919]" :0.0,
            "(23.05, 127.664]" :0.0,
            "(0.0919, 5.565]" :0.030000000000000127,
            "(127.664, 633.824]" :0.05000000000000009,
            "(6702.945, 11172.9]" :0.2199999999999997,
            "(11172.9, 77618.945]" :0.25499999999999967,
            "(1355.364, 3159.743]" :0.17999999999999985,
            "(3159.743, 6702.945]" :0.1599999999999998,
            "(633.824, 1355.364]" :0.09000000000000002,
            "(5.565, 23.05]" :0.015000000000000187
        }
    ],
    "frozen" :false
}], dtype=object)

In [90]:
# Start from the baseline observation (DRC.TOT.DISP already set to None).
scenario = tmp.copy(deep=True)

# Degrade four indicators to 'worse'.
scenario['TRI.CORR.INDEX'] = 'worse'
scenario['SL.UEM.TOTL.ZS'] = 'worse'
scenario['NY.GDP.PCAP.PP.CD'] = 'worse'
scenario['SI.POV.GINI'] = 'worse'

# NOTE(review): `scenario` is built above but never passed to the model; the
# call below queries a hand-written vector of 15 'worse' values plus None
# instead. Confirm which of the two queries is intended.
#model.predict_proba(scenario.to_list())
model.predict_proba(['worse', 'worse', 'worse', 'worse', 'worse', 'worse', 'worse',
       'worse', 'worse', 'worse', 'worse', 'worse', 'worse', 'worse',
       'worse', None])

array(['worse', 'worse', 'worse', 'worse', 'worse', 'worse', 'worse',
       'worse', 'worse', 'worse', 'worse', 'worse', 'worse', 'worse',
       'worse',
       {
    "class" :"Distribution",
    "dtype" :"str",
    "name" :"DiscreteDistribution",
    "parameters" :[
        {
            "(-0.001, 0.0919]" :0.0,
            "(23.05, 127.664]" :0.0,
            "(0.0919, 5.565]" :0.030000000000000127,
            "(127.664, 633.824]" :0.05000000000000009,
            "(6702.945, 11172.9]" :0.2199999999999997,
            "(11172.9, 77618.945]" :0.25499999999999967,
            "(1355.364, 3159.743]" :0.17999999999999985,
            "(3159.743, 6702.945]" :0.1599999999999998,
            "(633.824, 1355.364]" :0.09000000000000002,
            "(5.565, 23.05]" :0.015000000000000187
        }
    ],
    "frozen" :false
}], dtype=object)

In [93]:
# Query the model with 15 observed values all set to 'best' and the final
# entry left as None so that it is the one being inferred.
model.predict_proba(['best'] * 15 + [None])

array(['best', 'best', 'best', 'best', 'best', 'best', 'best', 'best',
       'best', 'best', 'best', 'best', 'best', 'best', 'best',
       {
    "class" :"Distribution",
    "dtype" :"str",
    "name" :"DiscreteDistribution",
    "parameters" :[
        {
            "(-0.001, 0.0919]" :0.17499999999999985,
            "(23.05, 127.664]" :0.2099999999999997,
            "(0.0919, 5.565]" :0.1549999999999999,
            "(127.664, 633.824]" :0.0750000000000001,
            "(6702.945, 11172.9]" :0.03000000000000013,
            "(11172.9, 77618.945]" :0.060000000000000095,
            "(1355.364, 3159.743]" :0.010000000000000196,
            "(3159.743, 6702.945]" :0.05500000000000014,
            "(633.824, 1355.364]" :0.0500000000000001,
            "(5.565, 23.05]" :0.17999999999999988
        }
    ],
    "frozen" :false
}], dtype=object)

In [92]:
# Number of entries in the observation; the hand-written vectors passed to
# predict_proba above have the same length (15 values plus None).
len(tmp)

16