In [1]:
%%capture
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
import networkx as nx
%matplotlib inline

In [2]:
%%capture
import sys
sys.path.append('/home/wrwt/Programming/pygraphmodels')
import graphmodels as gm

In [3]:
import warnings
warnings.filterwarnings('ignore', module='matplotlib')
warnings.filterwarnings('ignore', module='IPython')

In [4]:
def bic_score(x, pa):
    """Return a BIC-style cost for variable ``x`` given candidate parents ``pa``.

    The value is ``-n * l + 0.5 * log(n) * k`` where

    * ``l`` is ``I(x; pa) - H(x)`` as computed by ``gm.information`` on the
      module-level ``data``,
    * ``n`` is the number of rows of ``data``,
    * ``k`` is the number of entries of ``true_dgm.factor(x).table`` minus one.

    Both ``data`` and ``true_dgm`` are read from the notebook's global
    namespace and must exist when the function is called.

    NOTE(review): ``k`` is taken from the factor stored in ``true_dgm`` and
    therefore does not depend on ``pa`` -- confirm that this is intended.
    NOTE(review): a later cell defines ``bic_score(dgm, data)``, which
    replaces this function under the same name.

    :param x: column label of the scored variable
    :param pa: list of column labels used as parents
    :return: the cost described above
    """
    info = gm.information
    target = data[[x]]
    per_sample = info.discrete_mutual_information(target, data[pa]) - info.discrete_entropy(data[[x]])
    n_rows = data.shape[0]
    n_params = np.prod(true_dgm.factor(x).table.shape) - 1
    penalty = 0.5 * np.log(n_rows) * n_params
    return -n_rows * per_sample + penalty

In [5]:
from os import listdir
import os.path
# NOTE(review): absolute, machine-specific path -- the notebook will not run
# elsewhere without editing this constant.
NETWORKS_PATH = '/home/wrwt/Programming/pygraphmodels/networks/'
# Directory listing of NETWORKS_PATH (not referenced again in the cells visible here).
network_filenames = listdir(NETWORKS_PATH)
# Read the 'earthquake.bif' network file into a gm.DGM and draw it.
true_dgm = gm.DGM.read(os.path.join(NETWORKS_PATH, 'earthquake.bif'))
true_dgm.draw()

  def _ipython_display_formatter_default(self):
  def _singleton_printers_default(self):


In [10]:
# Dataset used by the scoring cells: the result of `rvs(1000000)` on the loaded model.
# Presumably this draws 1,000,000 random samples (scipy-style `rvs` naming) -- confirm
# against graphmodels. Later cells index it by column label and read `.shape`/`.columns`.
# NOTE(review): no random seed is set in the visible cells, so numbers may vary per run.
data = true_dgm.rvs(1000000)

In [11]:
from graphmodels.information import discrete_entropy, discrete_mutual_information

In [12]:
%%timeit
discrete_mutual_information(data[['Alarm']], data[['MaryCalls', 'JohnCalls']])

1 loop, best of 3: 591 ms per loop


In [105]:
from itertools import product
class MatrixGraph:
    """Directed graph stored as a square adjacency matrix plus node names.

    ``adj[i, j]`` being truthy means there is an edge from the node with
    index ``i`` to the node with index ``j``.  ``names_to_idx`` maps each node
    name to its row/column index.

    :param adjacency_matrix: square 2-D array; it is stored by reference, so
        later in-place edits of ``self.adj`` also change the caller's array
    :param names_to_idx: optional ``{name: index}`` mapping; when omitted the
        nodes are named by their own indices ``0 .. n-1``
    """

    def __init__(self, adjacency_matrix, names_to_idx=None):
        self.adj = adjacency_matrix
        if names_to_idx is None:
            # Bug fix: this used to read an undefined name `adj`, which raised
            # NameError (or silently used an unrelated global of that name).
            names_to_idx = {i: i for i in range(adjacency_matrix.shape[0])}
        self.names_to_idx = names_to_idx

    @property
    def n(self):
        """Number of nodes (rows of the adjacency matrix)."""
        return self.adj.shape[0]

    @property
    def m(self):
        """Sum of all adjacency entries (the edge count for a 0/1 matrix)."""
        return np.sum(self.adj)

    @property
    def names(self):
        """Node names ordered by node index."""
        idx_to_names = self.idx_to_names
        return [idx_to_names[i] for i in range(self.n)]

    @property
    def idx_to_names(self):
        """Inverse of ``names_to_idx``; a new dict is built on every access."""
        return {i: node for node, i in self.names_to_idx.items()}

    @staticmethod
    def from_networkx_DiGraph(graph):
        """Build a MatrixGraph from an object exposing ``nodes()`` and ``edges()``.

        Node indices follow the iteration order of ``graph.nodes()``; every
        ``(u, v)`` pair from ``graph.edges()`` sets ``adj[u_idx, v_idx]`` to 1.
        """
        names_to_idx = {node: i for i, node in enumerate(graph.nodes())}
        adj = np.zeros((len(names_to_idx), len(names_to_idx)))
        for u, v in graph.edges():
            adj[names_to_idx[u], names_to_idx[v]] = 1
        return MatrixGraph(adj, names_to_idx=names_to_idx)

    def to_networkx_DiGraph(self):
        """Return a new ``nx.DiGraph`` with the same node names and edges."""
        result = nx.DiGraph()
        result.add_nodes_from(self.names_to_idx.keys())
        # Build the inverse mapping once; the property re-creates the dict on
        # each access, which the original did twice per edge.
        idx_to_names = self.idx_to_names
        for i, j in product(range(self.n), repeat=2):
            if self.adj[i, j]:
                result.add_edge(idx_to_names[i], idx_to_names[j])
        return result

    def is_acyclic(self):
        """True when the graph, converted to networkx, is a DAG."""
        return nx.is_directed_acyclic_graph(self.to_networkx_DiGraph())

    def draw(self):
        """Draw the graph by wrapping it in a ``gm.DGM`` and calling its ``draw()``."""
        return gm.DGM(self.to_networkx_DiGraph()).draw()

In [48]:
def bic_score(dgm, data):
    """Return the BIC-style score of the whole graph ``dgm`` on ``data``.

    For every node ``x`` with parents ``pa`` the contribution is::

        n * (I(x; pa) - H(x)) - 0.5 * log(n) * k

    where ``n`` is the number of rows of ``data`` and
    ``k = |values(x)| * prod(|values(p)| for p in pa) - 1``, with value counts
    taken from the distinct values observed in ``data``.  The contributions
    are summed over ``dgm.nodes()``.

    :param dgm: graph exposing ``nodes()`` and ``predecessors(node)``
    :param data: table indexable by column label / list of labels, with ``.shape``
    :return: the summed score (float)
    """
    def n_values(column):
        # Number of distinct values observed in the given column.
        return len(data[column].value_counts())

    # Loop-invariant quantities, hoisted out of the per-node loop.
    n = data.shape[0]
    log_n = np.log(n)

    result = 0.
    for x in dgm.nodes():
        # Materialise the parents: `predecessors` may return a one-shot
        # iterator (networkx >= 2.0); it is consumed twice below.
        pa = list(dgm.predecessors(x))

        k = n_values(x)*np.prod([n_values(pa_i) for pa_i in pa]) - 1
        l = n*(discrete_mutual_information(data[[x]], data[pa]) -
               discrete_entropy(data[[x]]))
        result += l - 0.5 * log_n * k
    return result

In [97]:
class ScoreBIC:
    """Callable, cached local score for one node given a parent indicator vector.

    ``score(node, parents)`` returns::

        n * I(node; parents) - 0.5 * log(n) * k

    where ``n`` is the number of rows of ``data`` and
    ``k = |values(node)| * prod(|values(p)| for p in parents) - 1``.
    Results are memoised per ``(node, parents)`` combination.

    :param graph: object exposing ``idx_to_names`` (index -> column label)
    :param data: table indexable by column label / list of labels, with
        ``.columns`` and ``.shape``
    """

    def __init__(self, graph, data):
        self.graph = graph
        self.data = data
        # Number of distinct observed values per column, computed once up front.
        self.n_values = {name: len(self.data[name].value_counts()) for name in self.data.columns}
        self.cache = {}

    def _footprint(self, node, parents):
        """Hashable cache key: the node index followed by the parent indicators."""
        return tuple([node] + list(parents))

    def __call__(self, node, parents):
        """Score ``node`` (an index) with ``parents`` (one truthy/falsy flag per node index)."""
        fp = self._footprint(node, parents)
        if fp in self.cache:
            return self.cache[fp]

        # Fetch the mapping once instead of once per parent.
        idx_to_names = self.graph.idx_to_names
        pa_names = [idx_to_names[idx] for idx, exists in enumerate(parents) if exists]
        node_name = idx_to_names[node]

        k = self.n_values[node_name]*np.prod([self.n_values[pa_i] for pa_i in pa_names]) - 1
        n = self.data.shape[0]
        # Bug fix: use the dataset handed to the constructor; the original read
        # a global `data`, ignoring `self.data`.
        l = n*discrete_mutual_information(self.data[[node_name]], self.data[pa_names])

        result = l - 0.5 * np.log(n) * k
        self.cache[fp] = result
        return result

In [98]:
# Convert the loaded network into its adjacency-matrix form and create a
# scorer over the dataset `data` for it.
mxg = MatrixGraph.from_networkx_DiGraph(true_dgm)
score = ScoreBIC(mxg, data)

In [99]:
mxg.names

['Burglary', 'MaryCalls', 'Alarm', 'JohnCalls', 'Earthquake']

In [100]:
score(1, [0, 0, 0, 0, 0])

-6.9077552789821368

In [101]:
score(1, [0, 0, 1, 0, 0])

36519.607616759378

In [102]:
score(1, [1, 0, 1, 0, 0])

36492.351913303683

In [103]:
score(4, [0, 0, 0, 0, 0])

-6.9077552789821368

In [104]:
%load_ext line_profiler
%lprun -f ScoreBIC.__call__ score(4, [1, 1, 1, 1, 0])

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [None]:
class InvalidOperation(Exception):
    """Raised when a graph operation cannot be applied."""

class LocalOperation:
    """Interface for a single edit of a graph that can be scored, applied and reverted.

    Stores the ``graph`` to edit and the scoring callable ``fscore``;
    ``do``, ``undo`` and ``score`` all raise ``NotImplementedError`` here.
    """

    def __init__(self, graph, fscore):
        self.graph, self.fscore = graph, fscore

    def do(self):
        """Apply the operation (not implemented in the base class)."""
        raise NotImplementedError

    def undo(self):
        """Revert the operation (not implemented in the base class)."""
        raise NotImplementedError

    @property
    def score(self):
        """Score associated with the operation (not implemented in the base class)."""
        raise NotImplementedError

In [136]:
class AddEdge(LocalOperation):
    """Operation that inserts the edge ``src -> dst`` into ``graph.adj``.

    ``src`` and ``dst`` are node indices into the adjacency matrix.
    """

    def __init__(self, graph, fscore, src, dst):
        LocalOperation.__init__(self, graph, fscore)
        self.src, self.dst = src, dst

    def do(self):
        """Insert the edge and return ``self``.

        Raises ``InvalidOperation`` if the edge is already present, or if
        adding it makes the graph cyclic (the edge is removed again first).
        """
        adj = self.graph.adj
        edge = (self.src, self.dst)
        if adj[edge]:
            raise InvalidOperation()
        adj[edge] = 1
        if self.graph.is_acyclic():
            return self
        # The new edge closed a cycle: roll back before signalling failure.
        adj[edge] = 0
        raise InvalidOperation()

    def undo(self):
        """Clear the edge entry unconditionally and return ``self``."""
        self.graph.adj[self.src, self.dst] = 0
        return self

    @property
    def score(self):
        """Change in ``fscore`` of ``dst`` when ``src`` is switched on in its parent column."""
        parents = self.graph.adj[:, self.dst].copy()
        before = self.fscore(self.dst, parents)
        parents[self.src] = 1
        after = self.fscore(self.dst, parents)
        return after - before

In [139]:
class RemoveEdge(LocalOperation):
    """Operation that deletes the edge ``src -> dst`` from ``graph.adj``.

    ``src`` and ``dst`` are node indices into the adjacency matrix.
    """

    def __init__(self, graph, fscore, src, dst):
        LocalOperation.__init__(self, graph, fscore)
        self.src, self.dst = src, dst

    def do(self):
        """Delete the edge and return ``self``; raise ``InvalidOperation`` if it is absent."""
        adj = self.graph.adj
        if adj[self.src, self.dst]:
            adj[self.src, self.dst] = 0
            return self
        raise InvalidOperation()

    def undo(self):
        """Set the edge entry back to 1 unconditionally and return ``self``."""
        self.graph.adj[self.src, self.dst] = 1
        return self

    @property
    def score(self):
        """Change in ``fscore`` of ``dst`` when ``src`` is switched off in its parent column."""
        parents = self.graph.adj[:, self.dst].copy()
        before = self.fscore(self.dst, parents)
        parents[self.src] = 0
        after = self.fscore(self.dst, parents)
        return after - before

In [141]:
class ReverseEdge(LocalOperation):
    """Operation that turns the edge ``src -> dst`` into ``dst -> src``.

    ``src`` and ``dst`` are node indices into the adjacency matrix.
    """

    def __init__(self, graph, fscore, src, dst):
        LocalOperation.__init__(self, graph, fscore)
        self.src, self.dst = src, dst

    def do(self):
        """Flip the edge and return ``self``.

        Raises ``InvalidOperation`` if ``src -> dst`` is absent, or if the
        flipped graph is cyclic (both entries are restored first).
        """
        adj = self.graph.adj
        forward = (self.src, self.dst)
        backward = (self.dst, self.src)
        if not adj[forward]:
            raise InvalidOperation()
        adj[forward], adj[backward] = 0, 1
        if self.graph.is_acyclic():
            return self
        # Flipping created a cycle: put both entries back before failing.
        adj[forward], adj[backward] = 1, 0
        raise InvalidOperation()

    def undo(self):
        """Set ``src -> dst`` to 1 and ``dst -> src`` to 0, then return ``self``."""
        adj = self.graph.adj
        adj[self.src, self.dst], adj[self.dst, self.src] = 1, 0
        return self

    @property
    def score(self):
        """Sum of the ``fscore`` changes at both endpoints.

        ``dst`` loses ``src`` as a parent and ``src`` gains ``dst`` as a parent.
        """
        dst_parents = self.graph.adj[:, self.dst].copy()
        dst_before = self.fscore(self.dst, dst_parents)
        dst_parents[self.src] = 0
        dst_after = self.fscore(self.dst, dst_parents)

        src_parents = self.graph.adj[:, self.src].copy()
        src_before = self.fscore(self.src, src_parents)
        src_parents[self.dst] = 1
        src_after = self.fscore(self.src, src_parents)
        return ((dst_after - dst_before) - src_before) + src_after

In [142]:
# Build a fresh adjacency-matrix graph from true_dgm and a new scorer for it;
# a newly constructed ScoreBIC starts with an empty cache.
mxg = MatrixGraph.from_networkx_DiGraph(true_dgm)
score = ScoreBIC(mxg, data)

In [143]:
mxg.draw()

In [144]:
mxg.names

['Burglary', 'MaryCalls', 'Alarm', 'JohnCalls', 'Earthquake']

In [145]:
# Score change for adding the edge from node index 0 to node index 3
# (the graph itself is not modified until op.do() is called).
op = AddEdge(mxg, score, 0, 3)
op.score

-26.398795090964995

In [146]:
# Insert the edge index 0 -> index 1 by writing the adjacency matrix directly;
# this bypasses the duplicate-edge and acyclicity checks done by AddEdge.do().
mxg.adj[0, 1] = 1
mxg.draw()

In [157]:
# Score change for removing the edge from node index 2 to node index 1.
# NOTE(review): the execution counts of the following cells are out of order
# (160 before 159), so their outputs reflect a different run order.
op = RemoveEdge(mxg, score, 2, 1)
op.score

-36526.515372038361

In [160]:
op.do()

InvalidOperation: 

In [159]:
op.score

0.0