In [7]:
import math
import graphviz
import math
import copy
import sys

------
### Entropia
<img src="img/entropy.png" style="height: 100px">

In [1]:
class EntropyMetric(object):
    """Entropy score of a discrete network structure against a data set.

    ``network`` is an iterable of node dicts. Each node is read through the
    keys ``'name'`` (used to look the node's value up in a data row), ``'r'``
    (iterable of the node's states) and ``'parents'`` (iterable of parent
    dicts, each read through ``'name'`` and ``'q'``, the states of that
    parent). ``test_data`` is a sized iterable of rows indexable by node name.

    NOTE(review): a node with several parents is scored by summing over each
    parent separately, not over joint parent configurations -- confirm that
    this is the intended model.
    """

    def __init__(self, network, test_data):
        self.network = network
        self.test_data = test_data

    def compute_entropy_metric(self):
        """Return ``-N * sum((N_ijk / N) * log10(N_ijk / N_ij))``.

        ``N`` is ``len(self.test_data)``. Terms with ``N_ijk == 0`` are
        skipped (they would require ``log10(0)``). Nodes without parents are
        scored by ``compute_metric_for_node_without_parent``.
        """
        result = 0

        for node in self.network:
            if not node['parents']:  # no parent: single, empty configuration
                result += self.compute_metric_for_node_without_parent(node)
                continue

            for node_state in node['r']:
                for parent in node['parents']:
                    for parent_state in parent['q']:
                        parent_key = (parent['name'], parent_state)
                        N_i_j_k = self.count_occurences((node['name'], node_state), parent_key)
                        if N_i_j_k == 0:
                            # Zero count contributes nothing; it also implies
                            # nothing useful can be learnt from N_i_j here.
                            continue
                        N_i_j = self.compute_occurences_for_all_node_states(node, parent_key)
                        result += self.compute_metric_component(N_i_j, N_i_j_k)

        return -1 * len(self.test_data) * result

    def compute_metric_for_node_without_parent(self, node):
        """Return ``sum_k (N_k / N) * log10(N_k / N)`` for a parentless node.

        ``N_k`` is the number of rows in which the node takes its k-th state
        and ``N`` is the number of rows. States that never occur are skipped,
        so an empty data set yields 0 instead of dividing by zero.
        """
        N_i_j = len(self.test_data)
        result = 0
        for state in node['r']:
            N_i_j_k = self.count_single_occurences(node['name'], state)
            if N_i_j_k != 0:
                result += self.compute_metric_component(N_i_j, N_i_j_k)
        return result

    def compute_occurences_for_all_node_states(self, node, parent):
        """Count rows where ``parent`` (a ``(name, state)`` pair) matches and
        the node takes any of the states listed in ``node['r']``."""
        return sum(self.count_occurences((node['name'], state), parent)
                   for state in node['r'])

    def compute_occurences_for_all_node_states_without_parent(self, node):
        """Count rows where the node takes any of the states in ``node['r']``."""
        return sum(self.count_single_occurences(node['name'], state)
                   for state in node['r'])

    def count_occurences(self, node, parent):
        """Count rows matching both ``(name, state)`` pairs ``node`` and ``parent``."""
        return sum(1 for data in self.test_data
                   if data[node[0]] == node[1] and data[parent[0]] == parent[1])

    def count_single_occurences(self, name, state):
        """Count rows in which ``data[name] == state``."""
        return sum(1 for data in self.test_data if data[name] == state)

    def compute_metric_component(self, N_i_j, N_i_j_k):
        """Return ``(N_i_j_k / N) * log10(N_i_j_k / N_i_j)`` with ``N = len(test_data)``.

        Callers must pass non-zero counts; ``math.log10`` rejects 0.
        """
        return (N_i_j_k / len(self.test_data)) * math.log10(N_i_j_k / N_i_j)

------
### Metryka AIC 
<img src="img/qaic.png"><br/>
<img src="img/k.png">

In [3]:
class AICMetric(object):
    """AIC-style score: the entropy metric plus the parameter count K."""

    def compute_aic_metric(self, Bs, D):
        """Return ``EntropyMetric(Bs, D).compute_entropy_metric() + K(Bs)``.

        ``Bs`` is the network (iterable of node dicts) and ``D`` the data set;
        both are passed straight to ``EntropyMetric``, which is defined in
        another cell of this notebook.
        """
        return EntropyMetric(Bs, D).compute_entropy_metric() + self.compute_k_factor(Bs)

    def compute_k_factor(self, network):
        """Return ``K = sum_i (r_i - 1) * q_i`` over all nodes.

        ``r_i`` is ``len(node['r'])``. ``q_i`` is the value returned by
        ``count_number_of_parent_states`` for nodes that have parents and 1
        for nodes without parents, so a parentless node still contributes its
        ``r_i - 1`` free parameters instead of being multiplied by zero.
        """
        k_factor = 0
        for node in network:
            # A node without parents has exactly one (empty) configuration.
            q_i = self.count_number_of_parent_states(node) if node['parents'] else 1
            k_factor += (len(node['r']) - 1) * q_i
        return k_factor

    def count_number_of_parent_states(self, node):
        """Return the total number of states listed in ``'q'`` across the
        node's parents (0 when there are no parents).

        NOTE(review): state counts of several parents are added, not
        multiplied -- confirm this matches the intended definition of q_i.
        """
        return sum(len(parent['q']) for parent in node['parents'])

------
### Metryka MDL (Minimum Description Length)
<img src="img/mdl.png" style="height: 70px"> <br/>
<img src="img/k.png">

In [5]:
class MDLMetric(object):
    """MDL-style score: the entropy metric plus ``(K / 2) * log10(N)``."""

    def compute_mdl_metric(self, network, test_data):
        """Return the entropy metric plus ``(K / 2) * log10(len(test_data))``.

        ``EntropyMetric`` is defined in another cell of this notebook. An
        empty ``test_data`` is not supported: ``math.log10(0)`` raises
        ``ValueError``.
        """
        entropy = EntropyMetric(network, test_data).compute_entropy_metric()
        penalty = (self.compute_k_factor(network) / 2) * math.log10(len(test_data))
        return entropy + penalty

    def compute_k_factor(self, network):
        """Return ``K = sum_i (r_i - 1) * q_i`` over all nodes.

        ``r_i`` is ``len(node['r'])``. ``q_i`` is the value returned by
        ``count_number_of_parent_states`` for nodes that have parents and 1
        for nodes without parents, so a parentless node still contributes its
        ``r_i - 1`` free parameters instead of being multiplied by zero.
        """
        k_factor = 0
        for node in network:
            # A node without parents has exactly one (empty) configuration.
            q_i = self.count_number_of_parent_states(node) if node['parents'] else 1
            k_factor += (len(node['r']) - 1) * q_i
        return k_factor

    def count_number_of_parent_states(self, node):
        """Return the total number of states listed in ``'q'`` across the
        node's parents (0 when there are no parents).

        NOTE(review): state counts of several parents are added, not
        multiplied -- confirm this matches the intended definition of q_i.
        """
        return sum(len(parent['q']) for parent in node['parents'])

------
### Metryka bayesowska
<img src="img/bayes-k2.png" style="height: 100px">

In [6]:
class BayesianMetric(object):
    """Bayesian (K2-style) score of a discrete network against a data set.

    ``network`` is an iterable of node dicts read through ``'name'``, ``'r'``
    (the node's states) and ``'parents'`` (parent dicts read through
    ``'name'`` and ``'q'``, the parent's states). ``test_data`` is an iterable
    of rows indexable by node name.

    NOTE(review): a node with several parents is scored once per
    (parent, parent state) pair rather than per joint parent configuration --
    confirm that this is the intended model.
    """

    def __init__(self, network, test_data):
        self.network = network
        self.test_data = test_data

    def compute_bayesian_metric(self):
        """Return the product over nodes and parent configurations of
        ``(r - 1)! / (N_ij + r - 1)! * prod_k N_ijk!`` as a float.

        The product is accumulated as an exact integer numerator and
        denominator and divided once at the end, so large counts no longer
        overflow an intermediate float. A node without parents is scored as a
        single configuration covering every row.
        """
        numerator = 1
        denominator = 1

        for node in self.network:
            r = len(node['r'])
            for counts in self._state_counts_per_configuration(node):
                N_i_j = sum(counts)
                numerator *= math.factorial(r - 1)
                for N_i_j_k in counts:
                    numerator *= math.factorial(N_i_j_k)
                denominator *= math.factorial(r - 1 + N_i_j)

        # int / int true division rounds the exact ratio to the nearest float.
        return numerator / denominator

    def _state_counts_per_configuration(self, node):
        """Yield, for each parent configuration of ``node``, the list of row
        counts of every state in ``node['r']`` under that configuration."""
        if not node['parents']:
            # Single empty configuration: count each state over all rows.
            yield [self._count_node_state(node['name'], node_state)
                   for node_state in node['r']]
            return
        for parent in node['parents']:
            for parent_state in parent['q']:
                yield [self.check_occurences(node['name'], parent['name'], node_state, parent_state)
                       for node_state in node['r']]

    def _count_node_state(self, node_name, node_state):
        """Count rows in which ``row[node_name] == node_state``."""
        return sum(1 for row in self.test_data if row[node_name] == node_state)

    def compute_N_i_j(self, node_name, node_states, parent_state, parent_name):
        """Count rows where the parent is in ``parent_state`` and the node is
        in any of ``node_states``."""
        return sum(self.check_occurences(node_name, parent_name, node_state, parent_state)
                   for node_state in node_states)

    def check_occurences(self, node_name, parent_name, node_state, parent_state):
        """Count rows where the node equals ``node_state`` and the parent
        equals ``parent_state``."""
        return sum(1 for row in self.test_data
                   if row[node_name] == node_state and row[parent_name] == parent_state)
        