In [1]:
import math
import graphviz
import math
import copy
import sys
import arff
import pprint

------
### Algorytm K2
<img src="../img/K2.png" style="height: 350px">

In [2]:
class K2Algorithm(object):
    """Greedy K2 search for the parent set of every attribute.

    Attributes are processed in the order given. For each attribute, parents
    are picked only from attributes with a lower index and are added one at a
    time for as long as the chosen metric strictly improves and the parent
    limit has not been reached.

    Nodes are plain dicts of the form
    ``{'r': <states>, 'name': <name>, 'parents': [{'name': ..., 'q': <states>}, ...]}``.
    """

    def __init__(self, attributes, sample_data, score_method, number_of_parents):
        """Store the search configuration.

        Args:
            attributes: Ordered sequence of dicts, each providing the keys
                ``'name'`` and ``'states'``.
            sample_data: Data set handed unchanged to the metric classes.
            score_method: One of ``'aic'``, ``'mdl'``, ``'bayes'`` or
                ``'entropy'``.
            number_of_parents: Upper bound on the parents chosen per node.
        """
        self.sample_data = sample_data
        self.score_method = score_method
        self.attributes = attributes
        self.number_of_parents = number_of_parents

    def find_optimal_net(self):
        """Return the list of nodes with their greedily selected parents.

        Returns:
            A list with one node dict per attribute, in attribute order.
        """
        optimal_net = []

        for i, attribute in enumerate(self.attributes):
            parents_of_node_indexes = []

            # Baseline: the score of the node without any parent.
            old_score = self.compute_metric(
                [{'r': attribute['states'], 'name': attribute['name'], 'parents': []}]
            )

            while len(parents_of_node_indexes) < self.number_of_parents:
                max_new_parent_score, max_new_parent_index = self.find_node_with_max_score(
                    i, attribute, parents_of_node_indexes
                )

                # -1 means no candidate beat the current parent set. It must
                # never be appended: as a list index it would silently select
                # the last attribute.
                if max_new_parent_index == -1 or not (max_new_parent_score > old_score):
                    break

                old_score = max_new_parent_score
                parents_of_node_indexes.append(max_new_parent_index)

            optimal_net.append({
                'r': attribute['states'],
                'name': attribute['name'],
                'parents': self.convert_array_of_indexes_to_parents(parents_of_node_indexes),
            })

        return optimal_net

    def find_node_with_max_score(self, index, attribute, parent_indexes):
        """Find the single extra parent that improves the node's score most.

        Args:
            index: Position of ``attribute`` in ``self.attributes``; only
                attributes at lower positions are considered as candidates.
            attribute: The attribute dict whose parents are being selected.
            parent_indexes: Indexes of the parents already selected.

        Returns:
            A tuple ``(max_score, parent_index)``. ``parent_index`` is ``-1``
            when no candidate scores strictly higher than the current parent
            set, in which case ``max_score`` is the score of the current set.
        """
        current_parents = self.convert_array_of_indexes_to_parents(parent_indexes)
        temp_net = [{'name': attribute['name'], 'r': attribute['states'], 'parents': current_parents}]

        parent_index_with_max_score = -1
        max_score = self.compute_metric(temp_net)

        for parent_index in range(index):
            if parent_index in parent_indexes:
                continue

            # Deep copy so the candidate net shares no state with the baseline.
            temp_net_copy = copy.deepcopy(temp_net)
            temp_net_copy[0]['parents'].append(self.convert_index_to_parent(parent_index))
            parent_candidate_score = self.compute_metric(temp_net_copy)

            # Strict comparison: on ties the earliest candidate wins.
            if parent_candidate_score > max_score:
                max_score = parent_candidate_score
                parent_index_with_max_score = parent_index

        return (max_score, parent_index_with_max_score)

    def convert_index_to_parent(self, index):
        """Return the parent dict (``'name'``, ``'q'``) for the attribute at ``index``."""
        return {'name': self.attributes[index]['name'], 'q': self.attributes[index]['states']}

    def convert_array_of_indexes_to_parents(self, indexes):
        """Return a list of parent dicts for the given attribute indexes."""
        return [self.convert_index_to_parent(index) for index in indexes]

    def compute_metric(self, net):
        """Score ``net`` against the sample data with the configured metric.

        Args:
            net: List of node dicts to score.

        Returns:
            Whatever the selected metric class returns for ``net``.

        Raises:
            ValueError: If ``self.score_method`` is not a supported method.
        """
        if self.score_method == 'aic':
            return AICMetric().compute_aic_metric(net, self.sample_data)
        elif self.score_method == 'mdl':
            return MDLMetric().compute_mdl_metric(net, self.sample_data)
        elif self.score_method == 'bayes':
            return BayesianMetric(net, self.sample_data).compute_bayesian_metric()
        elif self.score_method == 'entropy':
            return EntropyMetric(net, self.sample_data).compute_entropy_metric()

        # format() rather than '+' so a non-string method name still yields
        # the intended ValueError.
        raise ValueError("{} is not a valid scoring method!".format(self.score_method))