### Entropy metric
<img src="img/aic.png">

#### Dependencies
```sh
pip install liac-arff
```

In [2]:
import math

In [151]:
class EntropyMetric(object):
    """Entropy metric H(Bs, D) module (Bs - Bayes net, D - test data set).

    Bs is a sequence of node descriptors, one dict per node, with the keys:
      'name'   - attribute name of the node (looked up in every sample of D),
      'parent' - attribute name of the node's parent, or '' when it has none,
      'r'      - the states of the node,
      'q'      - the states of the parent. For a parentless node the values
                 are ignored, but every entry triggers one pass over 'r',
                 so it should contain exactly one placeholder entry.
    D is a sequence of samples, each indexable by attribute name.
    """

    def __init__(self, Bs, D):
        # Number of samples in D
        self.N = len(D)
        # The network Bs
        self.Bs = Bs
        # The data set
        self.D = D

    def check_occurences(self, node_name, parent_name, node_state, parent_state):
        """Count the samples in which the node is in ``node_state``.

        When ``parent_name`` is not '' the parent must additionally be in
        ``parent_state`` for the sample to be counted; otherwise
        ``parent_state`` is ignored.

        Returns:
            int: number of matching samples (N_ijk in the metric).

        Raises:
            KeyError: if a sample lacks ``node_name`` (or ``parent_name``
                when one is given) and the samples are dicts.
        """
        has_parent = parent_name != ''
        count = 0

        for sample in self.D:
            node_value = sample[node_name]

            if has_parent:
                parent_value = sample[parent_name]
                if node_value == node_state and parent_value == parent_state:
                    count += 1
            elif node_value == node_state:
                count += 1

        return count

    def check_parent_occurences(self, parent_name, parent_state):
        """Count the samples in which the parent is in ``parent_state``.

        Returns:
            int: number of matching samples (N_ij in the metric). When there
            is no parent (``parent_name == ''``) the result is taken to be
            the total number of samples, N.
        """
        if parent_name == '':
            return self.N

        return sum(1 for sample in self.D if sample[parent_name] == parent_state)

    def compute_entropy_metric(self):
        """Compute H(Bs, D) = -N * sum_ijk (N_ijk / N) * log10(N_ijk / N_ij).

        The sum runs over every node i, parent state j and node state k.
        Terms with N_ijk == 0 or N_ij == 0 are skipped, which also keeps the
        logarithm and the divisions well defined (including for empty D).

        Returns:
            The metric value; 0 when no term contributes.
        """
        total = 0
        for node in self.Bs:
            node_states = node['r']
            parent_states = node['q']

            for parent_state in parent_states:
                # N_ij does not depend on the node state, so count it once per
                # parent state instead of rescanning D for every node state.
                N_i_j = self.check_parent_occurences(node['parent'], parent_state)

                for node_state in node_states:
                    N_i_j_k = self.check_occurences(node['name'], node['parent'], node_state, parent_state)

                    if N_i_j_k != 0 and N_i_j != 0:
                        total += (N_i_j_k / self.N) * math.log10(N_i_j_k / N_i_j)

        return -1 * self.N * total

------

### K2 algorithm class

In [150]:
class K2Algorithm:
    """Placeholder for the K2 algorithm; it currently only stores its inputs."""

    def __init__(self, D, scoring_method):
        """Remember the data set and the scoring method.

        Args:
            D: test data.
            scoring_method: stored as-is; not used by any code in this class yet.
        """
        self.scoring_method = scoring_method
        self.D = D

    def find_optimal_net(self):
        """Stub: no search is performed, the fixed string "found" is returned."""
        placeholder_result = "found"
        return placeholder_result

------

### ARFF file loading

In [149]:
import arff
import pprint

# Pretty-printer used by the following cells (nesting is shown up to depth 6).
pp = pprint.PrettyPrinter(depth=6)

# Parse the ARFF file and reshape its content into plain lists of dicts.
with open('data/weather.arff') as fh:
    data = arff.load(fh)

    # One descriptor per attribute: first element of each entry is stored as
    # 'name', second as 'states'.
    attributes = [
        {'name': entry[0], 'states': entry[1]}
        for entry in data['attributes']
    ]

    # One dict per data row, keyed by the name of the attribute in the same
    # column position.
    sample_data = [
        {attributes[column]['name']: value for column, value in enumerate(row)}
        for row in data['data']
    ]

------

### Class attribute selection

In [152]:
    # Position of the class attribute inside `attributes`.
    index_of_class_attribute = 4
    print("Class argument: ")
    pp.pprint(attributes[index_of_class_attribute])

    # Class attribute first, followed by every other attribute in its
    # original order.
    final_attributes = [attributes[index_of_class_attribute]] + [
        attribute
        for position, attribute in enumerate(attributes)
        if position != index_of_class_attribute
    ]

    print("\nAll arguments: ")
    pp.pprint(final_attributes)

Class argument: 
{'name': 'play', 'states': ['yes', 'no']}

All arguments: 
[{'name': 'play', 'states': ['yes', 'no']},
 {'name': 'outlook', 'states': ['sunny', 'overcast', 'rainy']},
 {'name': 'temperature', 'states': ['hot', 'mild', 'cool']},
 {'name': 'humidity', 'states': ['high', 'normal']},
 {'name': 'windy', 'states': ['TRUE', 'FALSE']}]


-----
### Main algorithm

In [153]:
    # Two-node network: the first entry of final_attributes is the parentless
    # root, the second entry is its child.
    Bs = [
        {
            'r': final_attributes[0]['states'],
            # No parent: a single placeholder entry so the node is scored once.
            'q': ['parent'],
            'name': final_attributes[0]['name'],
            'parent': '',
        },
        {
            'r': final_attributes[1]['states'],
            'q': final_attributes[0]['states'],
            'name': final_attributes[1]['name'],
            'parent': final_attributes[0]['name'],
        },
    ]

    print(
        "Entropy metric:",
        EntropyMetric(Bs, sample_data).compute_entropy_metric(),
    )

Entropy metric: 9.570705023209584
