### Entropy metric
<img src="img/aic.png">

#### Dependencies
```sh
pip install liac-arff
```

In [2]:
import math
import arff
import pprint

In [3]:
class EntropyMetric(object):
    """Entropy metric H(Bs, D) module (Bs - Bayes net, D - test data set)"""
    def __init__(self):
        # Liczba probek testowych w D
        self.N = 3
        # Testowa siec Bs, X0 -> X1
        self.Bs = [{'r': 3, 'q': 1, 'name': 'X0', 'parent': ''}, {'r': 3, 'q': 3, 'name': 'X1', 'parent': 'X0'}] 
        # Dane testowe
        self.D = [ {'X0': 1, 'X1': 1}, {'X0': 1, 'X1': 1}, {'X0': 2, 'X1': 1} ]
        
    '''node_state = k, parent_state = j, node index = i'''
    def check_occurences(self, node_name, parent_name, node_state, parent_state):
        #print("FOR ", node_name, "=", node_state, " ", parent_name, "=", parent_state)
        result = 0
        # brak parenta
        if (parent_name == ''):
            return result

        for i, p in enumerate(self.D):
            found_node_state = p[node_name]
            found_parent_state = p[parent_name]
            is_found = (found_node_state == node_state and found_parent_state == parent_state)
            #print(p, is_found)
            if (is_found):
                result += 1

        return result

    '''parent_state = j'''
    def check_parent_occurences(self, parent_name, parent_state):
        #print("FOR parent ", parent_name, "=", parent_state)
        result = 0
        # brak parenta
        if (parent_name == ''):
            return result

        for i, p in enumerate(self.D):
            if (p[parent_name] == parent_state):
                #print("found!")
                result += 1

        return result
    
    def compute_entropy_metric(self):
        # Pierwsza iteracja (póki co ręcznie)
        result = 0

        for i in range(0, len(self.Bs)):
            r_i = self.Bs[i - 1]['r'] #r(i), czyli tak naprawdę r(i - 1)
            q_i = self.Bs[i - 1]['q'] #q(i), czyli tak naprawdę q(i - 1)
            
            for j in range(1, q_i + 1):
                for k in range(1, r_i + 1):
                    N_i_j_k = self.check_occurences(self.Bs[i]['name'], self.Bs[i]['parent'], k, j)
                    N_i_j = self.check_parent_occurences(self.Bs[i]['parent'], j)

                    if (N_i_j_k != 0 and N_i_j != 0):
                        result += (N_i_j_k / self.N) * math.log10(N_i_j_k / N_i_j)
                        
        print(result)
        result = -1 * self.N * result
        print("H(Bs, D): ", result)

In [4]:
EntropyMetric().compute_entropy_metric()

0.0
H(Bs, D):  -0.0


In [5]:
pp = pprint.PrettyPrinter(depth=6)

with open('data/weather.arff') as fh:
    data = arff.load(fh)
    #pp.pprint(data['attributes'])
    #print(data['attributes'][0][1])
    
    for i, p in enumerate(data['attributes']):
        print(p[0], "has", len(p[1]), "attributes:", p[1])

outlook has 3 attributes: ['sunny', 'overcast', 'rainy']
temperature has 3 attributes: ['hot', 'mild', 'cool']
humidity has 2 attributes: ['high', 'normal']
windy has 2 attributes: ['TRUE', 'FALSE']
play has 2 attributes: ['yes', 'no']
