In [11]:
from math import log2

def get_unique_values(my_list):
    """Return the distinct elements of ``my_list`` in order of first appearance.

    Membership is checked with ``in`` against a plain list, so elements only
    need to support equality comparison; they do not have to be hashable.
    """
    distinct = []
    for candidate in my_list:
        if candidate in distinct:
            continue  # already recorded on an earlier iteration
        distinct.append(candidate)
    return distinct

def flatten(my_list):
    """Concatenate the elements of every sub-iterable of ``my_list`` into a new list.

    Exactly one level of nesting is removed and the original ordering is kept.
    """
    return [item for group in my_list for item in group]


def class_entropy(labels):
    """Compute the Shannon entropy, in bits, of a collection of class labels.

    The entropy is ``-sum(p * log2(p))`` over the distinct labels, where ``p``
    is the relative frequency of a label in ``labels``.

    Labels are tallied by equality (not by hash), so unhashable labels such as
    lists are supported.

    Args:
        labels: A sized iterable (e.g. a list) of class labels.

    Returns:
        The entropy as a float. It is ``0.0`` both for an empty input and for
        an input that contains a single class.
    """
    # Tally every distinct label in one pass, in order of first appearance.
    distinct_labels = []
    class_counts = []
    for label in labels:
        try:
            class_counts[distinct_labels.index(label)] += 1
        except ValueError:  # raised by index(): first time this label is seen
            distinct_labels.append(label)
            class_counts.append(1)

    total = len(labels)
    entropy = 0.0
    for class_count in class_counts:
        class_prob = class_count / total
        # Subtract each term instead of negating the final sum, so a pure
        # label set yields 0.0 rather than -0.0.
        entropy -= class_prob * log2(class_prob)
    return entropy


def gini_index(labels):
    """Compute the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p ** 2)`` over the distinct labels, where ``p``
    is the relative frequency of a label in ``labels``.

    Labels are tallied by equality (not by hash), so unhashable labels such as
    lists are supported.

    Args:
        labels: A sized iterable (e.g. a list) of class labels.

    Returns:
        The Gini impurity. An empty input is treated as perfectly pure and
        yields ``0.0`` (without this guard the formula would give 1, which is
        larger than any impurity a real label set can reach).
    """
    total = len(labels)
    if total == 0:
        return 0.0

    # Tally every distinct label in one pass, in order of first appearance.
    distinct_labels = []
    class_counts = []
    for label in labels:
        try:
            class_counts[distinct_labels.index(label)] += 1
        except ValueError:  # raised by index(): first time this label is seen
            distinct_labels.append(label)
            class_counts.append(1)

    sum_of_squares = 0
    for class_count in class_counts:
        sum_of_squares += (class_count / total) ** 2
    return 1 - sum_of_squares


def information_gain(partitions):
    """Compute the information gain obtained by splitting into ``partitions``.

    The gain is the entropy of all labels pooled together minus the
    size-weighted average entropy of the individual partitions.

    Args:
        partitions: A re-iterable collection (e.g. a list) of label lists,
            one per branch of the split.

    Returns:
        The information gain in bits. When the partitions hold no labels at
        all the gain is ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    # Pool the labels once; the pooled list is the same for every partition.
    pooled_labels = flatten(partitions)
    total = len(pooled_labels)
    if total == 0:
        return 0.0

    weighted_child_entropy = 0
    for partition in partitions:
        weighted_child_entropy += (len(partition) / total) * class_entropy(partition)
    return class_entropy(pooled_labels) - weighted_child_entropy


def gini_index_test(partitions):
    """Compute the size-weighted Gini index of a candidate split.

    Each partition contributes its Gini impurity multiplied by the fraction of
    all labels that fall into it.

    Args:
        partitions: A re-iterable collection (e.g. a list) of label lists,
            one per branch of the split.

    Returns:
        The weighted Gini index. When the partitions hold no labels at all the
        result is ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    # The total is the same for every partition, so compute it once up front
    # (and from the lengths alone, without building a flattened copy).
    total = sum(len(partition) for partition in partitions)
    if total == 0:
        return 0.0

    weighted_gini = 0
    for partition in partitions:
        weighted_gini += (len(partition) / total) * gini_index(partition)
    return weighted_gini


# Example data: 14 binary class labels (five 0s, nine 1s). The list is not used
# by the call below; presumably it is the unsplit label set, since it has the
# same class counts as the two partitions pooled together -- confirm.
classes = [0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
# Candidate two-way split: one list of class labels per branch.
partitions = [
    [0, 0, 0, 0, 1, 1, 1],
    [1, 1, 1, 1, 1, 1, 0]
]
# Last expression of the cell, so the notebook displays the weighted Gini index.
gini_index_test(partitions)

0.3673469387755103

![title](imgs/Screenshot_40.png)

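For the branch forecast=sunny, we test wind: True  
For the branch forecast=sunny, we test humidity: False  
(Wind is chosen because its split gives the lower weighted Gini index; see the computation in the next cell.)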



In [12]:
# Partitions for the Sunny branch: each candidate split is a list of branches,
# and each branch lists the class labels that end up in it.
sunny_labels = ["no", "no", "yes", "yes", "no"]

sunny_humid = [["no", "no", "yes"],  # Sunny and high humidity
               ["yes", "no"]]        # Sunny and normal humidity
sunny_wind = [["no", "yes", "yes"],  # Sunny and weak wind
              ["no", "no"]]          # presumably Sunny and strong wind -- confirm

# Message text is reproduced verbatim so it keeps matching the recorded output.
print(
    f"Partiotion Sunny -> humidity Gini: {gini_index_test(sunny_humid)}",
    f"Partiotion Sunny -> wind Gini: {gini_index_test(sunny_wind)}",
    sep="\n",
)

Partiotion Sunny -> humidity Gini: 0.4666666666666667
Partiotion Sunny -> wind Gini: 0.26666666666666666
