In [1]:
import numpy as np

class Node:
    """One vertex of a decision tree: either an attribute test or a leaf.

    Every field starts at a neutral default; the code that builds the tree is
    expected to fill in the fields that apply to the node it is creating.
    """

    def __init__(self):
        # Statistics of the training examples that reached this node.
        self.sample_count = 0
        self.entropy = 0.0

        # Leaf information: whether this node ends a path, and what it predicts.
        self.isLeaf = False
        self.prediction = ""

        # Split information: what this node tests and the sub-trees below it.
        # A fresh list is created per instance so nodes never share children.
        self.value = ""
        self.children = []

class ID3DecisionTreeClassifier:
    """Decision-tree classifier over categorical attributes, built with ID3.

    ``fit`` greedily grows a tree of ``Node`` objects: at every node the
    attribute with the highest information gain is chosen and one branch is
    created per distinct value of that attribute. ``predict`` walks the tree
    for a single sample and ``print_tree`` dumps it as tab-indented text.

    Training data is expected to be a pandas ``DataFrame`` (column selection,
    boolean-mask filtering, ``.unique()``, ``.shape`` and ``.columns`` are
    used on it).
    """

    def entropy(self, data, target_attrib_name):
        """Return the base-2 Shannon entropy of one column of ``data``.

        Args:
            data: DataFrame holding the examples.
            target_attrib_name: Name of the column whose value distribution
                is measured.

        Returns:
            The entropy in bits; ``0.0`` when the column holds a single
            distinct value or ``data`` has no rows.
        """
        examples_count = data.shape[0]
        target_column = data[target_attrib_name]

        entropy = 0.0
        # Values are visited in order of first appearance; keeping that order
        # keeps the floating-point sum (and therefore tie-breaking between
        # attributes in ``__ID3``) stable.
        for target_value in target_column.unique():
            proportion = (target_column == target_value).sum() / examples_count
            entropy += -proportion * np.log2(proportion)

        return entropy

    def info_gain(self, data, attrib_name, target_attrib_name):
        """Return the information gain of splitting ``data`` on ``attrib_name``.

        The gain is the entropy of the target column minus the size-weighted
        average entropy of the subsets obtained by grouping the rows on each
        distinct value of ``attrib_name``.

        Args:
            data: DataFrame holding the examples.
            attrib_name: Column to split on.
            target_attrib_name: Column holding the class label.

        Returns:
            The information gain in bits.
        """
        examples_count = data.shape[0]
        attrib_column = data[attrib_name]

        sum_entropy_subsets = 0
        for attrib_val in attrib_column.unique():
            # Filter once and reuse the subset for both its weight and its entropy.
            subset = data[attrib_column == attrib_val]
            weight = subset.shape[0] / examples_count
            sum_entropy_subsets += weight * self.entropy(subset, target_attrib_name)

        return self.entropy(data, target_attrib_name) - sum_entropy_subsets

    def fit(self, data, target_attrib_name):
        """Build the decision tree from ``data`` and store it on the instance.

        Args:
            data: DataFrame with one column per attribute plus the target column.
            target_attrib_name: Name of the column holding the class label;
                every other column is treated as a candidate attribute.
        """
        attrib_names = data.columns[data.columns != target_attrib_name]
        self.__tree = self.__ID3(data, attrib_names, target_attrib_name)

    def __majority_class(self, data, target_attrib_name):
        """Return the most frequent target value as a one-element array.

        The result has the same "array of one label" shape as the prediction
        stored on pure leaves. Ties are resolved by ``Series.mode``, which
        returns its modes sorted, so the smallest label wins.
        """
        return data[target_attrib_name].mode().iloc[:1].to_numpy()

    def __ID3(self, data, attrib_names, target_attrib_name):
        """Recursively build the (sub)tree for ``data`` and return its root.

        Args:
            data: Examples that reached this node.
            attrib_names: pandas Index of attributes still available for splitting.
            target_attrib_name: Column holding the class label.

        Returns:
            A ``Node``. It is a leaf when all examples share one label, or when
            no attribute is left to split on (the majority label is predicted
            in that case). Otherwise it is an internal node whose ``children``
            is a list of single-entry ``{attribute value: sub-tree}`` dicts.
        """
        root = Node()
        root.entropy = self.entropy(data, target_attrib_name)
        root.sample_count = data.shape[0]

        if root.entropy == 0.0:
            # Pure node: every example carries the same label.
            root.isLeaf = True
            root.prediction = data[target_attrib_name].unique()
            return root

        # Impure node: remember the majority label. It is the answer for a
        # leaf created because the attributes ran out, and the fallback for
        # samples whose attribute value has no branch below this node.
        root.prediction = self.__majority_class(data, target_attrib_name)

        if len(attrib_names) == 0:
            # Attributes exhausted but labels still disagree (conflicting
            # examples). Without this guard ``max`` below would raise
            # ValueError on an empty sequence.
            root.isLeaf = True
            return root

        # Highest gain wins; ties fall back to comparing the attribute names.
        _, best_attrib = max(
            (self.info_gain(data, attrib_name, target_attrib_name), attrib_name)
            for attrib_name in attrib_names
        )
        root.value = best_attrib

        # The same reduced attribute set is passed to every branch.
        remaining_attrib_names = attrib_names[attrib_names != best_attrib]

        for attrib_unique_value in data[best_attrib].unique():
            data_subset = data[data[best_attrib] == attrib_unique_value]
            subtree = self.__ID3(data_subset, remaining_attrib_names, target_attrib_name)
            root.children.append({attrib_unique_value: subtree})

        return root

    def predict(self, data):
        """Classify a single sample with the fitted tree.

        Args:
            data: The sample, indexable by attribute name (for example a dict
                mapping attribute name to value).

        Returns:
            The prediction stored on the node where the walk ends. This is the
            leaf's prediction, or the majority label of the last node reached
            when the sample's attribute value has no matching branch.
        """
        return self.__predict(self.__tree, data)

    def __predict(self, root: Node, data):
        """Walk from ``root`` following the branches that match ``data``."""
        if root.isLeaf:
            return root.prediction

        observed_value = data[root.value]
        for child in root.children:
            # Each child is a single-entry dict: {attribute value: sub-tree}.
            branch_value, subtree = next(iter(child.items()))
            if branch_value == observed_value:
                return self.__predict(subtree, data)

        # Value never seen at this node during training: answer with the
        # node's majority label instead of silently returning None.
        return root.prediction

    def print_tree(self, offset=0):
        """Print the fitted tree as tab-indented text, followed by a legend.

        Args:
            offset: Number of tabs to indent the root with.
        """
        self.__print_tree(self.__tree, offset)
        print("\nLEGENDS:")
        print("e: entropy, s: sample count, p: prediction")

    def __print_tree(self, root: Node, offset=0):
        """Print ``root`` at ``offset`` tabs, then each branch value and sub-tree."""
        print("\t" * offset, end="")

        if root.isLeaf:
            print("('LEAF') [e: {:.2f}, s: {}, p: {}]".format(root.entropy, root.sample_count, root.prediction))
            return

        print("({}) [e: {:.2f}, s: {}]".format(root.value, root.entropy, root.sample_count))
        for child in root.children:
            branch_value, subtree = next(iter(child.items()))
            # Branch label sits one level below the node, its sub-tree two.
            print("\t" * (offset + 1), end="")
            print(branch_value)
            self.__print_tree(subtree, offset + 2)


In [3]:
import pandas as pd


In [4]:
import os
from pathlib import Path

# Location of the play-tennis CSV. The default is the original hard-coded local
# path; set the PLAYTENNIS_CSV environment variable to run the notebook against
# a copy of the file stored elsewhere.
DEFAULT_PLAYTENNIS_CSV = r"C:\Users\sempa\Documents\College_Notes\6TH SEM\Lab\Data\playtennis.csv"
playtennis_csv = Path(os.environ.get("PLAYTENNIS_CSV", DEFAULT_PLAYTENNIS_CSV))

# Keep only the four attribute columns and the class label, in this fixed order.
data = pd.read_csv(playtennis_csv)
data = data[["Outlook", "Temperature", "Humidity", "Wind", "PlayTennis"]]

In [5]:
# Train the classifier on the loaded table, using the "PlayTennis" column as the
# class label, then dump the learned tree as indented text starting at column 0.
id3DecisionTreeClassifier = ID3DecisionTreeClassifier()
id3DecisionTreeClassifier.fit(data=data, target_attrib_name="PlayTennis")
id3DecisionTreeClassifier.print_tree(offset=0)

(Outlook) [e: 0.94, s: 14]
	Sunny
		(Humidity) [e: 0.97, s: 5]
			High
				('LEAF') [e: 0.00, s: 3, p: ['No']]
			Normal
				('LEAF') [e: 0.00, s: 2, p: ['Yes']]
	Overcast
		('LEAF') [e: 0.00, s: 4, p: ['Yes']]
	Rain
		(Wind) [e: 0.97, s: 5]
			Weak
				('LEAF') [e: 0.00, s: 3, p: ['Yes']]
			Strong
				('LEAF') [e: 0.00, s: 2, p: ['No']]

LEGENDS:
e: entropy, s: sample count, p: prediction


In [6]:
# A single sample, given as an attribute-name -> value mapping, is classified
# by the fitted tree and the result is printed.
test_data = {
    "Outlook": "Sunny",
    "Temperature": "Hot",
    "Humidity": "Normal",
    "Wind": "Strong",
}
prediction = id3DecisionTreeClassifier.predict(test_data)
print(prediction)

['Yes']
