In [1]:
import numpy as np
import pandas as pd

In [2]:
class Node:
    """A single node of the decision tree.

    A node is either an internal test on an attribute or a leaf holding a
    prediction, depending on ``node_type``:

    * ``"condition"`` -- ``value`` is stored as ``attribute`` and an empty
      ``branches`` dict (attribute value -> child node) is created.
    * anything else -- ``value`` is stored as ``value`` (the leaf's result).
    """

    def __init__(self, node_type, value):
        """Create a node of kind ``node_type`` carrying ``value``."""
        self.node_type = node_type

        # Leaves only carry their result; handle them first and bail out.
        if node_type != "condition":
            self.value = value
            return

        # Internal node: remember which attribute is tested and start with
        # no children (each instance gets its own dict).
        self.attribute = value
        self.branches = {}

In [49]:
class Decision_Tree:
    """ID3-style decision tree over categorical feature columns.

    The tree is built by ``classify`` from a pandas DataFrame and is made of
    ``Node`` objects (defined elsewhere in this notebook): "condition" nodes
    test one attribute and hold one child per observed value, "result" nodes
    hold the predicted label.
    """

    def calculate_entropy(self, data, target):
        """Return the Shannon entropy (in bits) of column ``target``.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows to measure.
        target : str
            Name of the label column.

        Returns
        -------
        float
            ``-sum(p * log2(p))`` over the label proportions; 0 for empty
            or single-label data.
        """
        proportions = data[target].value_counts(normalize=True)
        # A categorical column reports unused categories with proportion 0,
        # and 0 * log2(0) is NaN -- leave those out of the sum.
        proportions = proportions[proportions > 0]
        return float(-(proportions * np.log2(proportions)).sum())

    def calculate_gain(self, data, feature, target):
        """Return the weighted entropy left after splitting on ``feature``.

        Despite its name this is not the information gain itself but the
        term subtracted from the base entropy: for every distinct value of
        ``feature`` the entropy of the matching rows, weighted by the share
        of rows that have that value.
        """
        total_rows = data.shape[0]
        remainder = 0.0

        for value in data[feature].unique():
            subset = data[data[feature] == value]
            weight = subset.shape[0] / total_rows
            remainder += weight * self.calculate_entropy(subset, target)

        return remainder

    def calculate_infogain(self, data, feature, target):
        """Return the information gain of splitting ``data`` on ``feature``.

        The base entropy is computed from ``data`` itself, so the method can
        be called on its own and does not depend on a previous ``classify``
        call having populated instance state.
        """
        base_entropy = self.calculate_entropy(data, target)
        return base_entropy - self.calculate_gain(data, feature, target)

    @staticmethod
    def _majority_label(data, target):
        """Return the most frequent label in ``data[target]`` (None if empty)."""
        modes = data[target].mode()
        return modes.iloc[0] if len(modes) else None

    def classify(self, data, target):
        """Recursively build the decision tree for ``data``.

        At each level the feature with the highest information gain is chosen
        (ties keep the earliest column), one branch is created per observed
        value, and the chosen column is removed before recursing. Progress is
        printed as the tree is built.

        Parameters
        ----------
        data : pandas.DataFrame
            Training rows; every column except ``target`` is a feature.
        target : str
            Name of the label column.

        Returns
        -------
        Node
            Root of the (sub)tree. When no features are left, a "result" leaf
            carrying the majority label is returned, so a branch is never
            ``None`` even if identical rows carry conflicting labels.
        """
        base_entropy = self.calculate_entropy(data, target)
        features = data.drop(target, axis=1).columns

        # Kept for backward compatibility only; the algorithm itself no longer
        # reads these attributes (they are overwritten on every recursion).
        self.entropy = base_entropy
        self.features = features

        majority = self._majority_label(data, target)

        if len(features) == 0:
            # Nothing left to split on but the labels still disagree:
            # predict the most common label for this subset.
            print(f"Result : {majority}\n")
            return Node("result", majority)

        max_gain = 0
        split_attribute = features[0]

        for feature in features:
            # Base entropy is the same for every feature, so it is computed once.
            infogain = base_entropy - self.calculate_gain(data, feature, target)

            if max_gain < infogain:
                max_gain = infogain
                split_attribute = feature

        feature_values = data[split_attribute].unique()
        node = Node("condition", split_attribute)
        # Remembered so predict() has an answer for values unseen in training.
        node.majority = majority

        print(f"Condition : {split_attribute}\nInformantion Gain : {round(max_gain,2)}\nBranches : {feature_values}\n")

        for value in feature_values:
            filtered_data = data[data[split_attribute] == value].drop(split_attribute, axis=1)
            labels = filtered_data[target].unique()

            if len(labels) == 1:
                # Pure subset: stop here with a leaf.
                print(f"Result : {labels[0]}\n")
                node.branches[value] = Node("result", labels[0])
            else:
                node.branches[value] = self.classify(filtered_data, target)

        return node

    def predict(self, data, target, root):
        """Predict a label for every row of ``data`` by walking the tree.

        Parameters
        ----------
        data : pandas.DataFrame
            Rows to classify. Must contain every attribute the tree tests;
            the ``target`` column may be present but is not required.
        target : str
            Name of the label column. Unused; kept so existing callers
            keep working.
        root : Node
            Tree returned by ``classify``.

        Returns
        -------
        list
            One prediction per row, in row order. If a row holds a feature
            value that has no branch, the majority label recorded on that
            split node is used (``None`` if the node has none recorded).
        """
        results = []

        for _, row in data.iterrows():
            current = root

            while True:
                if current.node_type != "condition":
                    prediction = current.value
                    break

                child = current.branches.get(row[current.attribute])
                if child is None:
                    # Value never seen at this split during training.
                    prediction = getattr(current, "majority", None)
                    break

                current = child

            results.append(prediction)

        return results

In [50]:
class Decision_Tree_Classifier:
    """Fit/predict wrapper around a ``Decision_Tree``.

    The wrapped tree builder is kept in ``model``; the root node produced by
    ``fit`` is kept in ``root`` and reused by ``predict``.
    """

    def __init__(self):
        """Create the underlying ``Decision_Tree``; no tree is built yet."""
        self.model = Decision_Tree()

    def fit(self, data, target):
        """Build the tree from ``data`` (label column ``target``) and store its root."""
        built_tree = self.model.classify(data, target)
        self.root = built_tree

    def predict(self, data, target):
        """Return the model's predictions for ``data`` using the stored root."""
        predictions = self.model.predict(data, target, self.root)
        return predictions
        

In [51]:
# Load the training table from a CSV file; the path is relative to the
# notebook's working directory.
data = pd.read_csv("./Datasets/data 2.csv")

In [52]:
# Preview the first rows to check the column names and label values.
data.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [53]:
# Create the classifier; it wraps a Decision_Tree and holds no fitted tree yet.
model = Decision_Tree_Classifier()

In [54]:
# Build the tree with "Play Tennis" as the label column; every other column is
# treated as a feature. Each chosen split and leaf is printed while building.
model.fit(data,"Play Tennis")

Condition : Outlook
Informantion Gain : 0.25
Branches : ['Sunny' 'Overcast' 'Rain']

Condition : Humidity
Informantion Gain : 0.97
Branches : ['High' 'Normal']

Result : No

Result : Yes

Result : Yes

Condition : Wind
Informantion Gain : 0.97
Branches : ['Weak' 'Strong']

Result : Yes

Result : No



In [55]:
# Predict labels for the first five rows of the training table (these rows were
# seen during fit, so this is a sanity check rather than an evaluation).
model.predict(data[0:5],"Play Tennis")

['No', 'No', 'Yes', 'Yes', 'Yes']

In [56]:
# Show the same five rows with their actual labels, for comparison with the
# predictions.
data[0:5]

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
