In [5]:
import numpy as np

def entropy(y):
    p = np.mean(y)
    if p == 0 or p == 1:
        return 0
    return -p*np.log2(p) - (1-p)*np.log2(1-p)

def information_gain_ratio(y, y_left, y_right):
    h = entropy(y)
    h_left = entropy(y_left)
    h_right = entropy(y_right)
    if h == 0:
        return 0
    return (h - len(y_left)/len(y)*h_left - len(y_right)/len(y)*h_right) / h


def split_data(X, y, j, c):
    left_idx = X[:, j] >= c
    right_idx = ~left_idx
    return X[left_idx], y[left_idx], X[right_idx], y[right_idx]

def decision_tree(X, y):
    if len(set(y)) == 1 or len(y) == 0:
        return {'prediction': 1 if y.sum() >= len(y)/2 else 0}
    
    best_igr = 0
    best_split = None
    
    for j in range(X.shape[1]):
        for c in set(X[:, j]):
            X_left, y_left, X_right, y_right = split_data(X, y, j, c)
            igr = information_gain_ratio(y, y_left, y_right)
            if igr > best_igr:
                best_igr = igr
                best_split = (j, c)
    
    if best_igr == 0:
        return {'prediction': 1 if y.sum() >= len(y)/2 else 0}
    
    X_left, y_left, X_right, y_right = split_data(X, y, *best_split)
    return {
        'split': best_split,
        'left': decision_tree(X_left, y_left),
        'right': decision_tree(X_right, y_right)
    }

def train_decision_tree(file_path):
    data = np.loadtxt(file_path)
    X = data[:, :-1]
    y = data[:, -1]
    return decision_tree(X, y)

tree1 = train_decision_tree('D1.txt')
tree2 = train_decision_tree('D2.txt')

In [6]:
print(tree1)
print(tree2)

{'split': (1, 0.201829), 'left': {'prediction': 1}, 'right': {'prediction': 0}}
{'split': (0, 0.533076), 'left': {'split': (1, 0.383738), 'left': {'split': (0, 0.550364), 'left': {'prediction': 1}, 'right': {'split': (1, 0.474971), 'left': {'prediction': 1}, 'right': {'prediction': 0}}}, 'right': {'split': (0, 0.761423), 'left': {'split': (1, 0.191206), 'left': {'prediction': 1}, 'right': {'split': (0, 0.90482), 'left': {'split': (1, 0.037708), 'left': {'split': (0, 0.930371), 'left': {'prediction': 1}, 'right': {'split': (0, 0.927522), 'left': {'prediction': 0}, 'right': {'prediction': 1}}}, 'right': {'prediction': 0}}, 'right': {'split': (1, 0.169053), 'left': {'split': (0, 0.850316), 'left': {'prediction': 1}, 'right': {'prediction': 0}}, 'right': {'prediction': 0}}}}, 'right': {'split': (1, 0.301105), 'left': {'split': (0, 0.66337), 'left': {'prediction': 1}, 'right': {'prediction': 0}}, 'right': {'prediction': 0}}}}, 'right': {'split': (1, 0.639018), 'left': {'split': (0, 0.111076

In [None]:
For text "D1"

        [x[1] >= 0.201829]
          /             \
         /               \
[Predict 1]           [Predict 0]
(Left Leaf)          (Right Leaf)


In [None]:
For text "D2"

[x[0] >= 0.533076]
                |              
        ________|________      
       |                 |     
[x[1] >= 0.383738]   [x[1] >= 0.639018]
       |                 |      
   ____|____          ___|___   
  |         |        |       |  
[x[0] >= 0.550364]  [x[0] >= 0.111076] [Predict 0]
  |                      |
  |___                ___|
  |   |              |   |
[...] [Predict 0]  [x[1] >= 0.861] [Predict 0]
                        |
                     ___|___
                    |       |
                  [x[0] >= 0.33046] [Predict 0]
                    |    
                 ___|___
                |       |
              [Predict 1] [x[1] >= 0.745406]
                          |
                          |
                       ___|___
                      |       |
                    [x[0] >= 0.254049] [Predict 0]
                      |
                   ___|___
                  |       |
                [Predict 1] [x[0] >= 0.191915]
                              |
                              |
                            __|__
                           |     |
                         [x[1] >= 0.792752] [Predict 0]
                           |
                         __|__
                        |     |
                      [Predict 1] [Predict 0]
