In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from base import DecisionTree
from metrics import *

# Seed NumPy's global RNG so the np.random.* calls that build the synthetic
# datasets below produce the same draws when the notebook is run top to bottom
# on a fresh kernel.
np.random.seed(42)

In [7]:
# Synthetic dataset dimensions: N samples (rows), P features (columns).
N, P = 30, 5

# Real-valued feature matrix followed by a real-valued target, both drawn from
# the standard normal distribution (features first, then target).
X = pd.DataFrame(data=np.random.randn(N, P))
y = pd.Series(data=np.random.randn(N))


REAL INPUT AND REAL OUTPUT

In [8]:
# Regression case: fit a tree on the real-valued X / y and report error metrics
# once per split criterion.
# NOTE(review): how DecisionTree uses `criterion` for a real-valued target is
# not visible here -- confirm against base.DecisionTree.
for criteria in ["information_gain", "gini_index"]:
    print(f"Running Decision Tree with {criteria} criterion...\n")
    # Build the tree with the criterion named in this iteration so the printed
    # label matches the model that is actually fitted (same call form as the
    # other experiment cells in this notebook).
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    # Predictions are made on the training data itself, so the metrics below
    # are training errors.
    y_hat = tree.predict(X)
    tree.plot()
    print(f"Criteria: {criteria}")
    print("RMSE:", rmse(y_hat, y))
    print("MAE:", mae(y_hat, y))

Running Decision Tree with information_gain criterion...

 [2 <= 0.674252023587605]
    [3 <= 0.7889120340559967]
       [2 <= -0.29307759077898854]
          [0 <= 1.1613310755634525]
             [0 <= -0.38171895281445334]
                Decision: -1.8308153741936848
             [0 > -0.38171895281445334]
                Decision: -2.245667198993836
          [0 > 1.1613310755634525]
             Decision: -0.8783711870927843
       [2 > -0.29307759077898854]
          [1 <= 1.9025179131435808]
             [4 <= -1.5445374280655955]
                Decision: -1.6990072048436677
             [4 > -1.5445374280655955]
                Decision: -0.3627718931380622
          [1 > 1.9025179131435808]
             Decision: -1.7618637420397385
    [3 > 0.7889120340559967]
       [2 <= -1.3424972483641633]
          [0 <= -0.1982853129157069]
             Decision: -0.6309921298739474
          [0 > -0.1982853129157069]
             [0 <= 0.2029685381040715]
                Decision: -0

REAL INPUT AND DISCRETE OUTPUT

In [9]:
# Classification case with real-valued features: N x P standard-normal feature
# matrix, and a categorical label whose integer values are drawn from [0, P).
X = pd.DataFrame(data=np.random.randn(N, P))
labels = np.random.randint(P, size=N)
y = pd.Series(labels, dtype="category")

for criteria in ("information_gain", "gini_index"):
    print(f"Running Decision Tree with {criteria} criterion...\n")

    # Fit on the full dataset and predict on the same rows (training metrics).
    classifier = DecisionTree(criterion=criteria)
    classifier.fit(X, y)
    predictions = classifier.predict(X)
    classifier.plot()

    print(f"Criteria: {criteria}")
    print("Accuracy:", accuracy(predictions, y))
    # Per-class precision and recall, for each label value present in y.
    for cls in y.unique():
        print(f"Class {cls} - Precision:", precision(predictions, y, cls))
        print(f"Class {cls} - Recall:", recall(predictions, y, cls))

Running Decision Tree with information_gain criterion...

 [1 <= -0.1291699916066525]
    [0 <= -0.2351340565712592]
       [2 <= 0.9031349022374484]
          [4 <= 0.37304869258456097]
             Decision: 3
          [4 > 0.37304869258456097]
             Decision: 0
       [2 > 0.9031349022374484]
          Decision: 2
    [0 > -0.2351340565712592]
       [0 <= 0.17573481274179076]
          [0 <= -0.1862932275219108]
             Decision: 1
          [0 > -0.1862932275219108]
             Decision: 2
       [0 > 0.17573481274179076]
          Decision: 1
 [1 > -0.1291699916066525]
    [2 <= -0.27238126516092]
       [1 <= 0.4220911753473172]
          Decision: 4
       [1 > 0.4220911753473172]
          [1 <= 1.8564329878755206]
             Decision: 1
          [1 > 1.8564329878755206]
             Decision: 4
    [2 > -0.27238126516092]
       [4 <= -0.6043864417715281]
          [0 <= -0.1173285103074535]
             Decision: 0
          [0 > -0.1173285103074535]
       

DISCRETE INPUT AND DISCRETE OUTPUT

In [10]:
# Classification case with categorical features: P categorical columns, each
# holding integer values drawn from [0, P), plus a categorical label drawn the
# same way. The column count follows P instead of a hard-coded 5 so this cell
# stays in sync with the dataset dimensions configured earlier.
X = pd.DataFrame(
    {i: pd.Series(np.random.randint(P, size=N), dtype="category") for i in range(P)}
)
y = pd.Series(np.random.randint(P, size=N), dtype="category")

for criteria in ["information_gain", "gini_index"]:
    print(f"Running Decision Tree with {criteria} criterion...\n")

    # Initialize the tree with the chosen criterion
    tree = DecisionTree(criterion=criteria)

    # Fit the tree (the criterion was already supplied to the constructor)
    tree.fit(X, y)

    # Predict on the training rows themselves, so the metrics are training metrics
    y_hat = tree.predict(X)

    # Plot the tree
    tree.plot()

    # Print overall accuracy, then precision/recall for each label value in y
    print(f"Criteria: {criteria}")
    print("Accuracy:", accuracy(y_hat, y))
    for cls in y.unique():
        print(f"Class {cls} - Precision:", precision(y_hat, y, cls))
        print(f"Class {cls} - Recall:", recall(y_hat, y, cls))

Running Decision Tree with information_gain criterion...

 [3]
 - 3:
    [0]
    - 2:
       [1]
       - 2:
          Decision: 0
       - 0:
          Decision: 1
    - 3:
       Decision: 1
    - 1:
       Decision: 3
    - 0:
       Decision: 4
 - 4:
    [2]
    - 0:
       Decision: 3
    - 2:
       Decision: 0
    - 4:
       Decision: 0
    - 1:
       Decision: 3
    - 3:
       Decision: 4
 - 1:
    [1]
    - 1:
       Decision: 4
    - 3:
       [2]
       - 1:
          Decision: 2
       - 0:
          Decision: 1
    - 0:
       [2]
       - 0:
          Decision: 4
       - 4:
          Decision: 2
       - 3:
          Decision: 2
    - 2:
       Decision: 1
    - 4:
       Decision: 1
 - 0:
    [4]
    - 0:
       [2]
       - 4:
          Decision: 0
       - 0:
          Decision: 3
    - 4:
       [0]
       - 0:
          Decision: 3
       - 3:
          Decision: 4
    - 3:
       Decision: 0
    - 1:
       Decision: 4
    - 2:
       Decision: 1
 - 2:
    [1]
 

DISCRETE INPUT AND REAL OUTPUT

In [14]:
# Regression case with categorical features: P categorical columns, each
# holding integer values drawn from [0, P), and a standard-normal real target.
# The column count follows P instead of a hard-coded 5 so this cell stays in
# sync with the dataset dimensions configured earlier.
X = pd.DataFrame(
    {i: pd.Series(np.random.randint(P, size=N), dtype="category") for i in range(P)}
)
y = pd.Series(np.random.randn(N))

for criteria in ["information_gain", "gini_index"]:
    print(f"Running Decision Tree with {criteria} criterion...\n")
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    # Predictions are made on the training rows, so RMSE/MAE are training errors.
    y_hat = tree.predict(X)
    tree.plot()
    print(f"Criteria: {criteria}")
    print("RMSE:", rmse(y_hat, y))
    print("MAE:", mae(y_hat, y))

Running Decision Tree with information_gain criterion...

 [3]
 - 4:
    [0]
    - 1:
       Decision: 0.09643371879348994
    - 3:
       [1]
       - 3:
          [0]
          - 3:
             [0]
             - 3:
                Decision: -0.7590808561257798
       - 0:
          Decision: -1.6010192977017523
    - 2:
       [1]
       - 1:
          Decision: 1.3140094746438993
       - 4:
          Decision: 0.7080891218566765
    - 4:
       Decision: -1.2145434897264067
    - 0:
       Decision: 0.07518708760618616
 - 0:
    [2]
    - 0:
       [0]
       - 4:
          Decision: -0.6808872820390818
       - 2:
          Decision: -0.3067803928713705
    - 4:
       Decision: -0.5339091013167999
    - 3:
       [0]
       - 2:
          Decision: -1.4071440262149164
       - 4:
          Decision: 0.5037360462124257
    - 2:
       Decision: 0.3616765854236551
 - 2:
    [0]
    - 3:
       [1]
       - 1:
          Decision: 0.6294135848867876
       - 4:
          Decision: 