In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from metrics import *

# Seed NumPy's legacy global RNG; every np.random.* call in this notebook draws
# from it, so the generated test data is repeatable on a fresh, in-order run.
np.random.seed(42)

In [10]:
# Test case 1
# Real Input and Real Output
#
# N rows of P standard-normal features with a standard-normal target.

N, P = 30, 5
X = pd.DataFrame(data=np.random.randn(N, P))
y = pd.Series(data=np.random.randn(N))

# Fit one tree per split criterion, then score it on the same data it was fit on.
for criteria in ("information_gain", "gini_index"):
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)
    tree.plot()
    print("Criteria :", criteria)
    # Same two error reports as before, driven from a (label, function) table.
    for label, metric in (("RMSE: ", rmse), ("MAE: ", mae)):
        print(label, metric(y_hat, y))

Criteria : information_gain
RMSE:  0.35058133577908407
MAE:  0.2790232592413352
Criteria : gini_index
RMSE:  0.660664966269471
MAE:  0.46294685207331393


In [11]:
# Test case 2
# Real Input and Discrete Output
#
# N rows of P standard-normal features; the target is an integer label drawn
# from [0, P) and stored with pandas' "category" dtype.

N, P = 30, 5
X = pd.DataFrame(data=np.random.randn(N, P))
y = pd.Series(data=np.random.randint(P, size=N), dtype="category")

# Fit one tree per split criterion, then score it on the same data it was fit on.
for criteria in ("information_gain", "gini_index"):
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)
    tree.plot()
    print("Criteria :", criteria)
    print("Accuracy: ", accuracy(y_hat, y))
    # One precision/recall pair per label, in the order y.unique() yields them.
    for cls in y.unique():
        print("Precision: ", precision(y_hat, y, cls))
        print("Recall: ", recall(y_hat, y, cls))


Criteria : information_gain
Accuracy:  0.8333333333333334
Precision:  1.0
Recall:  0.8
Precision:  0.75
Recall:  0.9
Precision:  1.0
Recall:  1.0
Precision:  0.6666666666666666
Recall:  1.0
Precision:  0.5
Recall:  0.3333333333333333
Criteria : gini_index
Accuracy:  0.8
Precision:  1.0
Recall:  0.8
Precision:  0.7142857142857143
Recall:  1.0
Precision:  1.0
Recall:  0.6
Precision:  0.6666666666666666
Recall:  1.0
Precision:  0.5
Recall:  0.3333333333333333


In [12]:
# Test case 3
# Discrete Input and Discrete Output
#
# N rows of P categorical features (each an integer drawn from [0, P)) with a
# categorical target drawn the same way.

N = 30
P = 5
# Build one categorical column per feature. The column count follows P rather
# than a hard-coded 5, so changing P really changes the number of features.
X = pd.DataFrame({i: pd.Series(np.random.randint(P, size=N), dtype="category") for i in range(P)})
y = pd.Series(np.random.randint(P, size=N), dtype="category")

# Fit one tree per split criterion, then score it on the same data it was fit on.
for criteria in ["information_gain", "gini_index"]:
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)
    tree.plot()
    print("Criteria :", criteria)
    print("Accuracy: ", accuracy(y_hat, y))
    # One precision/recall pair per label, in the order y.unique() yields them.
    for cls in y.unique():
        print("Precision: ", precision(y_hat, y, cls))
        print("Recall: ", recall(y_hat, y, cls))

Criteria : information_gain
Accuracy:  0.4666666666666667
Precision:  0.5
Recall:  0.42857142857142855
Precision:  1.0
Recall:  0.5
Precision:  0.4
Recall:  0.8888888888888888
Precision:  0.0
Recall:  0.0
Precision:  0.5
Recall:  0.2
Criteria : gini_index
Accuracy:  0.5333333333333333
Precision:  0.35714285714285715
Recall:  0.7142857142857143
Precision:  1.0
Recall:  0.5
Precision:  0.5714285714285714
Recall:  0.4444444444444444
Precision:  0.6666666666666666
Recall:  0.8
Precision:  1.0
Recall:  0.2


In [13]:
# Test case 4
# Discrete Input and Real Output
#
# N rows of P categorical features (each an integer drawn from [0, P)) with a
# standard-normal target.

N = 30
P = 5
# Build one categorical column per feature. The column count follows P rather
# than a hard-coded 5, so changing P really changes the number of features.
X = pd.DataFrame({i: pd.Series(np.random.randint(P, size=N), dtype="category") for i in range(P)})
y = pd.Series(np.random.randn(N))

# Fit one tree per split criterion, then score it on the same data it was fit on.
for criteria in ["information_gain", "gini_index"]:
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)
    tree.plot()
    print("Criteria :", criteria)
    print("RMSE: ", rmse(y_hat, y))
    print("MAE: ", mae(y_hat, y))


Criteria : information_gain
RMSE:  0.9888720962275395
MAE:  0.6163301471846003
Criteria : gini_index
RMSE:  0.8972587325956433
MAE:  0.5888764841639892
