### This notebook trains the decision trees.
### Here the prediction is done on the training data itself,
### so Precision, Recall and Accuracy should be 1 for classification, and RMSE and MAE should be very close to 0 for regression.

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tree.base import DecisionTree
from tree.utils import *
from metrics import *

In [11]:
# Seed NumPy's global random generator so that the np.random.* calls used to
# build the data in the test cases below produce the same values on every run.
np.random.seed(42)

In [12]:
# Test case 1
# Real-valued features, real-valued target

N, P = 30, 5  # rows and columns of the random feature matrix
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))

for criteria in ("information_gain",):
    # Fit on the full data set, then predict on exactly the same rows.
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)

    print(
        "##########################################################",
        "Real Input and Real Output",
        sep="\n",
    )
    tree.plot()
    print("Criteria :", criteria)
    # Error metrics of the predictions against the targets they were fit on.
    print("RMSE: ", rmse(y_hat, y))
    print("MAE: ", mae(y_hat, y))
    print("##########################################################\n")

##########################################################
Real Input and Real Output
?( X( Attribute(1) ) < -1.1939721441501923):
         value = 2.720169166589619, depth = 1
?( X( Attribute(1) ) > -1.1939721441501923):
    ?( X( Attribute(4) ) < 0.3865751136808898):
        ?( X( Attribute(0) ) < 1.5077915869695468):
            ?( X( Attribute(4) ) < -1.7439789939378834):
                ?( X( Attribute(1) ) < 0.8125418173520702):
                    ?( X( Attribute(0) ) < 0.051029147990257595):
                             value = 0.6565536086338297, depth = 6
                    ?( X( Attribute(0) ) > 0.051029147990257595):
                             value = 0.4127809269364983, depth = 6
                ?( X( Attribute(1) ) > 0.8125418173520702):
                         value = 0.9633761292443218, depth = 5
            ?( X( Attribute(4) ) > -1.7439789939378834):
                ?( X( Attribute(2) ) < 1.24071347131677):
                    ?( X( Attribute(3) ) < -0.85515067237

In [13]:
# Test case 2
# Real-valued features, categorical target

N, P = 30, 5  # P sizes the feature matrix and bounds the integer labels
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randint(P, size=N), dtype="category")

for criteria in ("information_gain",):
    # Fit on the full data set, then predict on exactly the same rows.
    tree = DecisionTree(criterion=criteria)
    tree.fit(X, y)
    y_hat = tree.predict(X)

    print(
        "##########################################################",
        "Real Input and Discrete Output",
        sep="\n",
    )
    tree.plot()
    print("Criteria :", criteria)
    print("Accuracy: ", accuracy(y_hat, y))
    # One precision/recall pair per distinct label, in the order
    # y.unique() yields them.
    for cls in y.unique():
        print("Precision: ", precision(y_hat, y, cls))
        print("Recall: ", recall(y_hat, y, cls))
    print("##########################################################\n")

##########################################################
Real Input and Discrete Output
?( X( Attribute(0) ) < 0.5164969924782189):
    ?( X( Attribute(1) ) < 1.0083193995209832):
        ?( X( Attribute(2) ) < -0.4498157567688338):
            ?( X( Attribute(0) ) < -0.6307158537095157):
                     value = 0, depth = 4
            ?( X( Attribute(0) ) > -0.6307158537095157):
                ?( X( Attribute(0) ) < -0.27131174309911177):
                         value = 1, depth = 5
                ?( X( Attribute(0) ) > -0.27131174309911177):
                    ?( X( Attribute(0) ) < 0.27954971962799446):
                             value = 2, depth = 6
                    ?( X( Attribute(0) ) > 0.27954971962799446):
                             value = 1, depth = 6
        ?( X( Attribute(2) ) > -0.4498157567688338):
            ?( X( Attribute(2) ) < -0.13234745834482112):
                     value = 4, depth = 4
            ?( X( Attribute(2) ) > -0.13234745834482112)

In [14]:
# Test case 3
# Discrete Input and Discrete Output

N = 30  # number of samples (rows)
P = 5   # number of feature columns; also the exclusive upper bound of the random integer codes
# Build P categorical feature columns. The column count follows P (as in the
# real-input test cases) instead of a hard-coded 5, so changing P resizes the
# feature matrix consistently.
X = pd.DataFrame({i: pd.Series(np.random.randint(P, size=N), dtype="category") for i in range(P)})
y = pd.Series(np.random.randint(P, size=N), dtype="category")

for criteria in ["information_gain"]:
    tree = DecisionTree(criterion=criteria)  # Split based on Inf. Gain
    tree.fit(X, y)
    # Predict on the same rows the tree was fit on.
    y_hat = tree.predict(X)
    print("##########################################################")
    print("Discrete Input and Discrete Output")
    tree.plot()
    print("Criteria :", criteria)
    print("Accuracy: ", accuracy(y_hat, y))
    # One precision/recall pair per distinct label, in y.unique() order.
    for cls in y.unique():
        print("Precision: ", precision(y_hat, y, cls))
        print("Recall: ", recall(y_hat, y, cls))
    print("##########################################################\n")

##########################################################
Discrete Input and Discrete Output
?( X( Attribute(1) ) = 0):
    ?( X( Attribute(4) ) = 4):
        ?( X( Attribute(0) ) = 0):
                 value = 4, depth = 3
        ?( X( Attribute(0) ) = 3):
                 value = 0, depth = 3
    ?( X( Attribute(4) ) = 1):
        ?( X( Attribute(0) ) = 3):
            ?( X( Attribute(0) ) = 3):
                ?( X( Attribute(0) ) = 3):
                    ?( X( Attribute(0) ) = 3):
                        ?( X( Attribute(0) ) = 3):
                            ?( X( Attribute(0) ) = 3):
                                ?( X( Attribute(0) ) = 3):
                                    ?( X( Attribute(0) ) = 3):
                                             value = 0, depth = 10
    ?( X( Attribute(4) ) = 2):
             value = 4, depth = 2
    ?( X( Attribute(4) ) = 0):
             value = 0, depth = 2
?( X( Attribute(1) ) = 4):
    ?( X( Attribute(0) ) = 2):
             value = 1, 

In [15]:
# Test case 4
# Discrete Input and Real Output

N = 30  # number of samples (rows)
P = 5   # number of feature columns; also the exclusive upper bound of the random integer codes
# Build P categorical feature columns. The column count follows P (as in the
# real-input test cases) instead of a hard-coded 5, so changing P resizes the
# feature matrix consistently.
X = pd.DataFrame({i: pd.Series(np.random.randint(P, size=N), dtype="category") for i in range(P)})
y = pd.Series(np.random.randn(N))

for criteria in ["information_gain"]:
    tree = DecisionTree(criterion=criteria)  # Split based on Inf. Gain
    tree.fit(X, y)
    # Predict on the same rows the tree was fit on.
    y_hat = tree.predict(X)
    print("##########################################################")
    print("Discrete Input and Real Output")
    tree.plot()
    print("Criteria :", criteria)
    print("RMSE: ", rmse(y_hat, y))
    print("MAE: ", mae(y_hat, y))
    print("##########################################################")

##########################################################
Discrete Input and Real Output
?( X( Attribute(4) ) = 2):
    ?( X( Attribute(0) ) = 4):
        ?( X( Attribute(1) ) = 0):
                 value = -0.5768918695231487, depth = 3
        ?( X( Attribute(1) ) = 1):
                 value = -0.32602353216784113, depth = 3
        ?( X( Attribute(1) ) = 3):
                 value = 0.8711247034316923, depth = 3
    ?( X( Attribute(0) ) = 1):
        ?( X( Attribute(1) ) = 0):
                 value = -0.42098448082026296, depth = 3
        ?( X( Attribute(1) ) = 3):
                 value = 0.5298041779152828, depth = 3
    ?( X( Attribute(0) ) = 2):
             value = -2.4716445001272893, depth = 2
    ?( X( Attribute(0) ) = 3):
             value = 0.08658978747289992, depth = 2
?( X( Attribute(4) ) = 0):
    ?( X( Attribute(0) ) = 1):
        ?( X( Attribute(1) ) = 3):
                 value = 1.2012139221639448, depth = 3
        ?( X( Attribute(1) ) = 4):
                 