In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris, load_breast_cancer, load_wine, load_diabetes, make_regression 
from sklearn import tree
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import f1_score, mean_squared_error

# If AdditiveDecisionTree.py is not in the current folder, specify the path.
# The path is written as a raw string (r'...') so the backslashes in the
# Windows path are taken literally instead of being parsed as escape
# sequences such as \p or \A.
import sys
sys.path.insert(0, r'C:\python_projects\AdditiveDecisionTree_project\AdditiveDecisionTree')
from AdditiveDecisionTree import AdditiveDecisionTreeClasssifier, AdditiveDecisionTreeRegressor

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)

## Methods used to load the toy datasets

In [None]:
# Classification datasets 

def get_iris():
    """Load the Iris dataset as pandas objects.

    Returns:
        A (features, labels) pair: a DataFrame whose columns are the
        dataset's feature names, and a Series built from its target array.
    """
    bunch = load_iris()
    features = pd.DataFrame(bunch.data, columns=bunch['feature_names'])
    labels = pd.Series(bunch.target)
    return features, labels

def get_breast_cancer():
    """Load the Breast Cancer dataset.

    Returns:
        The (features, labels) pair produced by
        load_breast_cancer(return_X_y=True, as_frame=True).
    """
    features, labels = load_breast_cancer(return_X_y=True, as_frame=True)
    return features, labels

def get_wine():
    """Load the Wine dataset.

    Returns:
        The (features, labels) pair produced by
        load_wine(return_X_y=True, as_frame=True).
    """
    wine_features, wine_labels = load_wine(return_X_y=True, as_frame=True)
    return wine_features, wine_labels

# Regression datasets

def get_diabetes():
    """Load the Diabetes regression dataset as pandas objects.

    Returns:
        A (features, target) pair: a DataFrame with the dataset's feature
        names as columns, and a Series built from its target array.
    """
    bunch = load_diabetes()
    return (
        pd.DataFrame(bunch.data, columns=bunch.feature_names),
        pd.Series(bunch.target),
    )

# def get_linnerud():
#     data = load_linnerud(as_frame=True)
#     X = data.data
#     y = data.target['Weight']
#     return X,y

def get_make_regression():
    """Generate a noise-free synthetic regression dataset as pandas objects.

    Returns:
        A (features, target) pair: a DataFrame with default integer column
        labels, and a Series holding the generated target values.
    """
    # make_regression is called without a random_state, so the global NumPy
    # seed is reset immediately beforehand.
    np.random.seed(0)
    raw_features, raw_target = make_regression(noise=0.0)
    return pd.DataFrame(raw_features), pd.Series(raw_target)

## Example using sklearn's Decision Tree and AdditiveDecisionTree on toy datasets

In [None]:
# Note: this provides only an example of using AdditiveDecisionTree and does not 
# properly test its accuracy. We can, though, see that in terms of test scores,
# ADT (Additive Decision Trees) often do about the same as DT (standard Decision
# Trees), but sometimes one or the other does better. 
# Training scores are also shown to give a sense of overfitting.

# To estimate complexity for DTs, we use the number of nodes
# To estimate complexity for ADTs, we call get_model_complexity(),
# which is similar, but considers that additive nodes are more complex.

def evaluate_model(clf, clf_desc, X_train, X_test, y_train, y_test):
    """Fit a classifier and print its macro-F1 scores and a complexity figure.

    Args:
        clf: Estimator exposing fit() and predict().
        clf_desc: Label printed at the start of the summary line.
        X_train, y_train: Data used to fit the model and for the training score.
        X_test, y_test: Data used for the testing score.

    The complexity comes from clf.get_model_complexity() when the model has
    that method, otherwise from the length of clf.tree_.feature when the model
    has a tree_ attribute, and is 0 when it has neither.
    """
    clf.fit(X_train, y_train)

    # Predict on the training split first, then on the test split.
    score_train = f1_score(y_train, clf.predict(X_train), average='macro')
    score_test = f1_score(y_test, clf.predict(X_test), average='macro')

    if hasattr(clf, "get_model_complexity"):
        complexity = clf.get_model_complexity()
    elif hasattr(clf, "tree_"):
        complexity = len(clf.tree_.feature)
    else:
        complexity = 0

    print(f"{clf_desc}: Training score: {round(score_train,2)}, Testing score: {round(score_test,2)}, Complexity: {complexity}")

    
def evaluate_dataset(dataset_name, X, y):
    """Compare a standard decision tree and an additive decision tree on one dataset.

    Prints the dataset name, splits the data with train_test_split
    (random_state=0), and reports both classifiers through evaluate_model().

    Args:
        dataset_name: Heading printed before the two result lines.
        X: Feature table.
        y: Class labels.

    Returns:
        The fitted AdditiveDecisionTreeClasssifier.
    """
    print(f"\n{dataset_name}")

    # train_test_split returns X_train, X_test, y_train, y_test in that order,
    # which is the order evaluate_model() takes them in.
    splits = train_test_split(X, y, random_state=0)

    baseline = tree.DecisionTreeClassifier(max_depth=4, random_state=42)
    evaluate_model(baseline, "Standard DT", *splits)

    additive = AdditiveDecisionTreeClasssifier(max_depth=4, allow_additive_nodes=True, verbose_level=0)
    evaluate_model(additive, "Additive DT", *splits)
    return additive
    
    
# Iris and Wine are only evaluated for their printed scores.
for dataset_name, loader in (("Iris", get_iris), ("Wine", get_wine)):
    X, y = loader()
    evaluate_dataset(dataset_name, X, y)

# Breast Cancer runs last: X, y and the fitted model adt are reused by the
# cells that follow.
X, y = get_breast_cancer()
adt = evaluate_dataset("Breast Cancer", X, y)

## Summary Output of the AdditiveDecisionTree

In [None]:
# This continues the example with the Breast Cancer dataset.

# The output explaining an Additive Decision Tree is similar to that of
# scikit-learn decision trees, though it has slightly more information.
# For example, it provides the depth of each node and the class counts 
# in each node. 

# Here node 3 is an additive node. In the features list, it is specified
# as feature -100. In the Features in additive nodes list, we see it
# uses both feature 1 and feature 13. 

adt.output_tree()

## Explanations of Predictions

In [None]:
# Print an explanation (in the form of the decision path) for each of the
# first five rows.

exp_arr = adt.get_explanations(X[:5], y[:5])
for exp in exp_arr:
    # Emits a blank-line separator followed by the explanation text.
    print("\n", exp, sep="\n")

In [None]:
# Row 19 is an example whose decision path includes node 3, which is an
# additive node.

row_19 = slice(19, 19)  # used with .loc below, same as writing 19:19
exp_arr = adt.get_explanations(X.loc[row_19], y.loc[row_19])
for exp in exp_arr:
    print("\n", exp, sep="\n")

## Example with Regression

In [None]:
# Note: this provides only an example of using AdditiveDecisionTree and does 
# not properly test its accuracy

# In these examples, the additive decision trees provide slightly lower errors
# but slightly higher complexity.

# In general, Additive Decision Trees tend to work better for classification 
# than regression at least with default hyperparameters.


def evaluate_model(clf, clf_desc, X_train, X_test, y_train, y_test):
    """Fit a regressor and print its train/test mean squared error and complexity.

    Args:
        clf: Estimator exposing fit() and predict().
        clf_desc: Label printed at the start of the summary line.
        X_train, y_train: Data used to fit the model and for the training MSE.
        X_test, y_test: Data used for the testing MSE.

    The complexity comes from clf.get_model_complexity() when the model has
    that method, otherwise from the length of clf.tree_.feature when the model
    has a tree_ attribute, and is 0 when it has neither.
    """
    clf.fit(X_train, y_train)

    # Predict on the training split first, then on the test split.
    score_train = mean_squared_error(y_train, clf.predict(X_train))
    score_test = mean_squared_error(y_test, clf.predict(X_test))

    if hasattr(clf, "get_model_complexity"):
        complexity = clf.get_model_complexity()
    elif hasattr(clf, "tree_"):
        complexity = len(clf.tree_.feature)
    else:
        complexity = 0

    print(f"{clf_desc}: Training MSE: {round(score_train,2)}, Testing MSE: {round(score_test,2)}, Complexity: {complexity}")

    
def evaluate_dataset(dataset_name, X, y):
    """Compare a standard and an additive decision tree regressor on one dataset.

    Prints the dataset name, splits the data with train_test_split
    (random_state=0), and reports both regressors through evaluate_model().

    Args:
        dataset_name: Heading printed before the two result lines.
        X: Feature table.
        y: Regression target.

    Returns:
        The fitted AdditiveDecisionTreeRegressor.
    """
    print(f"\n{dataset_name}")

    # train_test_split returns X_train, X_test, y_train, y_test in that order,
    # which is the order evaluate_model() takes them in.
    splits = train_test_split(X, y, random_state=0)

    baseline = tree.DecisionTreeRegressor(max_depth=4, min_samples_leaf=5, random_state=42)
    evaluate_model(baseline, "Standard DT", *splits)

    additive = AdditiveDecisionTreeRegressor(max_depth=4, min_samples_leaf=5, allow_additive_nodes=True, verbose_level=0)
    evaluate_model(additive, "Additive DT", *splits)
    return additive
  
    
# Evaluate each regression dataset in turn. After the loop, X, y and adt
# hold the values from the last entry ("Make Regression").
for dataset_name, loader in (
    ("Diabetes", get_diabetes),
    ("Make Regression", get_make_regression),
):
    X, y = loader()
    adt = evaluate_dataset(dataset_name, X, y)

In [None]:
# When the cells are run in order, adt is the regressor returned by the last
# evaluate_dataset() call in the previous cell (the "Make Regression" model).
adt.output_tree()

## Example Tuning Hyperparameters with a Cross-Validated Randomized Search

In [None]:
# Note: this can take several minutes to execute.

X, y = get_diabetes()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Candidate values for each hyperparameter. RandomizedSearchCV evaluates
# n_iter sampled combinations of these rather than every combination.
parameters = {
    'min_samples_split': (5,10,25,50), 
    'min_samples_leaf': (5,10,15),
    'max_depth': (4,5,6,7),
    'allow_additive_nodes': (True, False),
    'max_added_splits_per_node': (2,3,4,5,10)
}

# max_depth and min_samples_leaf given here also appear in `parameters`,
# so the search supplies its own values for them on each candidate.
estimator = AdditiveDecisionTreeRegressor(max_depth=4, min_samples_leaf=5)
gs_estimator = RandomizedSearchCV(
    estimator,
    parameters,
    scoring='neg_mean_squared_error',
    n_iter=100,
    # Fix which combinations are sampled, so the result does not depend on
    # whatever used the global random state before this cell ran.
    random_state=0,
)
gs_estimator.fit(X_train, y_train)
y_pred = gs_estimator.predict(X_test)

# mean_squared_error expects (y_true, y_pred), in that order.
test_score = mean_squared_error(y_test, y_pred)

print("test_score: ", test_score)
print("best estimator: ", gs_estimator.best_estimator_)

In [None]:
# Create an instance of the best model found during tuning.
# The hyperparameter values below are written out by hand, not read from
# the search object.

best_params = dict(
    min_samples_split=25,
    min_samples_leaf=15,
    max_depth=5,
    allow_additive_nodes=True,
    max_added_splits_per_node=5,
)
adt = AdditiveDecisionTreeRegressor(**best_params)
adt.fit(X_train, y_train)

adt.get_model_complexity()