# Decision Tree

## 1. Regression Case

In [1]:
import sys
sys.path.append("../")
import numpy as np

from src.tree import DecisionTreeRegressor
from src.metrics import mean_squared_error

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [2]:
# LOAD DATA
# Diabetes dataset returned directly as (features, target) arrays.
X, y = load_diabetes(return_X_y=True)

# SPLIT DATA
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# REGRESS - fit and evaluate two trees with the same procedure
#   {}               -> "A Very Fit Tree": constructor defaults
#                       (presumably no depth limit -- confirm in src.tree)
#   {"max_depth": 3} -> "A Simple Tree": depth capped at 3
# After the loop, `clf`, `y_pred_train` and `y_pred_test` refer to the last
# configuration (the depth-limited tree), exactly as when the two runs were
# written out one after the other.
for tree_params in ({}, {"max_depth": 3}):
    # Create and fit a decision tree regressor
    clf = DecisionTreeRegressor(**tree_params)
    clf.fit(X_train, y_train)

    # Print the fitted tree as text
    # NOTE(review): `_export_tree` is a private method of the project's tree
    # class; switch to a public equivalent if one exists.
    clf._export_tree()
    print("")

    # Predict & report mean squared error on the train and test splits
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    print(f"MSE score Train : {mean_squared_error(y_train, y_pred_train):.4f}")
    print(f"MSE score Test  : {mean_squared_error(y_test, y_pred_test):.4f}")
    print("")
    print("")


The Decision Tree
-----------------
feature_2 <= 0.01?
|   |T: feature_8 <= 0.01?
|   |   |T: feature_5 <= 0.09?
|   |   |   |T: feature_8 <= -0.04?
|   |   |   |   |T: feature_4 <= -0.04?
|   |   |   |   |   |T: feature_4 <= -0.05?
|   |   |   |   |   |   |T: feature_0 <= -0.00?
|   |   |   |   |   |   |   |T: feature_6 <= 0.04?
|   |   |   |   |   |   |   |   |T: feature_2 <= -0.08?
|   |   |   |   |   |   |   |   |   |T: feature_0 <= -0.09?
|   |   |   |   |   |   |   |   |   |   |T: Pred: 101.00
|   |   |   |   |   |   |   |   |   |   |F: Pred: 116.00
|   |   |   |   |   |   |   |   |   |F: feature_6 <= 0.00?
|   |   |   |   |   |   |   |   |   |   |T: feature_2 <= -0.07?
|   |   |   |   |   |   |   |   |   |   |   |T: Pred: 55.00
|   |   |   |   |   |   |   |   |   |   |   |F: feature_5 <= -0.07?
|   |   |   |   |   |   |   |   |   |   |   |   |T: Pred: 83.00
|   |   |   |   |   |   |   |   |   |   |   |   |F: feature_0 <= -0.04?
|   |   |   |   |   |   |   |   |   |   |   |   |  

## 2. Classification Case

In [3]:
import sys
sys.path.append("../")
import numpy as np

from src.tree import DecisionTreeClassifier
from src.metrics import accuracy_score

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [4]:
# LOAD DATA
iris = load_iris()
X = iris.data
# Collapse the 3-class iris target into a binary one:
# class index 2 -> 1, every other class -> 0.
y = np.where(iris.target == 2, 1, 0)

# SPLIT DATA
# Hold out 30% of the rows for testing. `stratify=y` keeps the 0/1 class
# ratio the same in both splits; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# CLASSIFY - fit and evaluate two trees with the same procedure
#   {}               -> "A Very Fit Tree": constructor defaults
#                       (presumably no depth limit -- confirm in src.tree)
#   {"max_depth": 3} -> "A Simple Tree": depth capped at 3
# After the loop, `clf`, `y_pred_train` and `y_pred_test` refer to the last
# configuration (the depth-limited tree), exactly as when the two runs were
# written out one after the other.
for tree_params in ({}, {"max_depth": 3}):
    # Create and fit a decision tree classifier
    clf = DecisionTreeClassifier(**tree_params)
    clf.fit(X_train, y_train)

    # Print the fitted tree as text
    # NOTE(review): `_export_tree` is a private method of the project's tree
    # class; switch to a public equivalent if one exists.
    clf._export_tree()
    print("")

    # Predict & report accuracy on the train and test splits
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    print(f"Accuracy score Train : {accuracy_score(y_train, y_pred_train):.4f}")
    print(f"Accuracy score Test  : {accuracy_score(y_test, y_pred_test):.4f}")
    print("")
    print("")

The Decision Tree
-----------------
feature_3 <= 1.75?
|   |T: feature_0 <= 7.10?
|   |   |T: feature_1 <= 2.25?
|   |   |   |T: feature_2 <= 4.75?
|   |   |   |   |T: Pred: 0.00
|   |   |   |   |F: Pred: 1.00
|   |   |   |F: Pred: 0.00
|   |   |F: Pred: 1.00
|   |F: feature_2 <= 4.85?
|   |   |T: feature_0 <= 6.05?
|   |   |   |T: Pred: 0.00
|   |   |   |F: Pred: 1.00
|   |   |F: Pred: 1.00

Accuracy score Train : 1.0000
Accuracy score Test  : 0.9111


The Decision Tree
-----------------
feature_3 <= 1.75?
|   |T: feature_0 <= 7.10?
|   |   |T: feature_1 <= 2.25?
|   |   |   |T: Pred: 0.00
|   |   |   |F: Pred: 0.00
|   |   |F: Pred: 1.00
|   |F: feature_2 <= 4.85?
|   |   |T: feature_0 <= 6.05?
|   |   |   |T: Pred: 0.00
|   |   |   |F: Pred: 1.00
|   |   |F: Pred: 1.00

Accuracy score Train : 0.9905
Accuracy score Test  : 0.9111


