In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns; sns.set()
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

### An example of creating a decision tree in scikit-learn

In [None]:
from sklearn.datasets import make_blobs

# Synthetic classification data: 300 samples drawn around 4 blob centres.
X, y = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=0,
)

# Single large axes; the first two feature columns are plotted and each point
# is coloured by its label in y.
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap='rainbow');

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Depth-2 classification tree on the blob data.
# random_state is pinned (as the regressors further down already do) so that
# re-running the notebook gives the same tree: scikit-learn permutes the
# features at every split, so ties between equally good splits could otherwise
# be broken differently from run to run.
tree_depth2 = DecisionTreeClassifier(max_depth=2, random_state=42).fit(X, y)


In [None]:
# Show the class labels the fitted tree picked up from y
# (scikit-learn stores them on `classes_` once fit() has run).
tree_depth2.classes_

In [None]:
# Echo the max_depth hyperparameter this tree was constructed with.
# NOTE: this is the requested limit, not necessarily the depth actually
# reached by the fitted tree (get_depth() reports the latter).
tree_depth2.max_depth

In [None]:
def plot_decision_boundary(clf, X, y, axes=(0, 7.5, 0, 3)):
    """Plot a classifier's decision regions with the samples drawn on top.

    A 100 x 100 grid spanning ``axes`` is passed to ``clf.predict`` and the
    predictions are drawn as filled contours on a new 12 x 8 figure. The rows
    of ``X`` are then overlaid, using one marker style per distinct value in
    ``y``.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict`` for two-column input.
    X : array-like
        Samples to overlay; only columns 0 and 1 are plotted.
    y : array-like
        Labels for the rows of ``X`` (1-D, so it can be used as a row mask).
    axes : sequence of four floats, optional
        Plot window as ``(x1_min, x1_max, x2_min, x2_max)``. It defines both
        the prediction grid and the final axis limits.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on, so callers can add titles, legends, etc.
    """
    x1_min, x1_max, x2_min, x2_max = axes

    # Evaluate the classifier on a regular grid covering the plot window.
    x1, x2 = np.meshgrid(
        np.linspace(x1_min, x1_max, 100),
        np.linspace(x2_min, x2_max, 100),
    )
    grid_points = np.c_[x1.ravel(), x2.ravel()]
    y_pred = clf.predict(grid_points).reshape(x1.shape)

    _, ax = plt.subplots(figsize=(12, 8))
    ax.contourf(x1, x2, y_pred, alpha=0.3, cmap='rainbow')

    style_list = ['yo', 'ro', 'bs', 'g^', 'k*', 'r>']
    for i, label in enumerate(np.unique(y)):
        members = y == label
        # Wrap around the style list so that more classes than styles re-use
        # a marker instead of raising IndexError.
        ax.plot(X[members, 0], X[members, 1], style_list[i % len(style_list)])

    ax.set_xlabel(r"$x_1$", fontsize=18)
    ax.set_ylabel(r"$x_2$", fontsize=18, rotation=0)
    ax.axis([x1_min, x1_max, x2_min, x2_max])
    return ax

# Decision regions of the depth-2 tree over the window x1 in [-5, 6], x2 in [-3, 10].
plot_decision_boundary(tree_depth2, X, y, axes=[-5, 6, -3, 10])
plt.title('MAX Depth = 2');

In [None]:
# Depth-3 tree on the same blob data, plotted over the same window as the
# depth-2 tree for comparison.
# random_state is pinned so the fitted tree (and therefore the plot) is
# reproducible: scikit-learn permutes features at each split, so ties between
# equally good splits could otherwise be resolved differently between runs.
tree_depth3 = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X, y)
plot_decision_boundary(tree_depth3, X, y, [-5, 6, -3, 10])
plt.title('MAX Depth = 3');

In [None]:
# Tree with a very loose depth limit (50): far deeper than a 300-sample data
# set is likely to need, so this shows what a practically unconstrained tree does.
# random_state is pinned so the fitted tree is reproducible between runs.
tree_depth50 = DecisionTreeClassifier(max_depth=50, random_state=42).fit(X, y)
# The name `tree_depth4` did not match max_depth=50; it is kept only as an
# alias in case another cell still refers to it.
tree_depth4 = tree_depth50
plot_decision_boundary(tree_depth50, X, y, [-5, 6, -3, 10])
plt.title('MAX Depth = 50');

###  Decision tree regression

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

#### With no restrictions, a decision tree regressor tends to make point-wise predictions (i.e. it overfits the training data)

In [None]:
# Noisy samples of y = exp(x^2 / 2) for x drawn uniformly from [0, 1).
# The global NumPy seed is fixed first so the draw is repeatable.
np.random.seed(42)
m = 200  # number of training samples
X = np.random.rand(m, 1)
y = np.exp(0.5 * X ** 2)
y = y + np.random.randn(m, 1) / 10  # additive Gaussian noise, std 0.1

In [None]:
 ## un-regularized
# Default hyperparameters: no explicit limit on depth or leaf size.
tree_reg_unregularized = DecisionTreeRegressor(random_state=42).fit(X, y)
# Regularized through the minimum number of samples per leaf (10) rather than
# through the depth of the tree.
tree_reg_min10leaf = DecisionTreeRegressor(min_samples_leaf=10, random_state=42).fit(X, y)

# 500 evenly spaced query points on [0, 1], shaped as a single feature column,
# used to draw each model's prediction curve.
x1 = np.linspace(0, 1, 500)[:, np.newaxis]
y_pred_ur = tree_reg_unregularized.predict(x1)
y_pred_r = tree_reg_min10leaf.predict(x1)


In [None]:

# Training points (blue dots) with the unrestricted tree's predictions (red) on top.
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(X, y, "b.")
ax.plot(x1, y_pred_ur, "r.-", label=r"$\hat{y}$", linewidth=2)

ax.set_ylabel("$y$", rotation=0, fontsize=18)
ax.set_xlabel("$x_1$", fontsize=18)
ax.legend(fontsize=18, loc="upper center")
ax.set_title("No restrictions", fontsize=14)


#### A regularized decision tree regressor gives a piecewise-constant prediction

In [None]:
# Training points (blue dots) with the min_samples_leaf-regularized tree's
# predictions (red) on top.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(X, y, "b.")
ax.plot(x1, y_pred_r, "r.-", linewidth=2, label=r"$\hat{y}$")
ax.set_xlabel("$x_1$", fontsize=18)
ax.set_ylabel("$y$", fontsize=18, rotation=0)
# The prediction curve carries a label, so the legend has to be drawn for it to
# appear; placement and size match the "No restrictions" plot.
ax.legend(loc="upper center", fontsize=18)
ax.set_title("min_samples_leaf={}".format(tree_reg_min10leaf.min_samples_leaf), fontsize=14);


In [None]:
# Noisy quadratic training set: y = 4 * (x - 0.5)^2 plus Gaussian noise (std 0.1),
# with x drawn uniformly from [0, 1). Seeded so the sample is repeatable.
np.random.seed(42)
m = 200
X = np.random.rand(m, 1)
y = 4 * (X - 0.5) ** 2 + np.random.randn(m, 1) / 10


# Scatter of the raw training sample (X against y) on its own 14 x 8 figure.
fig, ax = plt.subplots(figsize=(14, 8))
ax.scatter(x=X, y=y)

In [None]:
def plot_regression_predictions(tree_reg, X, y, axes=(0, 1, -0.2, 1), ylabel="$y$", ax=None):
    """Draw training points and a regressor's prediction curve on one axes.

    The model is evaluated on 500 evenly spaced points between ``axes[0]`` and
    ``axes[1]``; the result is drawn as a green dash-dot line labelled
    ``$\\hat{y}$`` over the training points (blue dots).

    Parameters
    ----------
    tree_reg : object
        Fitted estimator exposing ``predict`` for single-column input.
    X, y : array-like
        Training inputs and targets to plot as points.
    axes : sequence of four floats, optional
        ``(x_min, x_max, y_min, y_max)``. Only the x-range is used (for the
        prediction grid); the y-limits are accepted but not applied, so the
        y-axis is left to matplotlib's autoscaling.
    ylabel : str or None, optional
        Y-axis label; pass ``None`` (or an empty string) to omit it.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. Defaults to the current pyplot axes, which is what
        the function has always drawn on.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on.
    """
    if ax is None:
        ax = plt.gca()

    x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)
    y_pred = tree_reg.predict(x1)

    ax.set_xlabel("$x_1$", fontsize=18)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=18, rotation=0)
    ax.plot(X, y, "b.")
    ax.plot(x1, y_pred, "g-.", linewidth=2, label=r"$\hat{y}$")
    return ax

# 2 x 2 grid comparing regression trees of increasing max_depth on the same
# training data. Each panel fits its own model and keeps it under its own name.
plt.figure(figsize=(30, 25))

# Top-left: max_depth=2 (the only panel drawn with the default y-axis label).
plt.subplot(221)
tree_reg1 = DecisionTreeRegressor(random_state=42, max_depth=2)
tree_reg1.fit(X, y)
plot_regression_predictions(tree_reg1, X, y)
# Legend added so every panel identifies the prediction curve the same way.
plt.legend(loc="upper center", fontsize=18)
plt.title("max_depth=2", fontsize=14)

# Top-right: max_depth=3.
plt.subplot(222)
tree_reg2 = DecisionTreeRegressor(random_state=42, max_depth=3)
tree_reg2.fit(X, y)
plot_regression_predictions(tree_reg2, X, y, ylabel=None)
plt.legend(loc="upper center", fontsize=18)
plt.title("max_depth=3", fontsize=14)

# Bottom-left: max_depth=4.
plt.subplot(223)
tree_reg3 = DecisionTreeRegressor(random_state=42, max_depth=4)
tree_reg3.fit(X, y)
plot_regression_predictions(tree_reg3, X, y, ylabel=None)
plt.legend(loc="upper center", fontsize=18)
plt.title("max_depth=4", fontsize=14)

# Bottom-right: max_depth=5. The model gets its own name so that the depth-4
# tree held in tree_reg3 is not overwritten.
plt.subplot(224)
tree_reg4 = DecisionTreeRegressor(random_state=42, max_depth=5)
tree_reg4.fit(X, y)
plot_regression_predictions(tree_reg4, X, y, ylabel=None)
plt.legend(loc="upper center", fontsize=18)
plt.title("max_depth=5", fontsize=14)

plt.show()