# Chapter 14: Trees and Forests

## 14.1 Training a Decision Tree Classifier

In [1]:
# Load libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets

# Fetch the iris dataset and separate the measurements from the labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Build the classifier with a fixed seed so the fitted tree is reproducible
decisiontree = DecisionTreeClassifier(random_state=0)

# Fit on the full dataset and keep the result under the name `model`
model = decisiontree.fit(features, target)

Predict the class of an observation:

In [2]:
# A single unseen observation, given as one row of four measurements
observation = [[5, 4, 3, 2]]

# Ask the fitted tree which class it assigns to that row
model.predict(observation)

array([1])

See the predicted class probabilities of the observation:

In [3]:
# View predicted class probabilities for the three classes
# (the result has one row per observation and one column per class)
model.predict_proba(observation)

array([[0., 1., 0.]])

Choose a different impurity measurement (instead of the default Gini):

In [4]:
# Same kind of classifier as before, but split quality is scored with
# entropy rather than the default Gini impurity
decisiontree_entropy = DecisionTreeClassifier(random_state=0, criterion='entropy')

# Fit the entropy-based tree on the same features and target
model_entropy = decisiontree_entropy.fit(features, target)

In [5]:
# Predict the observation's class with the entropy-based tree
model_entropy.predict(observation)

array([1])

In [6]:
# View predicted class probabilities for the three classes,
# this time from the entropy-based tree
model_entropy.predict_proba(observation)

array([[0., 1., 0.]])

## 14.2 Training a Decision Tree Regressor
Train a regression model using a decision tree

In [7]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn import datasets

# Load the Boston housing data from its original source (needs network access).
# `datasets.load_boston()` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so it cannot be called on a current install. The parsing below is the
# replacement recipe from scikit-learn's own deprecation notice: the file
# opens with 22 lines of description and stores every record across two
# physical lines, hence `skiprows=22` and the even/odd row interleaving.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])

# Keep only the first two features
features = data[:, 0:2]
target = raw_df.values[1::2, 2]

# Create decision tree regressor object
decisiontree = DecisionTreeRegressor(random_state=0)

# Train model
model = decisiontree.fit(features, target)

By default, potential splits are measured by how much they reduce mean squared error (MSE). Predict the value of a new observation:

In [8]:
# Make new observation: one row with two values, matching the two
# features the regressor was trained on
observation = [[0.02, 16]]

# Predict observation's value
model.predict(observation)

array([33.])

Construct a tree whose splits reduce mean absolute error (MAE):

In [9]:
# Create decision tree regressor object that scores splits by mean absolute
# error. scikit-learn 1.0 renamed this criterion from "mae" to
# "absolute_error", and release 1.2 removed the old spelling.
decisiontree_mae = DecisionTreeRegressor(criterion="absolute_error", random_state=0)

# Train model
model_mae = decisiontree_mae.fit(features, target)

In [10]:
# Predict observation's value with the tree trained on the MAE criterion
model_mae.predict(observation)

array([33.])

## 14.3 Visualizing a Decision Tree Model

In [11]:
# pip install pydotplus

In [12]:
# pip install graphviz  # Python bindings only; the Graphviz executables must be installed separately

In [13]:
# conda install graphviz  # installs the Graphviz executables (e.g. `dot`) that pydotplus calls to render images

In [14]:
# Load libraries
import pydotplus
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
from IPython.display import Image
from sklearn import tree

# Fetch the iris dataset and separate the measurements from the labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Build a seeded classifier and fit it on the full dataset
decisiontree = DecisionTreeClassifier(random_state=0)
model = decisiontree.fit(features, target)

# Describe the fitted tree in DOT format. With out_file=None the DOT source
# is returned as a string instead of being written to disk; the feature and
# class names from the dataset are used to label the nodes.
export_options = dict(
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
)
dot_data = tree.export_graphviz(decisiontree, **export_options)

In [15]:
# Parse the DOT source into a pydotplus graph object
graph = pydotplus.graph_from_dot_data(dot_data)

# Render the graph to PNG bytes; this step needs the Graphviz executables
# to be installed and discoverable, otherwise pydotplus raises
# InvocationException
png_bytes = graph.create_png()

# Display the rendered image inline
Image(png_bytes)

InvocationException: GraphViz's executables not found