## Prepare Iris Dataset

In [25]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [26]:
iris = pd.DataFrame()
iris_from_file = datasets.load_iris()

for column_name, row in zip(iris_from_file['feature_names'], iris_from_file['data'].T):
    iris[column_name] = row
iris["target"] = iris_from_file["target"]

In [27]:
iris_train, iris_test = train_test_split(iris, train_size=0.7, random_state=1)

## Multiclass Decision Tree

In [28]:
from sklearn.tree import DecisionTreeClassifier

In [29]:
dtc = DecisionTreeClassifier()

In [30]:
iris.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')

In [31]:
dtc.fit(iris_train[iris_from_file["feature_names"]], iris_train["target"])

DecisionTreeClassifier()

In [32]:
dtc.predict(iris_test[iris_from_file["feature_names"]])

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0, 1, 2,
       1])

In [33]:
from sklearn.metrics import confusion_matrix

In [47]:
confusion_matrix(iris_train["target"], dtc.predict(iris_train[iris_from_file["feature_names"]]))

array([[36,  0,  0],
       [ 0, 32,  0],
       [ 0,  0, 37]])

In [48]:
confusion_matrix(iris_test["target"], dtc.predict(iris_test[iris_from_file["feature_names"]]))

array([[14,  0,  0],
       [ 0, 17,  1],
       [ 0,  1, 12]])

So pretty good! Only 2 misclassified examples from a decision tree with default parameters

In [36]:
help(DecisionTreeClassifier)

Help on class DecisionTreeClassifier in module sklearn.tree._classes:

class DecisionTreeClassifier(sklearn.base.ClassifierMixin, BaseDecisionTree)
 |  DecisionTreeClassifier(*, criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, class_weight=None, ccp_alpha=0.0)
 |  
 |  A decision tree classifier.
 |  
 |  Read more in the :ref:`User Guide <tree>`.
 |  
 |  Parameters
 |  ----------
 |  criterion : {"gini", "entropy"}, default="gini"
 |      The function to measure the quality of a split. Supported criteria are
 |      "gini" for the Gini impurity and "entropy" for the information gain.
 |  
 |  splitter : {"best", "random"}, default="best"
 |      The strategy used to choose the split at each node. Supported
 |      strategies are "best" to choose the best split and "random" to choose
 |      the best ran

In [37]:
dtc.tree_

<sklearn.tree._tree.Tree at 0x7fd120731ce0>

In [39]:
import sklearn.tree
help(sklearn.tree._tree.Tree)

Help on class Tree in module sklearn.tree._tree:

class Tree(builtins.object)
 |  Array-based representation of a binary decision tree.
 |  
 |  The binary tree is represented as a number of parallel arrays. The i-th
 |  element of each array holds information about the node `i`. Node 0 is the
 |  tree's root. You can find a detailed description of all arrays in
 |  `_tree.pxd`. NOTE: Some of the arrays only apply to either leaves or split
 |  nodes, resp. In this case the values of nodes of the other type are
 |  arbitrary!
 |  
 |  Attributes
 |  ----------
 |  node_count : int
 |      The number of nodes (internal nodes + leaves) in the tree.
 |  
 |  capacity : int
 |      The current capacity (i.e., size) of the arrays, which is at least as
 |      great as `node_count`.
 |  
 |  max_depth : int
 |      The depth of the tree, i.e. the maximum depth of its leaves.
 |  
 |  children_left : array of int, shape [node_count]
 |      children_left[i] holds the node id of the left child 

In [40]:
dtc.tree_.node_count, dtc.tree_.max_depth

(13, 4)

13 nodes and a depth of 4

### We try a more shallow tree

In [41]:
dtc_2 = DecisionTreeClassifier(max_depth=2)

In [43]:
dtc_2.fit(iris_train[iris_from_file["feature_names"]], iris_train["target"])

DecisionTreeClassifier(max_depth=2)

In [46]:
confusion_matrix(iris_train["target"], dtc_2.predict(iris_train[iris_from_file["feature_names"]]))

array([[36,  0,  0],
       [ 0, 31,  1],
       [ 0,  3, 34]])

In [44]:
confusion_matrix(iris_test["target"], dtc_2.predict(iris_test[iris_from_file["feature_names"]]))

array([[14,  0,  0],
       [ 0, 17,  1],
       [ 0,  1, 12]])

In [45]:
dtc_2.tree_.node_count, dtc_2.tree_.max_depth

(5, 2)

### We try a tree with fewer features investigated

Note that max_features is the number of random features that are investigated at each node

In [49]:
dtc_3 = DecisionTreeClassifier(max_features=2)
dtc_3 = dtc_3.fit(iris_train[iris_from_file["feature_names"]], iris_train["target"])

In [50]:
confusion_matrix(iris_train["target"], dtc_3.predict(iris_train[iris_from_file["feature_names"]]))

array([[36,  0,  0],
       [ 0, 32,  0],
       [ 0,  0, 37]])

In [51]:
confusion_matrix(iris_test["target"], dtc_3.predict(iris_test[iris_from_file["feature_names"]]))

array([[14,  0,  0],
       [ 0, 17,  1],
       [ 0,  2, 11]])

In [52]:
dtc_3.tree_.node_count, dtc_3.tree_.max_depth

(15, 5)

In [55]:
dtc_3.feature_importances_

array([0.04360228, 0.02576336, 0.54349335, 0.38714102])

In [56]:
[dtc_3.tree_.feature[i] for i in range(dtc_3.tree_.node_count)]

[2, -2, 3, 0, 2, -2, 0, -2, -2, -2, 3, 1, -2, -2, -2]

Note that the tree had to be a one layer deeper in order to get pure samples with only two features considered for each node

### We try a tree with fewer features investigated and lower maximum depth

In [57]:
dtc_4 = DecisionTreeClassifier(max_features=2, max_depth=2)
dtc_4.fit(iris_train[iris_from_file["feature_names"]], iris_train["target"])

DecisionTreeClassifier(max_depth=2, max_features=2)

In [58]:
confusion_matrix(iris_train["target"], dtc_4.predict(iris_train[iris_from_file["feature_names"]]))

array([[32,  4,  0],
       [ 0, 31,  1],
       [ 0,  4, 33]])

In [59]:
confusion_matrix(iris_test["target"], dtc_4.predict(iris_test[iris_from_file["feature_names"]]))

array([[12,  2,  0],
       [ 1, 16,  1],
       [ 0,  1, 12]])

In [60]:
dtc_4.tree_.node_count, dtc_4.tree_.max_depth

(7, 2)