## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This is an example add-on to a book and needs to be accepted as part of that copyright.

# Chapter 004 Example 015C

In [1]:
# One-time setup: uncomment the next line to install the `graphviz` Python
# package that the import cell below brings in as `gv`.
#!pip install graphviz

## Part A - Load Libraries

In [2]:
import datetime
import os
import shutil

import graphviz as gv
import numpy as np
import pandas as pd
from sklearn import tree

In [3]:
# Make the Graphviz executables discoverable for this kernel session.
# The directory is appended only when it is not already a PATH entry, so
# re-running this cell does not keep growing PATH, and a missing PATH
# variable is treated as empty instead of raising KeyError.
GRAPHVIZ_BIN = 'c:/Program Files (x86)/Graphviz/bin/'
_current_path = os.environ.get("PATH", "")
if GRAPHVIZ_BIN not in _current_path.split(os.pathsep):
    os.environ["PATH"] = _current_path + os.pathsep + GRAPHVIZ_BIN

## Part B - Load Data

In [4]:
# Location of the rose data set, given relative to the notebook's working
# directory; it is expanded to an absolute path and echoed so the reader can
# see exactly which file will be read.
fileName = '../../Data/Roses03.csv'
fileFullName = os.path.abspath(fileName)
print(fileFullName, end='\n')

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrialized Machine Learning\book\IML\Data\Roses03.csv


In [5]:
# Read the CSV into a DataFrame (row 0 supplies the column names), then print
# its (rows, columns) shape followed by the column index.
rosedf = pd.read_csv(fileFullName, header=0)
for frame_summary in (rosedf.shape, rosedf.columns):
    print(frame_summary)

(1000, 5)
Index(['F01', 'F02', 'F03', 'F04', 'T'], dtype='object')


In [6]:
# Split the frame into NumPy arrays for scikit-learn:
#   rose        - every column of the frame
#   rose_data   - the four feature columns F01..F04
#   rose_target - the label column 'T'; it is selected with a list, so the
#                 array keeps a 2-D single-column shape
feature_frame = rosedf[['F01', 'F02', 'F03', 'F04']].copy(deep=True)
target_frame = rosedf[['T']].copy(deep=True)

rose = np.array(rosedf)
rose_data = np.array(feature_frame)
rose_target = np.array(target_frame)

## Part C - Build and Train Model

In [7]:
# Decision-tree classifier with default hyper-parameters.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split; without a fixed seed, ties between equally good splits can
# yield a different tree (node count, thresholds, importances) on each run.
clf = tree.DecisionTreeClassifier(random_state=0)

In [8]:
# Train the classifier on the feature matrix and label column; the value
# returned by fit() is kept as `clfr` and used by every cell below.
clfr = clf.fit(X=rose_data, y=rose_target)

In [9]:
# Report how many distinct class labels the fitted tree holds (heading on one
# line, value on the next).
print('Number of Classes:', clfr.n_classes_, sep='\n')

Number of Classes:
4


In [10]:
# List the class labels stored on the fitted tree (heading, then the labels).
print('Classes:', clfr.classes_, sep='\n')

Classes:
[1 2 3 4]


In [11]:
# Show the per-feature importance scores of the fitted tree, in the same
# column order as rose_data.
print('Feature importances:', clfr.feature_importances_, sep='\n')

Feature importances:
[0.00684541 0.00856643 0.75676629 0.22782186]


In [12]:
# Report how many input features the tree was fitted on.
# `n_features_` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `n_features_in_`; prefer the new attribute and fall back to the
# old one only on releases that predate it.
n_features_seen = getattr(clfr, 'n_features_in_', None)
if n_features_seen is None:
    n_features_seen = clfr.n_features_
print('Number of features:')
print(n_features_seen)

Number of features:
4


In [13]:
# Show the max_features value the estimator settled on during fitting.
print('Inferred value of max features:', clfr.max_features_, sep='\n')

Inferred value of max features:
4


In [14]:
# Report the number of outputs recorded on the fitted tree.
print('Number of Outputs:', clfr.n_outputs_, sep='\n')

Number of Outputs:
1


In [15]:
# Report the total number of nodes in the underlying tree structure.
print('Tree Nodes:', clfr.tree_.node_count, sep='\n')

Tree Nodes:
37


In [16]:
# Pull the parallel arrays that describe the fitted tree: for node i,
# children_left[i] / children_right[i] are the ids of its children and
# feature[i] / threshold[i] describe the test applied at that node.
n_nodes = clfr.tree_.node_count
children_left = clfr.tree_.children_left
children_right = clfr.tree_.children_right
feature = clfr.tree_.feature
threshold = clfr.tree_.threshold

# Per-node results filled in by the walk below.
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)

# Iterative depth-first walk starting at the root (node 0, depth 0).
# Each stack entry is (node id, depth of that node).
stack = [(0, 0)]
while stack:
    current, depth = stack.pop()
    node_depth[current] = depth

    left_child, right_child = children_left[current], children_right[current]
    if left_child == right_child:
        # Identical child ids mark a node without a split, i.e. a leaf.
        is_leaves[current] = True
        continue

    # Test node: visit both children one level deeper.
    stack.extend([(left_child, depth + 1), (right_child, depth + 1)])

# Print one line per node, indented with one tab per depth level.
print("The tree structure has %s nodes and has the following tree structure:" % n_nodes)
for idx in range(n_nodes):
    indent = node_depth[idx] * "\t"
    if is_leaves[idx]:
        print("%snode=%s leaf node." % (indent, idx))
        continue
    print("%snode=%s test node: go to node %s if X[:, %s] <= %s else to node %s." % (
        indent,
        idx,
        children_left[idx],
        feature[idx],
        threshold[idx],
        children_right[idx],
    ))
print()

The tree structure has 37 nodes and has the following tree structure:
node=0 test node: go to node 1 if X[:, 2] <= 8.928000211715698 else to node 36.
	node=1 test node: go to node 2 if X[:, 2] <= 2.428000032901764 else to node 3.
		node=2 leaf node.
		node=3 test node: go to node 4 if X[:, 3] <= 1.7260000109672546 else to node 25.
			node=4 test node: go to node 5 if X[:, 2] <= 4.9054999351501465 else to node 12.
				node=5 test node: go to node 6 if X[:, 3] <= 1.6865000128746033 else to node 11.
					node=6 test node: go to node 7 if X[:, 0] <= 4.836499929428101 else to node 10.
						node=7 test node: go to node 8 if X[:, 3] <= 1.3019999861717224 else to node 9.
							node=8 leaf node.
							node=9 leaf node.
						node=10 leaf node.
					node=11 leaf node.
				node=12 test node: go to node 13 if X[:, 2] <= 5.328000068664551 else to node 24.
					node=13 test node: go to node 14 if X[:, 3] <= 1.5864999890327454 else to node 23.
						node=14 test node: go to node 15 if X[:, 0] <= 6

In [17]:
# Export the fitted tree as a DOT description. out_file=None is passed so the
# description comes back as the return value, which is then wrapped in a
# graphviz Source configured for PNG output.
dot_data = tree.export_graphviz(decision_tree=clfr, out_file=None)
graph = gv.Source(source=dot_data, format='png')

In [18]:
# The results folder sits two directory levels above the current working
# directory, under Results/Chapter 04. Echo it for the reader.
two_levels_up = os.path.dirname(os.path.dirname(os.getcwd()))
imagepath = os.path.join(two_levels_up, 'Results', 'Chapter 04')
print(imagepath)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrialized Machine Learning\book\IML\Results\Chapter 04


In [19]:
# Create the results folder (and any missing parents). exist_ok=True replaces
# the separate exists() check, so the cell is safe to re-run and cannot fail
# if the folder appears between the check and the creation.
os.makedirs(imagepath, exist_ok=True)

In [20]:
# Full path of the DOT file for this example, inside Results/Chapter 04 two
# directory levels above the current working directory.
chapter_results_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.getcwd())), 'Results', 'Chapter 04')
imagename = os.path.join(chapter_results_dir, 'Chapter-004-Example-015-03.dot')

In [21]:
# Render the graph using `imagename` as the output file name. The call is
# left as the cell's last expression so the returned path is displayed; in
# the recorded run that path is `imagename` with '.png' appended.
graph.render(filename=imagename)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrialized Machine Learning\\book\\IML\\Results\\Chapter 04\\Chapter-004-Example-015-03.dot.png'

In [22]:
# Tidy up after rendering: delete the intermediate DOT file and rename the
# rendered '<name>.dot.png' to '<name>.png'. The move stays as the last
# expression so the final image path is shown as the cell output.
rendered_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.getcwd())), 'Results', 'Chapter 04')
imagename1 = os.path.join(rendered_dir, 'Chapter-004-Example-015-03.dot.png')
imagename2 = os.path.join(rendered_dir, 'Chapter-004-Example-015-03.png')

os.remove(imagename)
shutil.move(imagename1, imagename2)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrialized Machine Learning\\book\\IML\\Results\\Chapter 04\\Chapter-004-Example-015-03.png'

In [23]:
# Open the renamed PNG through graphviz's `view` helper.
# NOTE(review): presumably this launches the operating system's default
# viewer for the file - confirm against the graphviz package documentation.
gv.view(imagename2)

In [24]:
# Record when the notebook finished. `datetime` is imported with the other
# modules in the import cell at the top of the notebook; print() already
# converts `now` to its string form, so no explicit str() is needed.
now = datetime.datetime.now()
print('Done!', now)

Done! 2019-04-08 16:08:37.369059
