## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This example is an add-on to the book and is covered by the book's copyright.

# Chapter 004 Example 015E - Sickness 06

In [1]:
# Optional one-time setup, left disabled: uncomment the next line to install the
# `graphviz` Python package that this notebook imports as `gv`.
#!pip install graphviz

## Part A - Load Libraries

In [2]:
import os
import shutil
import graphviz as gv
import pandas as pd
import numpy as np
from sklearn import tree

In [3]:
# Make the Graphviz executables discoverable through PATH.
# The directory below is a Windows install location; adjust it if Graphviz
# lives somewhere else on your machine.
graphvizBinPath = 'c:/Program Files (x86)/Graphviz/bin/'

# Append only when the directory is not already present, so re-running this cell
# does not keep growing PATH, and tolerate an environment where PATH is unset.
currentPath = os.environ.get("PATH", "")
if graphvizBinPath not in currentPath:
    os.environ["PATH"] = os.pathsep.join(
        part for part in (currentPath, graphvizBinPath) if part
    )

## Part B - Load Data

In [4]:
# Location of the Sickness06 CSV, expressed relative to the current working directory.
fileName = '../../Data/Sickness06.csv'

# Resolve it to an absolute path and echo the result so the reader can see
# exactly which file is about to be loaded.
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Data\Sickness06.csv


In [5]:
# Read the CSV into a DataFrame; the first row of the file supplies the column names.
sickdf = pd.read_csv(filepath_or_buffer=fileFullName, header=0)

# Report the frame's dimensions and then its column index, each on its own line.
print(sickdf.shape, sickdf.columns, sep='\n')

(5000, 10)
Index(['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08', 'T01', 'T02'], dtype='object')


In [6]:
# Column groups: the eight F-columns become the model inputs,
# the two T-columns become the (two-column) target.
featureColumns = ['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08']
targetColumns = ['T01', 'T02']

# Whole frame as a NumPy array, plus separate input and target arrays
# built from deep copies of the selected columns.
sick = np.array(sickdf)
sick_data = np.array(sickdf[featureColumns].copy(deep=True))
sick_target = np.array(sickdf[targetColumns].copy(deep=True))

## Part C - Build and Train Model

In [7]:
# Entropy-criterion decision tree with balanced class weights.
#
# The `presort` argument has been dropped: scikit-learn deprecated it and then
# removed it, so passing it raises TypeError on current releases.
#
# `random_state` is fixed because scikit-learn permutes the candidate features at
# every split; without a seed, ties between equally good splits can be broken
# differently from run to run, changing the fitted tree.
clf = tree.DecisionTreeClassifier(
    class_weight='balanced',
    criterion='entropy',
    random_state=42,
)

In [8]:
# Train the tree on the feature matrix and the target array prepared above;
# the value returned by fit() is kept as `clfr` for the inspection cells below.
clfr = clf.fit(X=sick_data, y=sick_target)

In [9]:
# Class count(s) recorded on the fitted tree; label and value go on separate lines.
print('Number of Classes:', clfr.n_classes_, sep='\n')

Number of Classes:
[2 2]


In [10]:
# Class labels recorded on the fitted tree; label and value go on separate lines.
print('Classes:', clfr.classes_, sep='\n')

Classes:
[array([0, 1], dtype=int64), array([1, 2], dtype=int64)]


In [11]:
# Importance score of each input column, in the column order used for fitting.
print('Feature importances:', clfr.feature_importances_, sep='\n')

Feature importances:
[0.03340533 0.2315712  0.09481979 0.04596787 0.08493857 0.25377741
 0.14291961 0.11260023]


In [12]:
print('Number of features:')
# `n_features_` was deprecated and later removed from scikit-learn estimators in
# favour of `n_features_in_`. Prefer the new attribute and fall back to the old
# one so the cell also runs on older scikit-learn installations.
featureCount = getattr(clfr, 'n_features_in_', None)
if featureCount is None:
    featureCount = clfr.n_features_
print(featureCount)

Number of features:
8


In [13]:
# The max_features value the estimator settled on during fit.
print('Inferred value of max features:', clfr.max_features_, sep='\n')

Inferred value of max features:
8


In [14]:
# Number of outputs the fitted tree reports.
print('Number of Outputs:', clfr.n_outputs_, sep='\n')

Number of Outputs:
2


In [15]:
# Total number of nodes in the fitted tree's underlying structure.
print('Tree Nodes:', clfr.tree_.node_count, sep='\n')

Tree Nodes:
487


In [16]:
# Pull the parallel arrays that describe the fitted tree. Entry i of each array
# describes node i: its two children, the feature it tests and the threshold used.
fittedTree = clfr.tree_
n_nodes = fittedTree.node_count
children_left = fittedTree.children_left
children_right = fittedTree.children_right
feature = fittedTree.feature
threshold = fittedTree.threshold

# Depth-first walk from the root (node 0, depth 0) recording each node's depth
# and whether it is a leaf.
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
pending = [(0, 0)]  # (node id, depth of that node)
while pending:
    node_id, depth = pending.pop()
    node_depth[node_id] = depth

    left_child = children_left[node_id]
    right_child = children_right[node_id]

    # A node whose left and right child ids are identical has no split: it is a leaf.
    if left_child == right_child:
        is_leaves[node_id] = True
        continue

    # Otherwise it is a test node; both children sit one level deeper.
    pending.append((left_child, depth + 1))
    pending.append((right_child, depth + 1))

print("The tree structure has %s nodes and has the following tree structure:" % n_nodes)
for i in range(n_nodes):
    # One tab per level of depth so the printout mirrors the tree's nesting.
    indent = "\t" * int(node_depth[i])
    if is_leaves[i]:
        line = "%snode=%s leaf node." % (indent, i)
    else:
        line = "%snode=%s test node: go to node %s if X[:, %s] <= %s else to node %s." % (
            indent,
            i,
            children_left[i],
            feature[i],
            threshold[i],
            children_right[i],
        )
    print(line)
print()

The tree structure has 487 nodes and has the following tree structure:
node=0 test node: go to node 1 if X[:, 1] <= 123.5 else to node 254.
	node=1 test node: go to node 2 if X[:, 5] <= 26.49899959564209 else to node 15.
		node=2 test node: go to node 3 if X[:, 1] <= 106.5 else to node 4.
			node=3 leaf node.
			node=4 test node: go to node 5 if X[:, 5] <= 22.951499938964844 else to node 14.
				node=5 test node: go to node 6 if X[:, 4] <= 49.0 else to node 13.
					node=6 test node: go to node 7 if X[:, 6] <= 0.24849999696016312 else to node 8.
						node=7 leaf node.
						node=8 test node: go to node 9 if X[:, 7] <= 35.5 else to node 12.
							node=9 test node: go to node 10 if X[:, 4] <= -0.5 else to node 11.
								node=10 leaf node.
								node=11 leaf node.
							node=12 leaf node.
					node=13 leaf node.
				node=14 leaf node.
		node=15 test node: go to node 16 if X[:, 7] <= 28.5 else to node 99.
			node=16 test node: go to node 17 if X[:, 5] <= 31.399999618530273 else to n

										node=337 test node: go to node 338 if X[:, 4] <= -1.5 else to node 341.
											node=338 test node: go to node 339 if X[:, 6] <= 0.22499999403953552 else to node 340.
												node=339 leaf node.
												node=340 leaf node.
											node=341 leaf node.
									node=342 leaf node.
								node=343 test node: go to node 344 if X[:, 5] <= 35.44850158691406 else to node 349.
									node=344 test node: go to node 345 if X[:, 4] <= 207.0 else to node 346.
										node=345 leaf node.
										node=346 test node: go to node 347 if X[:, 6] <= 0.7335000038146973 else to node 348.
											node=347 leaf node.
											node=348 leaf node.
									node=349 test node: go to node 350 if X[:, 2] <= 54.0 else to node 351.
										node=350 leaf node.
										node=351 leaf node.
							node=352 leaf node.
						node=353 leaf node.
					node=354 test node: go to node 355 if X[:, 4] <= 192.5 else to node 374.
						node=355 test node: go to node 356 if X[:, 6] <= 0.3120

In [17]:
# Export the fitted tree as DOT text (out_file=None returns the text instead of
# writing a file), then wrap it in a graphviz Source configured for PNG output.
dot_data = tree.export_graphviz(decision_tree=clfr, out_file=None)
graph = gv.Source(source=dot_data, format='png')

In [18]:
# Results folder: two directory levels above the current working directory,
# then Results/Chapter 04.
imagepath = os.path.join(
    os.path.dirname(os.path.dirname(os.getcwd())),
    'Results',
    'Chapter 04',
)
print(imagepath)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Results\Chapter 04


In [19]:
# Create the results folder (and any missing parents). exist_ok=True makes this a
# no-op when the folder is already there and avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs(imagepath, exist_ok=True)

In [20]:
# Target file for the DOT source: two directory levels above the current working
# directory, then Results/Chapter 04/<file name>.
imagename = os.path.join(
    os.path.dirname(os.path.dirname(os.getcwd())),
    'Results',
    'Chapter 04',
    'Chapter-004-Example-015-05.dot',
)

In [21]:
# Render the graph using `imagename` as the base file name. The call returns the
# path of the rendered file, which the notebook shows as this cell's output.
graph.render(imagename)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrial Machine Learning\\book\\GitHub\\Upload\\industrial-machine-learning\\Results\\Chapter 04\\Chapter-004-Example-015-05.dot.png'

In [22]:
# Rebuild the results folder path, then derive the rendered file's current name
# (with the '.dot.png' suffix) and the final name it should carry.
resultsFolder = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'Results', 'Chapter 04')
imagename1 = os.path.join(resultsFolder, 'Chapter-004-Example-015-05.dot.png')
imagename2 = os.path.join(resultsFolder, 'Chapter-004-Example-015-05.png')

# Delete the '.dot' file and move the PNG to its final name; shutil.move's
# return value is displayed as the cell output.
os.remove(imagename)
shutil.move(imagename1, imagename2)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrial Machine Learning\\book\\GitHub\\Upload\\industrial-machine-learning\\Results\\Chapter 04\\Chapter-004-Example-015-05.png'

In [23]:
# Hand the final PNG to graphviz's viewer helper.
gv.view(filepath=imagename2)

In [24]:
import datetime

# Stamp the end of the run. print() applies str() to each argument, so passing
# the datetime directly yields the same text as converting it explicitly.
now = datetime.datetime.now()
print('Done!', now)

Done! 2019-10-19 17:26:32.962457
