## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This example is an add-on to the book and must be treated as covered by the book's copyright.

# Chapter 004 Example 015F - Sickness 06

In [1]:
# Optional one-off setup: uncomment the next line to install the graphviz Python
# package into the current environment before running the rest of the notebook.
#!pip install graphviz

## Part A - Load Libraries

In [2]:
import datetime
import os
import shutil

import graphviz as gv
import numpy as np
import pandas as pd
from sklearn import tree

In [3]:
# Directory that is expected to hold the Graphviz executables on a Windows machine;
# change this if Graphviz is installed somewhere else.
graphvizBin = 'c:/Program Files (x86)/Graphviz/bin/'
# Append it to PATH only when the directory really exists and is not listed yet, so
# re-running this cell does not keep growing PATH and other platforms stay untouched.
if os.path.isdir(graphvizBin) and graphvizBin not in os.environ["PATH"].split(os.pathsep):
    os.environ["PATH"] += os.pathsep + graphvizBin

## Part B - Load Data

In [4]:
# Location of the input CSV, given relative to the current working directory.
fileName = '../../Data/Sickness06.csv'
# Expand it to an absolute path and echo it so the resolved location shows in the output.
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Data\Sickness06.csv


In [5]:
# Read the CSV, taking the first row as the column header, then report the
# frame's dimensions and column labels on separate lines.
sickdf = pd.read_csv(fileFullName, header=0)
print(sickdf.shape, sickdf.columns, sep='\n')

(5000, 10)
Index(['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08', 'T01', 'T02'], dtype='object')


In [6]:
# Column groups: eight feature columns and two target columns.
featureColumns = ['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08']
targetColumns = ['T01', 'T02']

# Whole table as one array (not referenced again in the cells shown here).
sick = np.array(sickdf)
# Independent array copies of the feature block and of the target block.
sick_data = np.array(sickdf[featureColumns].copy(deep=True))
sick_target = np.array(sickdf[targetColumns].copy(deep=True))

## Part C - Load Model

In [7]:
# Extremely randomised single tree: entropy criterion, random split selection and
# class weights balanced against class frequency.
# random_state pins the random split selection so that the figures printed by the
# following cells (importances, node count, tree dump) are repeatable after a
# kernel restart; change the seed to explore a different tree.
clf = tree.ExtraTreeClassifier(
    class_weight='balanced',
    criterion='entropy',
    splitter='random',
    random_state=42,
)

In [8]:
# Train on the feature array against both target columns at once; the object
# returned by fit() is kept as clfr and inspected by the cells that follow.
clfr = clf.fit(X=sick_data, y=sick_target)

In [9]:
# Class count per output, printed under its caption.
print('Number of Classes:', clfr.n_classes_, sep='\n')

Number of Classes:
[2 2]


In [10]:
# Class labels per output, printed under their caption.
print('Classes:', clfr.classes_, sep='\n')

Classes:
[array([0, 1], dtype=int64), array([1, 2], dtype=int64)]


In [11]:
# One importance value per feature column, printed under its caption.
print('Feature importances:', clfr.feature_importances_, sep='\n')

Feature importances:
[0.06988761 0.25924477 0.09793618 0.08229473 0.10882127 0.14652973
 0.13344736 0.10183836]


In [12]:
print('Number of features:')
# n_features_ was deprecated in scikit-learn 1.0 and removed in 1.2 in favour of
# n_features_in_; prefer the new attribute and fall back to the old one so the
# cell also runs on releases that predate n_features_in_.
print(clfr.n_features_in_ if hasattr(clfr, 'n_features_in_') else clfr.n_features_)

Number of features:
8


In [13]:
# Value the estimator resolved for max_features, printed under its caption.
print('Inferred value of max features:', clfr.max_features_, sep='\n')

Inferred value of max features:
2


In [14]:
# Number of outputs the tree was fitted on, printed under its caption.
print('Number of Outputs:', clfr.n_outputs_, sep='\n')

Number of Outputs:
2


In [15]:
# Total node count of the fitted tree, printed under its caption.
print('Tree Nodes:', clfr.tree_.node_count, sep='\n')

Tree Nodes:
1731


In [16]:
# Pull the parallel per-node arrays out of the fitted tree; index i in each array
# describes node i.
fittedTree = clfr.tree_
n_nodes = fittedTree.node_count
children_left = fittedTree.children_left
children_right = fittedTree.children_right
feature = fittedTree.feature
threshold = fittedTree.threshold

# Walk the tree with an explicit stack, recording each node's depth and whether
# it is a leaf. Each stack entry is (node id, depth of its parent); the root is
# seeded with parent depth -1 so that it ends up at depth 0.
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)]
while stack:
    node_id, parent_depth = stack.pop()
    depth = parent_depth + 1
    node_depth[node_id] = depth

    left_child = children_left[node_id]
    right_child = children_right[node_id]
    if left_child == right_child:
        # Identical child ids mean there is no split here: treat the node as a leaf.
        is_leaves[node_id] = True
        continue
    # Test node: queue both children one level deeper.
    stack.extend([(left_child, depth), (right_child, depth)])

# Print one line per node, indented with one tab per depth level.
print("The tree structure has %s nodes and has the following tree structure:" % n_nodes)
for i in range(n_nodes):
    indent = node_depth[i] * "\t"
    if is_leaves[i]:
        print("%snode=%s leaf node." % (indent, i))
        continue
    print("%snode=%s test node: go to node %s if X[:, %s] <= %s else to node %s."
          % (indent, i, children_left[i], feature[i], threshold[i], children_right[i]))
print()

The tree structure has 1731 nodes and has the following tree structure:
node=0 test node: go to node 1 if X[:, 4] <= 323.3428494750256 else to node 1666.
	node=1 test node: go to node 2 if X[:, 1] <= 140.0270862640008 else to node 1251.
		node=2 test node: go to node 3 if X[:, 1] <= 82.50646086898934 else to node 38.
			node=3 test node: go to node 4 if X[:, 2] <= 52.5405141629933 else to node 5.
				node=4 leaf node.
				node=5 test node: go to node 6 if X[:, 2] <= 60.84020234264443 else to node 7.
					node=6 leaf node.
					node=7 test node: go to node 8 if X[:, 5] <= 41.68330908748075 else to node 37.
						node=8 test node: go to node 9 if X[:, 1] <= 28.639827150683768 else to node 14.
							node=9 test node: go to node 10 if X[:, 7] <= 39.51765513397644 else to node 13.
								node=10 test node: go to node 11 if X[:, 5] <= 36.923273038311635 else to node 12.
									node=11 leaf node.
									node=12 leaf node.
								node=13 leaf node.
							node=14 test node: go to node 1

																						node=498 leaf node.
																						node=499 test node: go to node 500 if X[:, 7] <= 38.87203590659054 else to node 501.
																							node=500 leaf node.
																							node=501 leaf node.
																				node=502 test node: go to node 503 if X[:, 7] <= 23.003602541053482 else to node 504.
																					node=503 leaf node.
																					node=504 leaf node.
																			node=505 test node: go to node 506 if X[:, 7] <= 36.28633635551033 else to node 565.
																				node=506 test node: go to node 507 if X[:, 0] <= 3.4815459356091667 else to node 526.
																					node=507 test node: go to node 508 if X[:, 1] <= 99.46400788680837 else to node 509.
																						node=508 leaf node.
																						node=509 test node: go to node 510 if X[:, 6] <= 0.28144683336364257 else to node 517.
																							node=510 test node: go to node 511 if X[:, 7] <= 33.74684923642634 else to node 516.
		

																	node=871 test node: go to node 872 if X[:, 4] <= 137.19695650562502 else to node 873.
																		node=872 leaf node.
																		node=873 test node: go to node 874 if X[:, 1] <= 116.24456478620161 else to node 875.
																			node=874 leaf node.
																			node=875 test node: go to node 876 if X[:, 7] <= 31.2155430471597 else to node 877.
																				node=876 leaf node.
																				node=877 test node: go to node 878 if X[:, 0] <= 3.711886368557758 else to node 879.
																					node=878 leaf node.
																					node=879 leaf node.
																node=880 leaf node.
														node=881 leaf node.
												node=882 leaf node.
									node=883 test node: go to node 884 if X[:, 7] <= 19.089648176492027 else to node 889.
										node=884 test node: go to node 885 if X[:, 7] <= 18.301520338888057 else to node 886.
											node=885 leaf node.
											node=886 test node: go to node 887 if X[:, 6] <= 1.

																node=1114 test node: go to node 1115 if X[:, 4] <= 201.96465272920423 else to node 1116.
																	node=1115 leaf node.
																	node=1116 leaf node.
												node=1117 test node: go to node 1118 if X[:, 4] <= 122.60611083247052 else to node 1125.
													node=1118 test node: go to node 1119 if X[:, 6] <= 0.8939162270397755 else to node 1124.
														node=1119 test node: go to node 1120 if X[:, 7] <= 33.543931554790554 else to node 1123.
															node=1120 test node: go to node 1121 if X[:, 2] <= 50.181433182294214 else to node 1122.
																node=1121 leaf node.
																node=1122 leaf node.
															node=1123 leaf node.
														node=1124 leaf node.
													node=1125 test node: go to node 1126 if X[:, 2] <= 62.33127934780497 else to node 1127.
														node=1126 leaf node.
														node=1127 test node: go to node 1128 if X[:, 5] <= 35.00032173266149 else to node 1129.
															node=1128 l

									node=1381 leaf node.
							node=1382 leaf node.
			node=1383 test node: go to node 1384 if X[:, 0] <= -2.7716619771773283 else to node 1387.
				node=1384 test node: go to node 1385 if X[:, 1] <= 161.50145551306264 else to node 1386.
					node=1385 leaf node.
					node=1386 leaf node.
				node=1387 test node: go to node 1388 if X[:, 6] <= 1.6281539114390886 else to node 1665.
					node=1388 test node: go to node 1389 if X[:, 6] <= 1.0134984057819918 else to node 1646.
						node=1389 test node: go to node 1390 if X[:, 4] <= 264.06838704415986 else to node 1637.
							node=1390 test node: go to node 1391 if X[:, 6] <= 0.9334825537109751 else to node 1636.
								node=1391 test node: go to node 1392 if X[:, 6] <= 0.5015331111941694 else to node 1551.
									node=1392 test node: go to node 1393 if X[:, 3] <= 33.16421841185736 else to node 1522.
										node=1393 test node: go to node 1394 if X[:, 1] <= 143.60315219061596 else to node 1405.
											node=1394 test node: g

In [17]:
# Export the fitted tree as DOT text (out_file=None, so the result is kept in
# dot_data) and wrap that text in a graphviz Source set up for PNG output.
dot_data = tree.export_graphviz(decision_tree=clfr, out_file=None)
graph = gv.Source(source=dot_data, format='png')

In [18]:
# Results folder: two directory levels above the current working directory,
# then Results/Chapter 04.
baseDir = os.path.dirname(os.path.dirname(os.getcwd()))
imagepath = os.path.join(baseDir, 'Results', 'Chapter 04')
print(imagepath)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Results\Chapter 04


In [19]:
# Create the results folder together with any missing parents. exist_ok=True turns
# this into a no-op when the folder is already there and removes the gap between
# "check" and "create" that the separate os.path.exists() test left open.
os.makedirs(imagepath, exist_ok=True)

In [20]:
# Full path of the DOT file inside Results/Chapter 04, two directory levels above
# the current working directory.
resultsDir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'Results', 'Chapter 04')
imagename = os.path.join(resultsDir, 'Chapter-004-Example-015-06.dot')

In [21]:
# Render the graph to disk using imagename as the base file name. In the recorded
# run the call returned imagename + '.png'; the next cell deletes the file at
# imagename itself and renames that PNG.
graph.render(filename=imagename)

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.839577 to fit


'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrial Machine Learning\\book\\GitHub\\Upload\\industrial-machine-learning\\Results\\Chapter 04\\Chapter-004-Example-015-06.dot.png'

In [22]:
# Both file names live in Results/Chapter 04, two directory levels above the
# current working directory; build that folder path once.
renderDir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'Results', 'Chapter 04')
imagename1 = os.path.join(renderDir, 'Chapter-004-Example-015-06.dot.png')
imagename2 = os.path.join(renderDir, 'Chapter-004-Example-015-06.png')

# The file at imagename is only an intermediate artefact of rendering; skip the
# delete when it is already gone so the cell can be re-run without an error.
if os.path.exists(imagename):
    os.remove(imagename)

# Rename the freshly rendered PNG. On a re-run the PNG has already been moved, so
# only fail when neither the source nor the final file can be found.
if os.path.exists(imagename1):
    shutil.move(imagename1, imagename2)
elif not os.path.exists(imagename2):
    raise FileNotFoundError('Rendered image not found: ' + imagename1)

# Show the final image path as the cell result.
imagename2

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrial Machine Learning\\book\\GitHub\\Upload\\industrial-machine-learning\\Results\\Chapter 04\\Chapter-004-Example-015-06.png'

In [23]:
# Open the renamed PNG through graphviz's view helper.
gv.view(imagename2)

In [24]:
# Completion marker: print the wall-clock time at which the notebook finished.
# (datetime is imported with the other modules in the import cell at the top.)
now = datetime.datetime.now()
print('Done!',str(now))

Done! 2019-10-19 17:27:08.301458
