## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This is an example add-on to a book and needs to be accepted as part of that copyright.

# Chapter 004 Example 015F - Sickness 06

In [1]:
#!pip install graphviz

## Part A - Load Libraries

In [2]:
import os
import shutil
import graphviz as gv
import pandas as pd
import numpy as np
from sklearn import tree

In [3]:
os.environ["PATH"] += os.pathsep + 'c:/Program Files (x86)/Graphviz/bin/'

## Part B - Load Data

In [4]:
fileName = '../../Data/Sickness06.csv'
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrialized Machine Learning\book\IML\Data\Sickness06.csv


In [5]:
sickdf= pd.read_csv(fileFullName, header=0)
print(sickdf.shape)
print(sickdf.columns)

(5000, 10)
Index(['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08', 'T01', 'T02'], dtype='object')


In [6]:
sick = np.array(sickdf)
sick_data = np.array(sickdf[['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08']].copy(deep=True))
sick_target = np.array(sickdf[['T01', 'T02']].copy(deep=True))

## Part C - Load Model

In [7]:
clf = tree.ExtraTreeClassifier(class_weight='balanced', criterion='entropy', splitter='random')

In [8]:
clfr = clf.fit(sick_data, sick_target)

In [9]:
print('Number of Classes:')
print(clfr.n_classes_)

Number of Classes:
[2 2]


In [10]:
print('Classes:')
print(clfr.classes_)

Classes:
[array([0, 1], dtype=int64), array([1, 2], dtype=int64)]


In [11]:
print('Feature importances:')
print(clfr.feature_importances_)

Feature importances:
[0.06754637 0.20549076 0.0930483  0.06235836 0.09106271 0.23714442
 0.14185824 0.10149085]


In [12]:
print('Number of features:')
print(clfr.n_features_)

Number of features:
8


In [13]:
print('Inferred value of max features:')
print(clfr.max_features_)

Inferred value of max features:
2


In [14]:
print('Number of Outputs:')
print(clfr.n_outputs_)

Number of Outputs:
2


In [15]:
print('Tree Nodes:')
print(clfr.tree_.node_count)

Tree Nodes:
1675


In [16]:
n_nodes = clfr.tree_.node_count
children_left = clfr.tree_.children_left
children_right = clfr.tree_.children_right
feature = clfr.tree_.feature
threshold = clfr.tree_.threshold


node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)]  # seed is the root node id plus parent depth
while len(stack) > 0:
    node_id, parent_depth = stack.pop()
    node_depth[node_id] = parent_depth + 1

    # If we have a test node
    if (children_left[node_id] != children_right[node_id]):
        stack.append((children_left[node_id], parent_depth + 1))
        stack.append((children_right[node_id], parent_depth + 1))
    else:
        is_leaves[node_id] = True

print("The tree structure has %s nodes and has the following tree structure:" % n_nodes)
for i in range(n_nodes):
    if is_leaves[i]:
        print("%snode=%s leaf node." % (node_depth[i] * "\t", i))
    else:
        print("%snode=%s test node: go to node %s if X[:, %s] <= %s else to node %s." % (node_depth[i] * "\t",
                 i,
                 children_left[i],
                 feature[i],
                 threshold[i],
                 children_right[i],
                 ))
print()

The tree structure has 1675 nodes and has the following tree structure:
node=0 test node: go to node 1 if X[:, 5] <= 24.93878845496832 else to node 100.
	node=1 test node: go to node 2 if X[:, 4] <= 328.64471308544495 else to node 99.
		node=2 test node: go to node 3 if X[:, 1] <= 97.1560739256237 else to node 4.
			node=3 leaf node.
			node=4 test node: go to node 5 if X[:, 0] <= 3.482747848836121 else to node 60.
				node=5 test node: go to node 6 if X[:, 0] <= -0.27613786388008776 else to node 7.
					node=6 leaf node.
					node=7 test node: go to node 8 if X[:, 7] <= 51.766652359518524 else to node 47.
						node=8 test node: go to node 9 if X[:, 4] <= 108.23491609154033 else to node 32.
							node=9 test node: go to node 10 if X[:, 2] <= 1.611811955278652 else to node 11.
								node=10 leaf node.
								node=11 test node: go to node 12 if X[:, 3] <= 10.102579655639166 else to node 13.
									node=12 leaf node.
									node=13 test node: go to node 14 if X[:, 5] <= 12.487117

																	node=173 test node: go to node 174 if X[:, 6] <= 0.2992164314580176 else to node 187.
																		node=174 test node: go to node 175 if X[:, 4] <= -1.7960349413547827 else to node 176.
																			node=175 leaf node.
																			node=176 test node: go to node 177 if X[:, 7] <= 28.485154094866083 else to node 182.
																				node=177 test node: go to node 178 if X[:, 1] <= 119.80841736626272 else to node 179.
																					node=178 leaf node.
																					node=179 test node: go to node 180 if X[:, 1] <= 121.91411310057813 else to node 181.
																						node=180 leaf node.
																						node=181 leaf node.
																				node=182 test node: go to node 183 if X[:, 2] <= 64.84556005003189 else to node 184.
																					node=183 leaf node.
																					node=184 test node: go to node 185 if X[:, 2] <= 67.71865244848591 else to node 186.
																						node=185 leaf node.
																

																			node=707 leaf node.
																			node=708 leaf node.
																		node=709 leaf node.
																node=710 test node: go to node 711 if X[:, 5] <= 29.54997845090904 else to node 712.
																	node=711 leaf node.
																	node=712 leaf node.
															node=713 leaf node.
														node=714 leaf node.
											node=715 test node: go to node 716 if X[:, 4] <= 81.08088580522727 else to node 735.
												node=716 test node: go to node 717 if X[:, 7] <= 28.31757353167868 else to node 724.
													node=717 test node: go to node 718 if X[:, 3] <= 15.474802759231443 else to node 719.
														node=718 leaf node.
														node=719 test node: go to node 720 if X[:, 6] <= 0.2798163162942733 else to node 721.
															node=720 leaf node.
															node=721 test node: go to node 722 if X[:, 2] <= 65.3482511194182 else to node 723.
																node=722 leaf node.
																node=723 leaf node.
								

																	node=882 test node: go to node 883 if X[:, 6] <= 0.33307992451305624 else to node 912.
																		node=883 test node: go to node 884 if X[:, 3] <= 41.37087063788012 else to node 911.
																			node=884 test node: go to node 885 if X[:, 2] <= 67.29440106418654 else to node 896.
																				node=885 test node: go to node 886 if X[:, 6] <= 0.3099018465587159 else to node 895.
																					node=886 test node: go to node 887 if X[:, 3] <= 31.35997681150212 else to node 894.
																						node=887 test node: go to node 888 if X[:, 1] <= 109.99758531059958 else to node 889.
																							node=888 leaf node.
																							node=889 test node: go to node 890 if X[:, 0] <= 2.2002574676648985 else to node 893.
																								node=890 test node: go to node 891 if X[:, 6] <= 0.2555160700706775 else to node 892.
																									node=891 leaf node.
																									node=892 leaf node.
																	

																					node=1381 leaf node.
																node=1382 test node: go to node 1383 if X[:, 2] <= 55.83687115127075 else to node 1386.
																	node=1383 test node: go to node 1384 if X[:, 4] <= 34.03749280052143 else to node 1385.
																		node=1384 leaf node.
																		node=1385 leaf node.
																	node=1386 leaf node.
														node=1387 leaf node.
													node=1388 leaf node.
											node=1389 test node: go to node 1390 if X[:, 6] <= 0.7143374619749278 else to node 1393.
												node=1390 test node: go to node 1391 if X[:, 6] <= 0.6947838559575183 else to node 1392.
													node=1391 leaf node.
													node=1392 leaf node.
												node=1393 test node: go to node 1394 if X[:, 7] <= 23.474654490349188 else to node 1395.
													node=1394 leaf node.
													node=1395 test node: go to node 1396 if X[:, 1] <= 148.06537527687166 else to node 1399.
														node=1396 test node: go to node 1397 if X[:,

In [17]:
dot_data = tree.export_graphviz(clfr, out_file=None) 
graph = gv.Source(dot_data, format='png')

In [18]:
imagepath = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04'])
print(imagepath)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrialized Machine Learning\book\IML\Results\Chapter 04


In [19]:
if not os.path.exists(imagepath):
    os.makedirs(imagepath)

In [20]:
imagename = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04','Chapter-004-Example-015-06.dot'])

In [21]:
graph.render(filename=imagename)

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.861451 to fit


'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrialized Machine Learning\\book\\IML\\Results\\Chapter 04\\Chapter-004-Example-015-06.dot.png'

In [22]:
imagename1 = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04','Chapter-004-Example-015-06.dot.png'])
imagename2 = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04','Chapter-004-Example-015-06.png'])
os.remove(imagename)
shutil.move(imagename1,imagename2)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrialized Machine Learning\\book\\IML\\Results\\Chapter 04\\Chapter-004-Example-015-06.png'

In [23]:
gv.view(imagename2)

In [24]:
import datetime
now = datetime.datetime.now()
print('Done!',str(now))

Done! 2019-04-08 19:33:13.290063
