## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This is an example add-on to a book and needs to be accepted as part of that copyright.

# Chapter 004 Example 015A

In [1]:
#!pip install graphviz

## Part A - Load Libraries

In [2]:
import os
import shutil
import graphviz as gv
import pandas as pd
import numpy as np
from sklearn import tree

In [3]:
os.environ["PATH"] += os.pathsep + 'c:/Program Files (x86)/Graphviz/bin/'

## Part B - Load Data

In [4]:
fileName = '../../Data/Roses01.csv'
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrialized Machine Learning\book\IML\Data\Roses01.csv


In [5]:
rosedf= pd.read_csv(fileFullName, header=0)
print(rosedf.shape)
print(rosedf.columns)

(150, 6)
Index(['F01', 'F02', 'F03', 'F04', 'T', 'T2'], dtype='object')


In [6]:
rose = np.array(rosedf)
rose_data = np.array(rosedf[['F01','F02']].copy(deep=True))
rose_target = np.array(rosedf[['T']].copy(deep=True))

## Part C - Load Model

In [7]:
clf = tree.DecisionTreeClassifier(max_features=None, presort=True, criterion='gini', class_weight='balanced',random_state=321)

In [8]:
print(clf.get_params(deep=True))

{'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': True, 'random_state': 321, 'splitter': 'best'}


## Part D - Train the Model

In [9]:
clfr = clf.fit(rose_data, rose_target)

## Part E - Score the Model

In [10]:
print('Score: %7.4f' % (clf.score(rose_data[75:], rose_target[75:])))

Score:  0.9067


In [11]:
tx=rose_data[:8]
ty=rose_target[:8]
typ=clfr.predict(tx)
print('Features:')
print(tx)
print('Targets:')
print(ty)
print('Predict Targets:')
print(ty)
print('----------------------------------')
print('Predict class probabilities:')
print(clfr.predict_proba(tx))
print('----------------------------------')
print('Score: %7.4f' % (clf.score(tx, ty)))

Features:
[[5.  2. ]
 [6.  2.2]
 [6.2 2.2]
 [6.  2.2]
 [4.5 2.3]
 [5.5 2.3]
 [6.3 2.3]
 [5.  2.3]]
Targets:
[[2]
 [2]
 [2]
 [3]
 [1]
 [2]
 [2]
 [2]]
Predict Targets:
[[2]
 [2]
 [2]
 [3]
 [1]
 [2]
 [2]
 [2]]
----------------------------------
Predict class probabilities:
[[0.  1.  0. ]
 [0.  0.5 0.5]
 [0.  1.  0. ]
 [0.  0.5 0.5]
 [1.  0.  0. ]
 [0.  1.  0. ]
 [0.  1.  0. ]
 [0.  1.  0. ]]
----------------------------------
Score:  0.8750


In [12]:
t=rose_data[-1:]
print(t)

[[5.7 4.4]]


In [13]:
print(clfr.decision_path(t))

  (0, 0)	1
  (0, 14)	1
  (0, 15)	1
  (0, 51)	1


In [14]:
print('Number of Classes:')
print(clfr.n_classes_)

Number of Classes:
3


In [15]:
print('Classes:')
print(clfr.classes_)

Classes:
[1 2 3]


In [16]:
print('Feature importances:')
print(clfr.feature_importances_)

Feature importances:
[0.6604608 0.3395392]


In [17]:
print('Number of features:')
print(clfr.n_features_)

Number of features:
2


In [18]:
print('Inferred value of max features:')
print(clfr.max_features_)

Inferred value of max features:
2


In [19]:
print('Number of Outputs:')
print(clfr.n_outputs_)

Number of Outputs:
1


In [20]:
print('Tree Nodes:')
print(clfr.tree_.node_count)

Tree Nodes:
93


## Part F - Display the Model

In [21]:
n_nodes = clfr.tree_.node_count
children_left = clfr.tree_.children_left
children_right = clfr.tree_.children_right
feature = clfr.tree_.feature
threshold = clfr.tree_.threshold


node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)]  # seed is the root node id plus parent depth
while len(stack) > 0:
    node_id, parent_depth = stack.pop()
    node_depth[node_id] = parent_depth + 1

    if (children_left[node_id] != children_right[node_id]):
        stack.append((children_left[node_id], parent_depth + 1))
        stack.append((children_right[node_id], parent_depth + 1))
    else:
        is_leaves[node_id] = True

print("The tree structure has %s nodes and has the following tree structure:" % n_nodes)
for i in range(n_nodes):
    if is_leaves[i]:
        print("%snode=%s leaf node." % (node_depth[i] * "\t", i))
    else:
        print("%snode=%s test node: go to node %s if X[:, %s] <= %s else to node %s." % (node_depth[i] * "\t",
                 i,
                 children_left[i],
                 feature[i],
                 threshold[i],
                 children_right[i],
                 ))
print()

The tree structure has 93 nodes and has the following tree structure:
node=0 test node: go to node 1 if X[:, 0] <= 5.450000047683716 else to node 14.
	node=1 test node: go to node 2 if X[:, 1] <= 2.8000000715255737 else to node 9.
		node=2 test node: go to node 3 if X[:, 0] <= 4.700000047683716 else to node 4.
			node=3 leaf node.
			node=4 test node: go to node 5 if X[:, 0] <= 4.950000047683716 else to node 8.
				node=5 test node: go to node 6 if X[:, 1] <= 2.450000047683716 else to node 7.
					node=6 leaf node.
					node=7 leaf node.
				node=8 leaf node.
		node=9 test node: go to node 10 if X[:, 0] <= 5.3500001430511475 else to node 11.
			node=10 leaf node.
			node=11 test node: go to node 12 if X[:, 1] <= 3.200000047683716 else to node 13.
				node=12 leaf node.
				node=13 leaf node.
	node=14 test node: go to node 15 if X[:, 0] <= 6.1499998569488525 else to node 52.
		node=15 test node: go to node 16 if X[:, 1] <= 3.450000047683716 else to node 51.
			node=16 test node: go to n

In [22]:
dot_data = tree.export_graphviz(clfr, out_file=None) 
graph = gv.Source(dot_data, format='png')

In [23]:
imagepath = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04'])
print(imagepath)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrialized Machine Learning\book\IML\Results\Chapter 04


In [24]:
if not os.path.exists(imagepath):
    os.makedirs(imagepath)

In [25]:
imagename = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04','Chapter-004-Example-015-01.dot'])

In [26]:
graph.render(filename=imagename)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrialized Machine Learning\\book\\IML\\Results\\Chapter 04\\Chapter-004-Example-015-01.dot.png'

In [27]:
imagename1 = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04','Chapter-004-Example-015-01.dot.png'])
imagename2 = os.path.join(*[os.path.dirname(os.path.dirname(os.getcwd())),'Results','Chapter 04','Chapter-004-Example-015-01.png'])
os.remove(imagename)
shutil.move(imagename1,imagename2)

'C:\\Users\\AndreVermeulen\\Documents\\My Book\\apress\\Industrialized Machine Learning\\book\\IML\\Results\\Chapter 04\\Chapter-004-Example-015-01.png'

In [28]:
gv.view(imagename2)

## Done

In [29]:
import datetime
now = datetime.datetime.now()
print('Done!',str(now))

Done! 2019-04-09 08:58:55.840393
