## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This is an example add-on to a book and needs to be accepted as part of that copyright.

# Chapter 005 Example 001B

In [1]:
# Feature columns F01..F08 and the single target column used by this example.
sfeature = ['F%02d' % n for n in range(1, 9)]
starget = ['T02']

## Part A - Load Libraries

In [2]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.preprocessing import RobustScaler

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import os

## Part B - Load the Sickness dataset

In [3]:
# Relative location of the Sickness dataset CSV.
# NOTE(review): a relative path is resolved against the current working
# directory, so this presumably expects the notebook to be run from its own folder.
fileName = '../../Data/Sickness06.csv'
# Absolute form of the path, printed so the output records which file was used.
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Data\Sickness06.csv


In [4]:
# Read the CSV (first row is the header) and discard every row that has at
# least one missing value, chaining the calls instead of mutating in place.
datadf = pd.read_csv(fileFullName, header=0).dropna(axis=0, how='any')

# Quick check of what was loaded: dimensions and column names.
print(datadf.shape)
print(datadf.columns)

(5000, 10)
Index(['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08', 'T01', 'T02'], dtype='object')


In [5]:
# Independent (deep) copy of the feature columns only.
data_X = datadf.loc[:, sfeature].copy()

In [6]:
# Feature matrix as a freshly allocated float64 NumPy array.
data_X2 = data_X.to_numpy(dtype=np.float64, copy=True)

In [7]:
# Independent copy of the target column, relabelled to the generic name 'T'.
data_y = datadf.loc[:, starget].copy()
data_y.columns = ['T']

In [8]:
# Target vector as a freshly allocated integer NumPy array.
data_y2 = data_y['T'].to_numpy(dtype=int, copy=True)

## Part C - Select Training and Test Data Sets

In [9]:
# 70/30 train/test split; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data_X2,
    data_y2,
    train_size=0.7,
    test_size=0.3,
    random_state=150,
)

## Part D - Build Scaler

In [10]:
# Robust scaler configured with centering and scaling over the 25th-75th
# percentile range; its statistics are learned from the training split only.
transformer = RobustScaler(
    with_centering=True,
    with_scaling=True,
    quantile_range=(25.0, 75.0),
    copy=True,
)
scaler = transformer.fit(X_train, y_train)

In [11]:
# Apply the training-fitted scaler to both splits.
X_train_scale, X_test_scale = (scaler.transform(part) for part in (X_train, X_test))

# Show the first training row before and after scaling.
print(X_train[0], X_train_scale[0])

[ 0.    76.    53.    11.     1.    23.002  0.246 23.   ] [-0.6        -0.93023256 -1.05882353 -0.38709677 -0.31538462 -0.9837766
 -0.35210356 -0.375     ]


In [12]:
# Summarise the fitted scaler: feature count, training-sample count, and the
# per-feature center and scale values it learned.
s = np.array(scaler.scale_)
summary_rows = (
    ('Features:', s.shape[0]),
    ('Samples:', X_train_scale.shape[0]),
    ('Center:', scaler.center_),
    ('Scale:', scaler.scale_),
)
for caption, shown in summary_rows:
    print(caption, shown)

Features: 8
Samples: 3500
Center: [  3.     116.      71.      23.      42.      32.2495   0.382   29.    ]
Scale: [  5.       43.       17.       31.      130.        9.4       0.38625
  16.     ]


## Part E - Build the base estimator using the extremely randomized tree classifier algorithm

In [13]:
# Base (weak) learner handed to the AdaBoost runs below: a depth-2 extremely
# randomized tree that uses the entropy criterion.
etc = ExtraTreeClassifier(criterion='entropy',
                          splitter='best',
                          random_state=0,
                          max_depth=2)

# Fit on the scaled TRAINING split only. Fitting on the test split would let
# held-out data shape the model (data leakage) and undermine the test scores.
# NOTE(review): AdaBoostClassifier is documented to fit clones of its base
# estimator, so this fit presumably only matters when `etc` is inspected or
# used on its own - confirm.
etc = etc.fit(X_train_scale, y_train)

## Part F - Execute AdaBoost = 1

In [14]:
# Boosted ensemble limited to a single estimator: fit on the scaled training
# split, then score on the scaled test split.
# NOTE(review): newer scikit-learn releases rename `base_estimator` to
# `estimator`; confirm against the installed version before upgrading.
clf1 = AdaBoostClassifier(
    base_estimator=etc,
    n_estimators=1,
    learning_rate=1,
    algorithm='SAMME.R',
    random_state=0,
)
clf1.fit(X_train_scale, y_train)
score1 = clf1.score(X_test_scale, y_test)

In [15]:
# List each class label next to its positional index in the fitted model.
for position, class_label in enumerate(clf1.classes_):
    print('Class: %3d > %s' % (position, class_label))

Class:   0 > 1
Class:   1 > 2


In [16]:
# One line per boosting stage of clf1: its weight and its error.
stage_stats = zip(clf1.estimator_weights_, clf1.estimator_errors_)
for stage, (weight, error) in enumerate(stage_stats, start=1):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % (stage, weight, error))

Estimator 001 > weight: 1.00000 and error: 0.28714


In [17]:
# Score of the single-estimator ensemble on the test split, as a percentage.
score1_pct = score1 * 100
print('Results for AdaBoost (1): %7.4f %%' % score1_pct)

Results for AdaBoost (1): 70.2667 %


## Part G - Execute AdaBoost = 50

In [18]:
# Boosted ensemble with up to 50 estimators: fit on the scaled training split,
# then score on the scaled test split.
clf2 = AdaBoostClassifier(
    base_estimator=etc,
    n_estimators=50,
    learning_rate=1,
    algorithm='SAMME.R',
    random_state=0,
)
clf2.fit(X_train_scale, y_train)
score2 = clf2.score(X_test_scale, y_test)

In [19]:
# One line per boosting stage of clf2: its weight and its error.
stage_stats = zip(clf2.estimator_weights_, clf2.estimator_errors_)
for stage, (weight, error) in enumerate(stage_stats, start=1):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % (stage, weight, error))

Estimator 001 > weight: 1.00000 and error: 0.28714
Estimator 002 > weight: 1.00000 and error: 0.44660
Estimator 003 > weight: 1.00000 and error: 0.36690
Estimator 004 > weight: 1.00000 and error: 0.41544
Estimator 005 > weight: 1.00000 and error: 0.42886
Estimator 006 > weight: 1.00000 and error: 0.37535
Estimator 007 > weight: 1.00000 and error: 0.40993
Estimator 008 > weight: 1.00000 and error: 0.45339
Estimator 009 > weight: 1.00000 and error: 0.47172
Estimator 010 > weight: 1.00000 and error: 0.49075
Estimator 011 > weight: 1.00000 and error: 0.48671
Estimator 012 > weight: 1.00000 and error: 0.47567
Estimator 013 > weight: 1.00000 and error: 0.44116
Estimator 014 > weight: 1.00000 and error: 0.47688
Estimator 015 > weight: 1.00000 and error: 0.45180
Estimator 016 > weight: 1.00000 and error: 0.45706
Estimator 017 > weight: 1.00000 and error: 0.43335
Estimator 018 > weight: 1.00000 and error: 0.46247
Estimator 019 > weight: 1.00000 and error: 0.44465
Estimator 020 > weight: 1.00000

In [20]:
# Score of clf2 on the test split, as a percentage.
# The label states 50 so that it agrees with the n_estimators=50 used for clf2.
print('Results for AdaBoost (50): %7.4f %%' % (score2 * 100))

Results for AdaBoost (5): 83.9333 %


## Part H - Execute AdaBoost = 100

In [21]:
# Boosted ensemble with up to 100 estimators: fit on the scaled training split,
# then score on the scaled test split.
clf3 = AdaBoostClassifier(
    base_estimator=etc,
    n_estimators=100,
    learning_rate=1,
    algorithm='SAMME.R',
    random_state=0,
)
clf3.fit(X_train_scale, y_train)
score3 = clf3.score(X_test_scale, y_test)

In [22]:
# One line per boosting stage of clf3: its weight and its error.
stage_stats = zip(clf3.estimator_weights_, clf3.estimator_errors_)
for stage, (weight, error) in enumerate(stage_stats, start=1):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % (stage, weight, error))

Estimator 001 > weight: 1.00000 and error: 0.28714
Estimator 002 > weight: 1.00000 and error: 0.44660
Estimator 003 > weight: 1.00000 and error: 0.36690
Estimator 004 > weight: 1.00000 and error: 0.41544
Estimator 005 > weight: 1.00000 and error: 0.42886
Estimator 006 > weight: 1.00000 and error: 0.37535
Estimator 007 > weight: 1.00000 and error: 0.40993
Estimator 008 > weight: 1.00000 and error: 0.45339
Estimator 009 > weight: 1.00000 and error: 0.47172
Estimator 010 > weight: 1.00000 and error: 0.49075
Estimator 011 > weight: 1.00000 and error: 0.48671
Estimator 012 > weight: 1.00000 and error: 0.47567
Estimator 013 > weight: 1.00000 and error: 0.44116
Estimator 014 > weight: 1.00000 and error: 0.47688
Estimator 015 > weight: 1.00000 and error: 0.45180
Estimator 016 > weight: 1.00000 and error: 0.45706
Estimator 017 > weight: 1.00000 and error: 0.43335
Estimator 018 > weight: 1.00000 and error: 0.46247
Estimator 019 > weight: 1.00000 and error: 0.44465
Estimator 020 > weight: 1.00000

In [23]:
# Score of clf3 on the test split, as a percentage.
# The label states 100 so that it agrees with the n_estimators=100 used for clf3.
print('Results for AdaBoost (100): %7.4f %%' % (score3 * 100))

Results for AdaBoost (10): 89.4000 %


## Part I - Execute AdaBoost = 1000

In [24]:
# Boosted ensemble with up to 1000 estimators: fit on the scaled training
# split, then score on the scaled test split.
clf4 = AdaBoostClassifier(
    base_estimator=etc,
    n_estimators=1000,
    learning_rate=1,
    algorithm='SAMME.R',
    random_state=0,
)
clf4.fit(X_train_scale, y_train)
score4 = clf4.score(X_test_scale, y_test)

In [25]:
%%javascript
// Replace the OutputArea scroll check with a function that always answers
// "no scrolling".
// NOTE(review): presumably this keeps the long estimator listing printed by
// the next cell fully expanded instead of inside a scroll box - confirm. The
// IPython.OutputArea global looks specific to the classic Notebook front end.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [26]:
# One line per boosting stage of clf4: its weight and its error.
stage_stats = zip(clf4.estimator_weights_, clf4.estimator_errors_)
for stage, (weight, error) in enumerate(stage_stats, start=1):
    print('Estimator %04d > weight: %7.5f and error: %7.5f' % (stage, weight, error))

Estimator 0001 > weight: 1.00000 and error: 0.28714
Estimator 0002 > weight: 1.00000 and error: 0.44660
Estimator 0003 > weight: 1.00000 and error: 0.36690
Estimator 0004 > weight: 1.00000 and error: 0.41544
Estimator 0005 > weight: 1.00000 and error: 0.42886
Estimator 0006 > weight: 1.00000 and error: 0.37535
Estimator 0007 > weight: 1.00000 and error: 0.40993
Estimator 0008 > weight: 1.00000 and error: 0.45339
Estimator 0009 > weight: 1.00000 and error: 0.47172
Estimator 0010 > weight: 1.00000 and error: 0.49075
Estimator 0011 > weight: 1.00000 and error: 0.48671
Estimator 0012 > weight: 1.00000 and error: 0.47567
Estimator 0013 > weight: 1.00000 and error: 0.44116
Estimator 0014 > weight: 1.00000 and error: 0.47688
Estimator 0015 > weight: 1.00000 and error: 0.45180
Estimator 0016 > weight: 1.00000 and error: 0.45706
Estimator 0017 > weight: 1.00000 and error: 0.43335
Estimator 0018 > weight: 1.00000 and error: 0.46247
Estimator 0019 > weight: 1.00000 and error: 0.44465
Estimator 00

In [27]:
# Score of the 1000-estimator ensemble on the test split, as a percentage.
score4_pct = score4 * 100
print('Results for AdaBoost (1000): %7.4f %%' % score4_pct)

Results for AdaBoost (1000): 97.2000 %


## Part J - Improvement Analysis

In [28]:
# Compare the smallest (1 estimator) and largest (1000 estimators) ensembles.
# Scores are rounded to four decimals first; the gain is relative to s1.
s1, s4 = (round(score, 4) for score in (score1, score4))
relative_gain_pct = ((s4 - s1) / s1) * 100
print('Score improvement (%5.3f %% to %5.3f %%), so a %5.3f %% improvement!'
      % (s1 * 100, s4 * 100, relative_gain_pct))

Score improvement (70.270 % to 97.200 %), so a 38.324 % improvement!


## Done

In [29]:
import datetime

# Timestamp marking the end of the notebook run.
now = datetime.datetime.now()
print('Done!', '%s' % now)

Done! 2019-10-19 17:41:46.195457
