## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This is an example add-on to a book and needs to be accepted as part of that copyright.

# Chapter 05 Example 001A

In [1]:
sfeature=['F01', 'F02', 'F04']

## Part A - Load Libraries

In [2]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import os

## Part B - Load the Roses dataset

In [3]:
fileName = '../../Data/Roses02.csv'
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Data\Roses02.csv


In [4]:
datadf= pd.read_csv(fileFullName, header=0)
print(datadf.shape)
print(datadf.columns)

(600, 6)
Index(['F01', 'F02', 'F03', 'F04', 'T01', 'T02'], dtype='object')


In [5]:
data_X = datadf[sfeature].copy(deep=True)

In [6]:
data_X2 = np.array(data_X,dtype='float64')

In [7]:
data_y = datadf['T01'].copy(deep=True)
data_y.columns = (['T'])

In [8]:
data_y2 = np.array(data_y,dtype='int')

## Part C - Select Training and Test Data Sets

In [9]:
X_train, X_test, y_train, y_test = train_test_split(data_X2, data_y2, train_size=0.7, test_size=0.3, random_state=50)

## Part D - Build Scaler

In [10]:
transformer = StandardScaler(copy=True, with_mean=True, with_std=False)
scaler = transformer.fit(X_train, y_train)

In [11]:
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)
print(X_train[0],X_train_scale[0])

[5.009 3.395 0.152] [-0.81441667  0.32359048 -1.06877619]


In [12]:
print(scaler.get_params(deep=True))

{'copy': True, 'with_mean': True, 'with_std': False}


In [13]:
s=np.array(scaler.mean_)
print('Features:', s.shape[0])
print('Samples:', scaler.n_samples_seen_)
print('Scale:', scaler.scale_)
print('Mean:',scaler.mean_ )
print('variance:',scaler.var_)

Features: 3
Samples: 420
Scale: None
Mean: [5.82341667 3.07140952 1.22077619]
variance: None


## Part E - Build Base ML using Support Vector Classification (SVC) algorithm

In [14]:
svc=SVC(max_iter=5000, 
        gamma='auto', 
        class_weight='balanced', 
        probability=True, 
        kernel='linear', 
        random_state=0, 
        verbose=False)

## Part F - Execute AdaBoost = 1

In [15]:
clf1 = AdaBoostClassifier(algorithm='SAMME', 
                          n_estimators=1, 
                          base_estimator=svc, 
                          learning_rate=1, 
                          random_state=0)

clf1.fit(X_train_scale, y_train)

score1 = clf1.score(X_test_scale,y_test)

In [16]:
for i in range(clf1.n_classes_):
    print('Class: %3d > %s' % (i, clf1.classes_[i]))

Class:   0 > 1
Class:   1 > 2
Class:   2 > 3


In [17]:
for i in range(len(clf1.estimator_weights_)):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % ((i+1), clf1.estimator_weights_[i],clf1.estimator_errors_[i]))

Estimator 001 > weight: 1.34373 and error: 0.34286


In [18]:
print('Results for AdaBoost (1): %7.4f %%' % (score1*100))

Results for AdaBoost (1): 67.2222 %


## Part G - Execute AdaBoost = 5

In [19]:
clf2 = AdaBoostClassifier(algorithm='SAMME', n_estimators=5, base_estimator=svc, learning_rate=1, random_state=0)
clf2.fit(X_train_scale, y_train)
score2 = clf2.score(X_test_scale,y_test)

In [20]:
for i in range(len(clf2.estimator_weights_)):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % ((i+1), clf2.estimator_weights_[i],clf2.estimator_errors_[i]))

Estimator 001 > weight: 1.34373 and error: 0.34286
Estimator 002 > weight: 1.34491 and error: 0.34259
Estimator 003 > weight: 1.34602 and error: 0.34234
Estimator 004 > weight: 1.30833 and error: 0.35088
Estimator 005 > weight: 1.38629 and error: 0.33333


In [21]:
print('Results for AdaBoost (5): %7.4f %%'% (score2*100))

Results for AdaBoost (5): 67.7778 %


## Part H - Execute AdaBoost = 10

In [22]:
clf3 = AdaBoostClassifier(algorithm='SAMME', n_estimators=10, base_estimator=svc, learning_rate=1, random_state=0)
clf3.fit(X_train_scale, y_train)
score3 = clf3.score(X_test_scale,y_test)

In [23]:
for i in range(len(clf3.estimator_weights_)):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % ((i+1), clf3.estimator_weights_[i],clf3.estimator_errors_[i]))

Estimator 001 > weight: 1.34373 and error: 0.34286
Estimator 002 > weight: 1.34491 and error: 0.34259
Estimator 003 > weight: 1.34602 and error: 0.34234
Estimator 004 > weight: 1.30833 and error: 0.35088
Estimator 005 > weight: 1.38629 and error: 0.33333
Estimator 006 > weight: 1.38629 and error: 0.33333
Estimator 007 > weight: 1.38629 and error: 0.33333
Estimator 008 > weight: 1.38629 and error: 0.33333
Estimator 009 > weight: 1.38629 and error: 0.33333
Estimator 010 > weight: 1.38629 and error: 0.33333


In [24]:
print('Results for AdaBoost (10): %7.5f %%'% (score3*100))

Results for AdaBoost (10): 73.33333 %


## Part I - Execute AdaBoost = 20

In [25]:
clf4 = AdaBoostClassifier(algorithm='SAMME', n_estimators=20, base_estimator=svc, learning_rate=1, random_state=0)
clf4.fit(X_train_scale, y_train)
score4 = clf4.score(X_test_scale,y_test)

In [26]:
for i in range(len(clf4.estimator_weights_)):
    print('Estimator %03d > weight: %7.5f and error: %7.5f' % ((i+1), clf4.estimator_weights_[i],clf4.estimator_errors_[i]))

Estimator 001 > weight: 1.34373 and error: 0.34286
Estimator 002 > weight: 1.34491 and error: 0.34259
Estimator 003 > weight: 1.34602 and error: 0.34234
Estimator 004 > weight: 1.30833 and error: 0.35088
Estimator 005 > weight: 1.38629 and error: 0.33333
Estimator 006 > weight: 1.38629 and error: 0.33333
Estimator 007 > weight: 1.38629 and error: 0.33333
Estimator 008 > weight: 1.38629 and error: 0.33333
Estimator 009 > weight: 1.38629 and error: 0.33333
Estimator 010 > weight: 1.38629 and error: 0.33333
Estimator 011 > weight: 1.38629 and error: 0.33333
Estimator 012 > weight: 1.38629 and error: 0.33333
Estimator 013 > weight: 1.38629 and error: 0.33333
Estimator 014 > weight: 1.38629 and error: 0.33333
Estimator 015 > weight: 1.38629 and error: 0.33333
Estimator 016 > weight: 1.38629 and error: 0.33333
Estimator 017 > weight: 1.38629 and error: 0.33333
Estimator 018 > weight: 1.38629 and error: 0.33333
Estimator 019 > weight: 1.38629 and error: 0.33333
Estimator 020 > weight: 1.38629

In [27]:
print('Results for AdaBoost (20): %7.4f %%'% (score4*100))

Results for AdaBoost (20): 73.3333 %


## Part J - Improvement Analysis

In [28]:
s1=round(score1,4)
s4=round(score4,4)
print('Score improvement (%5.3f %% to %5.3f %%), so a %5.3f %% improvement!' % (s1*100,s4*100,((s4-s1)/s1)*100))

Score improvement (67.220 % to 73.330 %), so a 9.090 % improvement!


## Done

In [29]:
import datetime
now = datetime.datetime.now()
print('Done!',str(now))

Done! 2019-10-19 17:41:36.583454
