## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This is an example add-on to a book and needs to be accepted as part of that copyright.

## Chapter-008-006-Preprocessing-Data-01

## Part A - Load Libraries

In [1]:
import datetime

import numpy as np
from sklearn import preprocessing

## Part B - Set up the data set

In [2]:
# Toy training matrix: nine samples (rows) by four features (columns).
X_train = np.array(
    [
        [1.1, -1.5, 2.5, 0.1],
        [2.2, 0.1, 0.5, 2.2],
        [0.1, 2.2, -2.5, 2.3],
        [1.1, 1.2, -1.8, 6.3],
        [0.8, 3.2, 5.5, 0.2],
        [0.1, 1.2, -2.5, 1.3],
        [1.1, 3.2, 1.8, 6.3],
        [0.8, 3.2, -1.5, 0.2],
        [2.1, 4.2, -1.5, 2.1],
    ]
)

## Part C - Run Scale preprocessing (mean-centering only, no variance scaling)

In [3]:
# Subtract each column's mean (axis=0 operates feature-wise). Because
# with_std=False, the spread of every column is left untouched: the data is
# centered but not rescaled to unit variance.
X_scaled = preprocessing.scale(
    X_train,
    axis=0,
    with_mean=True,
    with_std=False,
    copy=True,
)

In [4]:
# Show the mean-centered matrix returned by preprocessing.scale.
print(X_scaled)

[[ 0.05555556 -3.38888889  2.44444444 -2.23333333]
 [ 1.15555556 -1.78888889  0.44444444 -0.13333333]
 [-0.94444444  0.31111111 -2.55555556 -0.03333333]
 [ 0.05555556 -0.68888889 -1.85555556  3.96666667]
 [-0.24444444  1.31111111  5.44444444 -2.13333333]
 [-0.94444444 -0.68888889 -2.55555556 -1.03333333]
 [ 0.05555556  1.31111111  1.74444444  3.96666667]
 [-0.24444444  1.31111111 -1.55555556 -2.13333333]
 [ 1.05555556  2.31111111 -1.55555556 -0.23333333]]


In [5]:
# Column means after centering: expected to be zero up to floating-point error.
print(np.mean(X_scaled, axis=0))

[ 0.00000000e+00  1.48029737e-16  9.86864911e-17 -1.48029737e-16]


In [6]:
# Column standard deviations: not 1, because the scale call used with_std=False.
print(np.std(X_scaled, axis=0))

[0.69619779 1.70713825 2.58633637 2.27791328]


In [7]:
# Smallest centered value in each feature column.
print(np.min(X_scaled, axis=0))

[-0.94444444 -3.38888889 -2.55555556 -2.23333333]


In [8]:
# Largest centered value in each feature column.
print(np.max(X_scaled, axis=0))

[1.15555556 2.31111111 5.44444444 3.96666667]


## Part D - Run RobustScaler preprocessing

In [9]:
# Configure a scaler that centers each feature and scales it by the spread
# between the 25th and 75th percentiles, then learn those statistics from
# X_train. fit() returns the estimator itself, so fitting in a separate
# statement leaves `scaler` bound to the same fitted object.
scaler = preprocessing.RobustScaler(
    with_centering=True,
    with_scaling=True,
    quantile_range=(25.0, 75.0),
    copy=True,
)
scaler.fit(X_train)

In [10]:
# Show the fitted scaler's textual representation (its constructor settings).
print(scaler)

RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)


In [11]:
# Per-feature center learned by fit(); for this data it equals the column
# medians of X_train.
print(scaler.center_)

[ 1.1  2.2 -1.5  2.1]


In [12]:
# Per-feature scale learned by fit(); with quantile_range=(25.0, 75.0) this is
# the spread between the 25th and 75th percentiles of each X_train column.
print(scaler.scale_)

[0.3 2.  3.6 2.1]


In [13]:
# Apply the fitted scaler to the training data: each value becomes
# (x - center_) / scale_ for its column.
print(scaler.transform(X_train))

[[ 0.         -1.85        1.11111111 -0.95238095]
 [ 3.66666667 -1.05        0.55555556  0.04761905]
 [-3.33333333  0.         -0.27777778  0.0952381 ]
 [ 0.         -0.5        -0.08333333  2.        ]
 [-1.          0.5         1.94444444 -0.9047619 ]
 [-3.33333333 -0.5        -0.27777778 -0.38095238]
 [ 0.          0.5         0.91666667  2.        ]
 [-1.          0.5         0.         -0.9047619 ]
 [ 3.33333333  1.          0.          0.        ]]


In [14]:
# Single unseen sample, stored as a (1, 4) ndarray so that it matches X_train's
# container type, dtype and column count before it is passed to the scaler.
X_test = np.array([[-1.5, 1.3, 6.5, 3.0]])

In [15]:
# Scale the unseen sample with the statistics learned from X_train only;
# the scaler is not refitted here.
print(scaler.transform(X_test))

[[-8.66666667 -0.45        2.22222222  0.42857143]]


## Done

In [16]:
# Log a completion timestamp. print() already str()-formats each argument, so
# no explicit str() conversion is needed; the printed text is unchanged.
now = datetime.datetime.now()
print('Done!', now)

Done! 2019-04-22 20:25:02.381620
