# Scaler Examples

<div id="toc"></div>



## View Data

In [1]:
from sklearn import datasets
# Load the Iris dataset; later cells read its feature_names, target_names,
# data and target attributes.
iris = datasets.load_iris()

In [2]:
# Names of the four measured features (all in centimetres).
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [3]:
# Names of the three classes.
iris.target_names

array(['setosa', 'versicolor', 'virginica'],
      dtype='<U10')

In [12]:
# Feature matrix that every scaler in this notebook is fitted on.
data = iris.data

In [13]:
# Class labels. NOTE(review): not referenced again in the visible cells.
target = iris.target

## StandardScaler()

In [23]:
from sklearn.preprocessing import StandardScaler

In [24]:
# Fit the scaler on the feature matrix first, then apply it to that same
# matrix (two explicit steps instead of a single fit_transform call).
scaler = StandardScaler().fit(data)
scaled_data = scaler.transform(data)

In [19]:
# Column means after standardization; the ~1e-15 values in the output are
# zero up to floating-point error.
scaled_data.mean(axis=0)

array([ -1.69031455e-15,  -1.63702385e-15,  -1.48251781e-15,
        -1.62314606e-15])

In [22]:
# Column standard deviations after standardization (1 for every feature).
scaled_data.std(axis=0)

array([ 1.,  1.,  1.,  1.])

In [20]:
# Column maxima: standardized values are not confined to a fixed interval.
scaled_data.max(axis=0)

array([ 2.4920192 ,  3.11468391,  1.78634131,  1.71090158])

In [21]:
# Column minima of the standardized data.
scaled_data.min(axis=0)

array([-1.87002413, -2.43898725, -1.56873522, -1.4444497 ])

## MinMaxScaler()

In [27]:
from sklearn.preprocessing import MinMaxScaler

In [28]:
# Fit the min-max scaler on the feature matrix, then rescale that same matrix.
# NOTE: rebinds the names `scaler` and `scaled_data` used in the previous section.
scaler = MinMaxScaler().fit(data)
scaled_data = scaler.transform(data)

In [29]:
# Per-feature minimum: MinMaxScaler rescales each column independently, so
# check along axis=0 instead of taking one global minimum. Every column
# should bottom out at 0.
scaled_data.min(axis=0)

array([ 0.,  0.,  0.,  0.])

In [30]:
# Per-feature maximum: MinMaxScaler rescales each column independently, so
# check along axis=0 instead of taking one global maximum. Every column
# should top out at 1.
scaled_data.max(axis=0)

array([ 1.,  1.,  1.,  1.])

## MaxAbsScaler()

In [42]:
from sklearn.preprocessing import MaxAbsScaler

In [43]:
# Fit the max-abs scaler on the feature matrix, then rescale that same matrix.
# NOTE: rebinds the names `scaler` and `scaled_data` used in earlier sections.
scaler = MaxAbsScaler().fit(data)
scaled_data = scaler.transform(data)

In [46]:
# Column minima after max-abs scaling; they stay above 0 in the output
# because the columns are only divided by a constant, not shifted.
scaled_data.min(axis=0)

array([ 0.5443038 ,  0.45454545,  0.14492754,  0.04      ])

In [47]:
# Per-feature maximum (axis=0), matching the min(axis=0) check in the cell above.
# MaxAbsScaler divides each column by its largest absolute value, so every
# column's maximum is exactly 1. The previous axis=1 call printed one value
# per sample (150 numbers), which does not show the per-feature scaling.
scaled_data.max(axis=0)

array([ 1.,  1.,  1.,  1.])

## Normalizer() - per-sample normalization (樣本正規化)

In [32]:
from sklearn.preprocessing import Normalizer

In [33]:
# Normalizer rescales samples (rows), not features; with norm='max' each row
# is divided by its largest absolute value.
n_max = Normalizer(norm='max')

In [35]:
# Apply the row-wise normalization to the feature matrix.
n_data = n_max.fit_transform(data)

In [38]:
# Column maxima of the row-normalized data; only the first feature reaches 1.
n_data.max(axis=0)

array([ 1.        ,  0.78846154,  0.95238095,  0.4137931 ])

In [49]:
# Row maxima: with norm='max' the largest value in every sample is exactly 1.
n_data.max(axis=1)

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [39]:
# Column minima; the first column's minimum is also 1, i.e. the first feature
# is the largest value in every row.
n_data.min(axis=0)

array([ 1.        ,  0.33766234,  0.20689655,  0.01923077])

In [50]:
# Row minima: the smallest feature of each sample relative to its largest one.
n_data.min(axis=1)

array([ 0.03921569,  0.04081633,  0.04255319,  0.04347826,  0.04      ,
        0.07407407,  0.06521739,  0.04      ,  0.04545455,  0.02040816,
        0.03703704,  0.04166667,  0.02083333,  0.02325581,  0.03448276,
        0.07017544,  0.07407407,  0.05882353,  0.05263158,  0.05882353,
        0.03703704,  0.07843137,  0.04347826,  0.09803922,  0.04166667,
        0.04      ,  0.08      ,  0.03846154,  0.03846154,  0.04255319,
        0.04166667,  0.07407407,  0.01923077,  0.03636364,  0.02040816,
        0.04      ,  0.03636364,  0.02040816,  0.04545455,  0.03921569,
        0.06      ,  0.06666667,  0.04545455,  0.12      ,  0.07843137,
        0.0625    ,  0.03921569,  0.04347826,  0.03773585,  0.04      ,
        0.2       ,  0.234375  ,  0.2173913 ,  0.23636364,  0.23076923,
        0.22807018,  0.25396825,  0.20408163,  0.1969697 ,  0.26923077,
        0.2       ,  0.25423729,  0.16666667,  0.2295082 ,  0.23214286,
        0.20895522,  0.26785714,  0.17241379,  0.24193548,  0.19

In [40]:
# Column means of the row-normalized data.
n_data.mean(axis=0)

array([ 1.        ,  0.53442783,  0.61821394,  0.19372947])

In [37]:
# Column standard deviations; 0 for the first column because it equals 1 in
# every row.
n_data.std(axis=0)

array([ 0.        ,  0.11519629,  0.23966277,  0.11313846])

## RobustScaler()

In [57]:
from sklearn.preprocessing import RobustScaler

In [58]:
# RobustScaler with a custom quantile_range: per the scikit-learn docs the
# scale is computed from this percentile range (15th-85th here; the
# documented default is (25.0, 75.0)).
rb_scaler = RobustScaler(quantile_range=(15,85))

In [59]:
# Fit the robust scaler on the feature matrix and transform it in one call.
# NOTE: rebinds the name `scaled_data` used in earlier sections.
scaled_data = rb_scaler.fit_transform(data)

In [60]:
# Column minima after robust scaling; values are not restricted to [-1, 1].
scaled_data.min(axis=0)

array([-0.88235294, -1.11111111, -0.80432173, -0.64343164])

In [61]:
# Column maxima after robust scaling; values are not restricted to [-1, 1].
scaled_data.max(axis=0)

array([ 1.23529412,  1.55555556,  0.6122449 ,  0.64343164])

## References

1. http://scikit-learn.org/stable/modules/preprocessing.html
1. Giuseppe Bonaccorso, *Machine Learning Algorithms*, Packt Publishing