# Binarization

In [75]:
from sklearn import preprocessing
import numpy as np 

# Small 3x3 sample matrix reused by the preprocessing examples in this notebook.
# Written one row per line so the values are easy to compare with the outputs.
data = np.array([
    [2.2, 5.9, -1.8],
    [5.4, -3.2, -5.1],
    [-1.9, 4.2, 3.2],
])


In [76]:
# Threshold the data: values strictly greater than 1.5 become 1, all others 0.
# transform() is called directly on a fresh Binarizer, with no prior fit() call.
binarizer = preprocessing.Binarizer(threshold=1.5)
bindata = binarizer.transform(data)

print('Binarized data:\n\n', bindata)

Binarized data:

 [[1. 1. 0.]
 [1. 0. 0.]
 [0. 1. 1.]]


# Standardization

In [77]:
# Column-wise (axis=0) statistics of the raw data, as a baseline to compare
# against the values printed after standardization.
mean_before = data.mean(axis=0)
std_before = data.std(axis=0)

print('Mean (before)= ', mean_before)
print('Standard Deviation (before)= ', std_before)

Mean (before)=  [ 1.9         2.3        -1.23333333]
Standard Deviation (before)=  [2.98775278 3.95052739 3.41207008]


In [78]:
# Standardize the data, then recompute the column-wise statistics.
# The means should be ~0 (up to floating-point noise) and the standard deviations 1.
scaled_data = preprocessing.scale(data)

mean_after = scaled_data.mean(axis=0)
std_after = scaled_data.std(axis=0)

print('Mean (after)= ', mean_after)
print('Standard Deviation (after)= ', std_after)

Mean (after)=  [0.00000000e+00 0.00000000e+00 7.40148683e-17]
Standard Deviation (after)=  [1. 1. 1.]


# Scaling

###### StandardScaler => rescales each feature to mean = 0 and variance = 1
###### MinMaxScaler => rescales each feature to the 0 to 1 range
###### Normalizer => rescales each feature vector (row) to Euclidean length = 1

In [79]:
# Display the original, unmodified data for reference before applying the scaler.
data

array([[ 2.2,  5.9, -1.8],
       [ 5.4, -3.2, -5.1],
       [-1.9,  4.2,  3.2]])

In [80]:
# Rescale the data into the [0, 1] range with a MinMaxScaler.
# fit() learns the scaling from `data`; transform() then applies it to the same array.
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler.fit(data)
data_minmax = minmax_scaler.transform(data)

print('MinMaxScaler applied on the data:\n', data_minmax)

MinMaxScaler applied on the data:
 [[0.56164384 1.         0.39759036]
 [1.         0.         0.        ]
 [0.         0.81318681 1.        ]]


# Normalization
###### Normalization brings the values of each feature vector onto a common scale

###### L1 - Least Absolute Deviations - the sum of absolute values (on each row) = 1; it is less sensitive to outliers
###### L2 - Least Squares - the sum of squares (on each row) = 1; it takes outliers into consideration during training

In [81]:
# Display the original, unmodified data for reference before normalizing it.
data

array([[ 2.2,  5.9, -1.8],
       [ 5.4, -3.2, -5.1],
       [-1.9,  4.2,  3.2]])

In [82]:
# Normalize the data once per norm: 'l1' first, then 'l2'.
# The generator yields the results in that order, so the unpacking
# binds data_l1 and data_l2 to the matching outputs.
data_l1, data_l2 = (
    preprocessing.normalize(data, norm=norm_name)
    for norm_name in ('l1', 'l2')
)

print('L1-normalized data:\n', data_l1)
print('\nL2-normalized data:\n', data_l2)

L1-normalized data:
 [[ 0.22222222  0.5959596  -0.18181818]
 [ 0.39416058 -0.23357664 -0.37226277]
 [-0.20430108  0.4516129   0.34408602]]

L2-normalized data:
 [[ 0.3359268   0.90089461 -0.2748492 ]
 [ 0.6676851  -0.39566524 -0.63059148]
 [-0.33858465  0.74845029  0.57024784]]
