In [9]:
import numpy as np
from sklearn import preprocessing

In [10]:
# Toy data set shared by every preprocessing example below:
# 4 rows by 3 columns of floats, mixing positive and negative values.
input_data = np.array(
    [
        [5.1, -2.9, 3.3],
        [-1.2, 7.8, -6.1],
        [3.9, 0.4, 2.1],
        [7.3, -9.9, -4.5],
    ]
)

# Binarizer (binarization)

Converts every value less than or equal to the threshold to 0, and every value strictly greater than it to 1

In [11]:
# Build the thresholding transformer first, then apply it to the sample matrix.
# The result holds only 0.0 / 1.0 entries and has the same shape as the input.
binarizer = preprocessing.Binarizer(threshold=2.1)
data_binarized = binarizer.transform(input_data)
data_binarized

array([[1., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

# Scale (mean removal)

Standardizes each feature (column) so that it has zero mean and unit standard deviation

In [12]:
# Column-wise (axis=0) statistics of the untouched input, shown as a baseline
# to compare against the values reported after scaling.
print("Original data")
print(f"Mean = {np.mean(input_data, axis=0)}")
print(f"Standart deviation = {np.std(input_data, axis=0)}")

Original data
Mean = [ 3.775 -1.15  -1.3  ]
Standart deviation = [3.12039661 6.36651396 4.0620192 ]


In [13]:
# Standardize the input, then report the column-wise (axis=0) mean and
# standard deviation of the result so they can be compared with the originals.
data_scaled = preprocessing.scale(input_data)

print("After scale")
print(f"Mean = {np.mean(data_scaled, axis=0)}")
print(f"Standart deviation = {np.std(data_scaled, axis=0)}")

# Bare last expression: the notebook displays the scaled matrix itself.
data_scaled

After scale
Mean = [1.11022302e-16 0.00000000e+00 2.77555756e-17]
Standart deviation = [1. 1. 1.]


array([[ 0.42462551, -0.2748757 ,  1.13244172],
       [-1.59434861,  1.40579288, -1.18167831],
       [ 0.04005901,  0.24346134,  0.83702214],
       [ 1.12966409, -1.37437851, -0.78778554]])

# Min Max Scale (scaling)

Rescales each feature (column) linearly so that its values span a given range (here 0 to 1)

In [14]:
# Configure the scaler for the target interval (0, 1), fit it on the input,
# and then map that same input through the fitted scaler.
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler.fit(input_data)
dsmm = minmax_scaler.transform(input_data)
dsmm

array([[0.74117647, 0.39548023, 1.        ],
       [0.        , 1.        , 0.        ],
       [0.6       , 0.5819209 , 0.87234043],
       [1.        , 0.        , 0.17021277]])

# Normalize (normalization)

Rescales each row (sample) to unit norm: with L1 the absolute values of a row sum to 1, with L2 the squares of its values sum to 1

In [15]:
# Normalize the same input twice, once per norm, so the two results can be
# compared side by side in the printed output.
data_normalized_l1 = preprocessing.normalize(input_data, norm="l1")
data_normalized_l2 = preprocessing.normalize(input_data, norm="l2")

# Each call prints the label and the matrix on separate lines.
print("L1 normalization", data_normalized_l1, sep="\n")
print("L2 normalization", data_normalized_l2, sep="\n")

L1 normalization
[[ 0.45132743 -0.25663717  0.2920354 ]
 [-0.0794702   0.51655629 -0.40397351]
 [ 0.609375    0.0625      0.328125  ]
 [ 0.33640553 -0.4562212  -0.20737327]]
L2 normalization
[[ 0.75765788 -0.43082507  0.49024922]
 [-0.12030718  0.78199664 -0.61156148]
 [ 0.87690281  0.08993875  0.47217844]
 [ 0.55734935 -0.75585734 -0.34357152]]
