# **Standardization**

In [0]:
#Import
from sklearn import preprocessing
import numpy as np

In [0]:
# Training data: a 3x3 float array (rows are samples, columns are features)
X_train = np.array([
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
])


In [0]:
# Column-wise (per-feature) mean of the training data
np.mean(X_train, axis=0)

array([1.        , 0.        , 0.33333333])

In [0]:
# Column-wise (per-feature) standard deviation of the training data
# (NumPy's default here is the population std, i.e. ddof=0)
np.std(X_train, axis=0)

array([0.81649658, 0.81649658, 1.24721913])

In [0]:
# Standardize each feature (column) of X_train to zero mean and unit variance
# using the stateless `scale` helper.
X_scaled = preprocessing.scale(X_train, axis=0)

In [0]:
# Display the standardized training data
X_scaled

array([[ 0.        , -1.22474487,  1.33630621],
       [ 1.22474487,  0.        , -0.26726124],
       [-1.22474487,  1.22474487, -1.06904497]])

In [0]:
# Sanity check: every feature of the scaled data should now have mean 0
np.mean(X_scaled, axis=0)

array([0., 0., 0.])

In [0]:
# Sanity check: every feature of the scaled data should now have std 1
np.std(X_scaled, axis=0)

array([1., 1., 1.])

**StandardScaler** is the estimator-API equivalent of the **scale** function: it provides the same standardization through `fit` and `transform` methods.

This makes it possible to compute the mean and standard deviation on a *training set* and later reapply the same transformation to the *testing set*.


In [0]:
# Create a StandardScaler and fit it on X_train so that it stores the
# per-feature mean and standard deviation of the training data.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)

In [0]:
# Show the per-feature mean stored on the fitted scaler
# (nothing is recomputed here; it equals the mean of X_train shown earlier)
scaler.mean_

array([1.        , 0.        , 0.33333333])

In [0]:
# Apply the fitted scaler to X_train; the result has zero mean and unit
# variance per feature and matches the output of `scale` above
scaler.transform(X_train)

array([[ 0.        , -1.22474487,  1.33630621],
       [ 1.22474487,  0.        , -0.26726124],
       [-1.22474487,  1.22474487, -1.06904497]])

In [0]:
# Test data: a single sample with three features, as a nested list
X_test = [
    [-1.0, 1.0, 0.0],
]

In [0]:
# Transform the test data with the scaler fitted on X_train, i.e. reuse the
# training mean and standard deviation rather than statistics of X_test
scaler.transform(X_test)

array([[-2.44948974,  1.22474487, -0.26726124]])

# Normalization

In [0]:
# Data to normalize: a 3x3 nested list of floats (one sample per row)
X = [
    [1.0, -1.0, 2.0],
    [2.0, 0.0, 0.0],
    [0.0, 1.0, -1.0],
]

In [0]:
# Calculate the L2 (Euclidean) norm of each row of X.
# Note: the L2 norm is sqrt(sum(x_i ** 2)); it is not the mean squared error.
np.linalg.norm(X, ord=2, axis=1)

array([2.44948974, 2.        , 1.41421356])

In [0]:
# Normalize each row (sample) of X to unit L2 (Euclidean) norm.
# Note: "l2" refers to the Euclidean norm, not to the mean squared error.
X_normalized = preprocessing.normalize(X, norm="l2")

In [0]:
# Display the row-normalized data
X_normalized

array([[ 0.40824829, -0.40824829,  0.81649658],
       [ 1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.70710678, -0.70710678]])

In [0]:
# Sanity check: the L2 (Euclidean) norm of every row of X_normalized should be 1.
# Note: the L2 norm is sqrt(sum(x_i ** 2)); it is not the mean squared error.
np.linalg.norm(X_normalized, ord=2, axis=1)

array([1., 1., 1.])