In [1]:
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler

In [2]:
# Toy design matrix: 3 samples (rows) x 3 features (columns), stored as float32
X = np.array(
    [
        [1, 2, 3],
        [2, 4, 7],
        [3, 6, 9],
    ],
    dtype=np.float32,
)

In [3]:
# Show the raw sample matrix before any centering or scaling is applied
print(X)

[[ 1.  2.  3.]
 [ 2.  4.  7.]
 [ 3.  6.  9.]]


- Zero-centering and L2-norm normalization of each input feature (column)

In [4]:
# Per-feature mean (reduced over the rows, axis=0); used as the centering offset
X_offset = np.mean(X, axis=0)
print(X_offset)

[ 2.          4.          6.33333349]


In [5]:
# Shift every feature to zero mean by subtracting the per-column offset
# (X_offset broadcasts across the rows of X)
X_centered = np.subtract(X, X_offset)
print(X_centered)

[[-1.         -2.         -3.33333349]
 [ 0.          0.          0.66666651]
 [ 1.          2.          2.66666651]]


In [6]:
# Scale each centered feature (column, axis=0) to unit L2 norm;
# return_norm=True also hands back the per-column norms that were divided out.
# copy=True keeps X_centered intact: with copy=False, normalize may overwrite
# its input in place, so X_centered would stop holding the centered data and
# re-running this cell would report different norms.
X_normalized, X_centered_norm = normalize(X_centered, axis=0,
                                          copy=True, return_norm=True)
print(X_normalized)
print("=" * 60)
print(X_centered_norm)

[[-0.70710677 -0.70710677 -0.77151674]
 [ 0.          0.          0.1543033 ]
 [ 0.70710677  0.70710677  0.61721331]]
============================================================
[ 1.41421354  2.82842708  4.32049417]


- Standardization: Calculating means and standard deviations

In [7]:
# Per-feature statistics, both reduced over the rows (axis=0)
X_offset = X.mean(axis=0)  # column means
X_std = X.std(axis=0)      # column standard deviations (NumPy's default ddof=0)

# One array per output line: means first, then standard deviations
print(X_offset, X_std, sep="\n")

[ 2.          4.          6.33333349]
[ 0.81649661  1.63299322  2.49443841]


In [8]:
# Manual standardization: subtract each feature's mean, then divide by its
# standard deviation (both vectors broadcast across the rows of X)
X_standardized_v2 = np.divide(np.subtract(X, X_offset), X_std)
print(X_standardized_v2)

[[-1.2247448  -1.2247448  -1.33630621]
 [ 0.          0.          0.26726115]
 [ 1.2247448   1.2247448   1.06904483]]


- Standardization: Using `sklearn.preprocessing.StandardScaler`

In [9]:
# Fit the scaler on X first, then apply the fitted transform to the same data
scaler = StandardScaler().fit(X)
X_standardized = scaler.transform(X)
print(X_standardized)

[[-1.2247448  -1.2247448  -1.33630621]
 [ 0.          0.          0.26726115]
 [ 1.2247448   1.2247448   1.06904483]]
