In [1]:
# Enable code formatting using external plugin: nb_black.
%reload_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Data manipulation libraries
import numpy as np

# Data visualization libraries
from prettytable import PrettyTable

# Data modeling libraries
from sklearn.preprocessing import normalize, StandardScaler

# Library versions used in the analysis below.
print("NumPy version:", np.__version__)

# Configure NumPy.
# Allow up to 130 characters per printed line; longer array output wraps onto the next line.
np.set_printoptions(linewidth=130)

NumPy version: 1.21.5


<IPython.core.display.Javascript object>

In [3]:
# Toy matrix with 3 rows and 5 columns. The third and fourth columns are
# constant (all 3s and all 0s), so their standard deviation is 0.
X = np.array([[100, -3, 3, 0, 100], [125, 1, 3, 0, 1], [150, 2, 3, 0, 0]])
X

array([[100,  -3,   3,   0, 100],
       [125,   1,   3,   0,   1],
       [150,   2,   3,   0,   0]])

<IPython.core.display.Javascript object>

In [4]:
# Per-column mean: axis=0 reduces over the rows, giving one value per column.
mean = X.mean(axis=0)
mean

array([125.        ,   0.        ,   3.        ,   0.        ,  33.66666667])

<IPython.core.display.Javascript object>

In [5]:
# Per-column standard deviation (NumPy's default ddof=0, i.e. population std).
# The two constant columns come out as exactly 0.
std = X.std(axis=0)
std

array([20.41241452,  2.1602469 ,  0.        ,  0.        , 46.90652644])

<IPython.core.display.Javascript object>

In [6]:
# Center every column on zero; `mean` (one value per column) is broadcast across the rows.
z_mean = X - mean
z_mean

array([[-25.        ,  -3.        ,   0.        ,   0.        ,  66.33333333],
       [  0.        ,   1.        ,   0.        ,   0.        , -32.66666667],
       [ 25.        ,   2.        ,   0.        ,   0.        , -33.66666667]])

<IPython.core.display.Javascript object>

In [7]:
# Naive z-score: for the constant columns this is 0 / 0, which yields NaN
# (and a NumPy warning). This is the problem the guarded division solves.
z_mean / std

  z_mean / std


array([[-1.22474487, -1.38873015,         nan,         nan,  1.41416   ],
       [ 0.        ,  0.46291005,         nan,         nan, -0.6964205 ],
       [ 1.22474487,  0.9258201 ,         nan,         nan, -0.7177395 ]])

<IPython.core.display.Javascript object>

In [8]:
# Float array of zeros with the same shape as X. Used as the `out` buffer of
# np.divide so that positions skipped by `where` hold 0 instead of arbitrary values.
np.zeros_like(X, dtype="float")

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

<IPython.core.display.Javascript object>

### Implement Standardization (Z-score Normalization)

In [9]:
def feature_scaling(X):
    """Standardize each column of ``X`` with the z-score ``(x - mean) / std``.

    Statistics are computed along axis 0. The standard deviation is the
    population one (NumPy's default ``ddof=0``). Columns whose standard
    deviation is 0 are mapped to 0 instead of producing NaN.

    The input and the scaled result are printed, and the scaled result is
    also returned so that callers can reuse or assert on it.

    Parameters
    ----------
    X : array_like
        Numeric data (an ndarray or anything ``np.asarray`` accepts).

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``X`` holding the scaled values.
    """
    print("X:")
    print(X)

    # Accept plain (nested) sequences as well as ndarrays.
    data = np.asarray(X)

    mean = data.mean(axis=0)
    std = data.std(axis=0)

    z_mean = data - mean
    # Divide only where std is non-zero; everywhere else the pre-filled zeros
    # of `out` are kept, which avoids 0 / 0 = NaN for constant columns.
    n_X = np.divide(
        z_mean, std, out=np.zeros_like(data, dtype="float"), where=std != 0
    )
    print("Normalized X:")
    print(n_X)

    return n_X

<IPython.core.display.Javascript object>

#### Testing

In [10]:
# Single-row input: every column has zero standard deviation, so the
# zero-std guard should turn every scaled value into 0.
feature_scaling(np.array([[100, 1, 0, -1, 56]]))

X:
[[100   1   0  -1  56]]
Normalized X:
[[0. 0. 0. 0. 0.]]


<IPython.core.display.Javascript object>

In [11]:
# 3x5 input whose third and fourth columns are constant: those columns
# should scale to 0 rather than NaN.
X = np.array([[100, -3, 3, 0, 100], [125, 1, 3, 0, 1], [150, 2, 3, 0, 0]])
feature_scaling(X)

X:
[[100  -3   3   0 100]
 [125   1   3   0   1]
 [150   2   3   0   0]]
Normalized X:
[[-1.22474487 -1.38873015  0.          0.          1.41416   ]
 [ 0.          0.46291005  0.          0.         -0.6964205 ]
 [ 1.22474487  0.9258201   0.          0.         -0.7177395 ]]


<IPython.core.display.Javascript object>

In [12]:
# Single-column input (5 rows, 1 column) with a non-zero spread.
feature_scaling(np.array([[100], [1], [0], [-1], [56]]))

X:
[[100]
 [  1]
 [  0]
 [ -1]
 [ 56]]
Normalized X:
[[ 1.69160781]
 [-0.74253715]
 [-0.76712447]
 [-0.79171179]
 [ 0.6097656 ]]


<IPython.core.display.Javascript object>

### Standardization (Z-score Normalization) using scikit-learn

In [13]:
def feature_scaling2(X):
    """Standardize each column of ``X`` with scikit-learn's ``StandardScaler``.

    A fresh ``StandardScaler`` is fitted on ``X`` and then used to transform
    the same data. The input and the standardized result are printed, and the
    standardized result is also returned so that callers can reuse or assert
    on it.

    Parameters
    ----------
    X : array_like
        2-D numeric data, as expected by ``StandardScaler.fit``.

    Returns
    -------
    numpy.ndarray
        The standardized data produced by ``StandardScaler.transform``.
    """
    print("X:")
    print(X)

    scaler = StandardScaler().fit(X)
    std_X = scaler.transform(X)

    print("Standardized X:")
    print(std_X)

    return std_X

<IPython.core.display.Javascript object>

#### Testing

In [14]:
# Single-row input, the same case as used for feature_scaling: all columns
# have zero variance.
feature_scaling2(np.array([[100, 1, 0, -1, 56]]))

X:
[[100   1   0  -1  56]]
Standardized X:
[[0. 0. 0. 0. 0.]]


<IPython.core.display.Javascript object>

In [15]:
# Same 3x5 input as used for feature_scaling (two constant columns), so the
# two outputs can be compared side by side.
X = np.array([[100, -3, 3, 0, 100], [125, 1, 3, 0, 1], [150, 2, 3, 0, 0]])
feature_scaling2(X)

X:
[[100  -3   3   0 100]
 [125   1   3   0   1]
 [150   2   3   0   0]]
Standardized X:
[[-1.22474487 -1.38873015  0.          0.          1.41416   ]
 [ 0.          0.46291005  0.          0.         -0.6964205 ]
 [ 1.22474487  0.9258201   0.          0.         -0.7177395 ]]


<IPython.core.display.Javascript object>

In [16]:
# Single-column input (5 rows, 1 column), the same case as used for feature_scaling.
feature_scaling2(np.array([[100], [1], [0], [-1], [56]]))

X:
[[100]
 [  1]
 [  0]
 [ -1]
 [ 56]]
Standardized X:
[[ 1.69160781]
 [-0.74253715]
 [-0.76712447]
 [-0.79171179]
 [ 0.6097656 ]]


<IPython.core.display.Javascript object>