In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import missingno as msn

## Rescaling

**“Rescaling”** a vector means to add or subtract a constant and then multiply or divide by a constant, as you would do to change the units of measurement of the data, **for example, to convert a temperature from Celsius to Fahrenheit**

### TWO TYPES:

**Normalization** (min-max scaling) rescales each feature so that its values lie between 0 and 1.

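**Standardization** rescales each feature to have Mean = 0 and Standard Deviation = 1 (the location and scale of a standard normal distribution). It shifts and scales the values; it does not change the shape of their distribution.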

## Normalization


In [3]:
# Min-max scaling: x_scaled = (x - min) / (max - min), which maps each feature onto [0, 1]

In [4]:
# Toy dataset: a class label plus two numeric features on different scales,
# used below to demonstrate min-max scaling and standardization.
# NOTE(review): in "Alcohol", `18, 33` are two separate entries (33 is the
# column maximum) — confirm that 18.33 was not intended.
wine_columns = {
    "Class": [
        1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
        3, 3, 4, 3, 5, 3, 3, 2, 2, 2, 3, 3,
    ],
    "Alcohol": [
        14.35, 14.36, 16.28, 19.22, 16.22, 12.34, 13.44, 14.45,
        14.68, 20.10, 22.33, 24.12, 18.34, 14.34, 16.77, 19.22,
        18, 33, 20.44, 14.56, 17.89, 15.80, 16.54, 13.21,
    ],
    "Malic": [
        1.71, 2.34, 3.45, 2.56, 6.58, 3.77, 2.34, 1.99,
        6.54, 7.65, 8.22, 8.23, 4.44, 5.69, 2.34, 9.12,
        8.43, 7.16, 6.75, 8.88, 4.91, 3.38, 2.35, 4.57,
    ],
}
df = pd.DataFrame(wine_columns)
df.head()

Unnamed: 0,Class,Alcohol,Malic
0,1,14.35,1.71
1,1,14.36,2.34
2,1,16.28,3.45
3,1,19.22,2.56
4,1,16.22,6.58


In [5]:
# creating an object of MinMaxScaler named scaling

In [6]:
# Min-max scaler with its default target range spelled out.
scaling = MinMaxScaler(feature_range=(0, 1))

In [7]:
# Learn each column's min and max, then return both features rescaled to [0, 1].
scaling.fit_transform(df.loc[:, ["Alcohol", "Malic"]])

array([[0.09728945, 0.        ],
       [0.09777348, 0.08502024],
       [0.19070668, 0.23481781],
       [0.33301065, 0.11470985],
       [0.18780252, 0.65721997],
       [0.        , 0.2780027 ],
       [0.05324298, 0.08502024],
       [0.10212972, 0.03778677],
       [0.11326234, 0.65182186],
       [0.37560503, 0.80161943],
       [0.48354308, 0.87854251],
       [0.57018393, 0.87989204],
       [0.29041626, 0.36842105],
       [0.09680542, 0.53711201],
       [0.21442401, 0.08502024],
       [0.33301065, 1.        ],
       [0.27395934, 0.90688259],
       [1.        , 0.73549258],
       [0.39206196, 0.68016194],
       [0.10745402, 0.96761134],
       [0.26863504, 0.43184885],
       [0.16747338, 0.22537112],
       [0.20329138, 0.08636977],
       [0.04211036, 0.38596491]])

## Standardization (Z-Score Normalisation)

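Here every feature is transformed so that it has mean = 0 and standard deviation = 1 (the location and scale of a standard normal distribution), using $z = (x - \mu) / \sigma$.
Standardization only shifts and rescales the values; it does not make the data normally distributed.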

In [8]:
# Rebind `scaling` to a z-score scaler, then fit it on the two numeric
# features and return them rescaled to zero mean and unit standard deviation.
scaling = StandardScaler()
scaling.fit_transform(df.loc[:, ["Alcohol", "Malic"]])

array([[-0.72386526, -1.39449143],
       [-0.72156728, -1.13848474],
       [-0.28035416, -0.68742535],
       [ 0.39525341, -1.04908558],
       [-0.29414207,  0.58448086],
       [-1.18576024, -0.55739021],
       [-0.93298189, -1.13848474],
       [-0.70088541, -1.28071068],
       [-0.64803176,  0.56822647],
       [ 0.59747609,  1.01928586],
       [ 1.10992673,  1.25091096],
       [ 1.52126604,  1.25497456],
       [ 0.19303074, -0.28512914],
       [-0.72616325,  0.22282063],
       [-0.1677529 , -1.13848474],
       [ 0.39525341,  1.61663479],
       [ 0.11489925,  1.33624652],
       [ 3.56187668,  0.82016956],
       [ 0.67560758,  0.65356203],
       [-0.67560758,  1.51910844],
       [ 0.08962141, -0.09414002],
       [-0.39065744, -0.71587054],
       [-0.22060656, -1.13442115],
       [-0.98583555, -0.23230236]])

In [11]:
# Practice example: a single column "A" holding the integers 1 through 8.
df_pract = pd.DataFrame({"A": list(range(1, 9))})
df_pract

Unnamed: 0,A
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8


In [12]:
# Fresh min-max scaler (default [0, 1] range) for the practice frame.
scaling = MinMaxScaler(feature_range=(0, 1))

In [14]:
# Fit on column "A" and return it rescaled so 1 maps to 0.0 and 8 maps to 1.0.
scaling.fit_transform(df_pract.loc[:, ["A"]])

array([[0.        ],
       [0.14285714],
       [0.28571429],
       [0.42857143],
       [0.57142857],
       [0.71428571],
       [0.85714286],
       [1.        ]])