# <font color="magenta"><h3 align="center">Feature Scaling</h3></font> 

<h3 style = "color:green" >Different types of feature scaling techniques:</h3>

1. Min-Max Scaling (Normalization) <br>
2. Standardization (Z-score normalization) <br>
3. Log Transformation <br>
4. Robust Scaling <br>
5. Max Absolute Scaling

<h3 style = "color:green" >Imported Libraries</h3>

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler,StandardScaler,FunctionTransformer,RobustScaler,MaxAbsScaler

<h3 style = "color:green" >Data Set</h3> 

In [2]:
# Toy dataset: nine prices in a single column, small enough that every
# scaling result below can be checked by hand.
data = {
    'price': [110, 105, 115, 120, 110, 130, 150, 100, 105],
}

df = pd.DataFrame(data)
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


In [3]:
# Pull the price column out as a Series; the hand-written formulas in the
# "Using Python" sections all operate on this variable.
x = df['price']

## <font color="purple"><h3>1. Min-Max Scaling (Normalization)</h3></font>

<img src="Normalization.JPG" width="400" height="200">

### Using Python

In [4]:
# Min-max formula: shift by the minimum, then divide by the range (max - min).
lo, hi = x.min(), x.max()
normalization = (x - lo) / (hi - lo)
normalization

0    0.2
1    0.1
2    0.3
3    0.4
4    0.2
5    0.6
6    1.0
7    0.0
8    0.1
Name: price, dtype: float64

### Using sklearn module

In [5]:
# MinMaxScaler is already imported in the notebook's import cell, so it is
# not re-imported here. Default settings are used.
mms = MinMaxScaler()

In [6]:
# The scaler is given a one-column DataFrame (list selector), not a Series,
# so the input is two-dimensional.
mms.fit_transform(df.loc[:, ['price']])

array([[0.2],
       [0.1],
       [0.3],
       [0.4],
       [0.2],
       [0.6],
       [1. ],
       [0. ],
       [0.1]])

## <font color="purple"><h3>2. Standardization (Z-score Normalization)</h3></font>

<img src="standarization.JPG" width="400" height="200">

### Using Python

In [7]:
# ddof=0 divides by n (population standard deviation) instead of n - 1;
# with this setting the result matches the StandardScaler output shown below.
mu = x.mean()
sigma = x.std(ddof=0)
standardization = (x - mu) / sigma
standardization

0   -0.416356
1   -0.757011
2   -0.075701
3    0.264954
4   -0.416356
5    0.946264
6    2.308884
7   -1.097666
8   -0.757011
Name: price, dtype: float64

### Using sklearn module

In [8]:
# StandardScaler is already imported in the notebook's import cell, so it is
# not re-imported here. Default settings are used.
ss = StandardScaler()

In [9]:
# Fit on the one-column price frame and return the z-scores in one step.
ss.fit_transform(df.loc[:, ['price']])

array([[-0.41635614],
       [-0.75701116],
       [-0.07570112],
       [ 0.26495391],
       [-0.41635614],
       [ 0.94626396],
       [ 2.30888405],
       [-1.09766619],
       [-0.75701116]])

## <font color="purple"><h3>3. Log Transformation</h3></font>

### Using Python

In [10]:
# np.log1p(x) evaluates ln(1 + x), so a value of 0 maps to 0 instead of -inf.
log_transformation = x.pipe(np.log1p)
log_transformation

0    4.709530
1    4.663439
2    4.753590
3    4.795791
4    4.709530
5    4.875197
6    5.017280
7    4.615121
8    4.663439
Name: price, dtype: float64

### Using sklearn module

In [11]:
# FunctionTransformer is already imported in the notebook's import cell, so it
# is not re-imported here. It wraps np.log1p so the log transform can be used
# through the same fit/transform interface as the other scalers.
fc = FunctionTransformer(np.log1p)

In [12]:
# Apply the wrapped log1p to the one-column price frame; the recorded output
# is a DataFrame rather than a NumPy array.
fc.fit_transform(df.loc[:, ['price']])

Unnamed: 0,price
0,4.70953
1,4.663439
2,4.75359
3,4.795791
4,4.70953
5,4.875197
6,5.01728
7,4.615121
8,4.663439


## <font color="purple"><h3>4. Robust Scaling</h3></font>

<img src="Robust Scaler.JPG" width="700" height="300">

### Using Python

In [13]:
# Robust scaling: centre on the median and divide by the interquartile range
# (75th percentile minus 25th percentile).
q1, q3 = np.percentile(x, [25, 75])
robust_Scaling = (x - x.median()) / (q3 - q1)
robust_Scaling

0    0.000000
1   -0.333333
2    0.333333
3    0.666667
4    0.000000
5    1.333333
6    2.666667
7   -0.666667
8   -0.333333
Name: price, dtype: float64

### Using sklearn module

In [14]:
# RobustScaler is already imported in the notebook's import cell, so it is
# not re-imported here. Default settings are used.
RoSc = RobustScaler()

In [15]:
# Fit on the one-column price frame and return the robust-scaled values.
RoSc.fit_transform(df.loc[:, ['price']])

array([[ 0.        ],
       [-0.33333333],
       [ 0.33333333],
       [ 0.66666667],
       [ 0.        ],
       [ 1.33333333],
       [ 2.66666667],
       [-0.66666667],
       [-0.33333333]])

## <font color="purple"><h3>5. Max Absolute Scaling</h3></font>

<img src="Max Absolute Scaler.JPG" width="600" height="250">

### Using Python

In [16]:
# Max-abs scaling divides by the largest *absolute* value, so results lie in
# [-1, 1]. Dividing by plain max(x) gives the same numbers only when the
# largest magnitude is a positive value (as in this dataset); it would be wrong
# for a column such as [-200, 50].
max_abs_scaling = x / x.abs().max()
max_abs_scaling

0    0.733333
1    0.700000
2    0.766667
3    0.800000
4    0.733333
5    0.866667
6    1.000000
7    0.666667
8    0.700000
Name: price, dtype: float64

### Using sklearn module

In [17]:
# MaxAbsScaler is already imported in the notebook's import cell, so it is
# not re-imported here.
mx = MaxAbsScaler()

In [18]:
# Fit on the one-column price frame and return the max-abs-scaled values.
mx.fit_transform(df.loc[:, ['price']])

array([[0.73333333],
       [0.7       ],
       [0.76666667],
       [0.8       ],
       [0.73333333],
       [0.86666667],
       [1.        ],
       [0.66666667],
       [0.7       ]])

## <font color="green">All Feature Transformations using Python Manually</font> 

In [19]:
# Side-by-side table of every hand-computed transformation of the price column.
# Work on a copy so the original frame stays untouched.
df_python = df.copy()

# Min-max normalization: shift by the minimum, divide by the range.
df_python['Normalization(py)'] = (x - x.min()) / (x.max() - x.min())

# Z-score: ddof=0 divides by n (population standard deviation).
df_python['Standarization(py)'] = (x - x.mean()) / x.std(ddof=0)

# Log transform: log1p(x) = ln(1 + x).
df_python['Log_trans(py)'] = np.log1p(x)

# Robust scaling: centre on the median, divide by the interquartile range.
df_python['Robust_Scaling(py)'] = (x - np.median(x)) / (np.percentile(x, 75) - np.percentile(x, 25))

# Max-abs scaling: divide by the largest absolute value. Plain max(x) would be
# wrong whenever the largest magnitude in the column is negative.
df_python['Max_Abs_Scaling(py)'] = x / x.abs().max()

df_python

Unnamed: 0,price,Normalization(py),Standarization(py),Log_trans(py),Robust_Scaling(py),Max_Abs_Scaling(py)
0,110,0.2,-0.416356,4.70953,0.0,0.733333
1,105,0.1,-0.757011,4.663439,-0.333333,0.7
2,115,0.3,-0.075701,4.75359,0.333333,0.766667
3,120,0.4,0.264954,4.795791,0.666667,0.8
4,110,0.2,-0.416356,4.70953,0.0,0.733333
5,130,0.6,0.946264,4.875197,1.333333,0.866667
6,150,1.0,2.308884,5.01728,2.666667,1.0
7,100,0.0,-1.097666,4.615121,-0.666667,0.666667
8,105,0.1,-0.757011,4.663439,-0.333333,0.7


## <font color="green">All Feature Transformations using Scikit-Learn Module</font>

In [20]:
# Side-by-side table of every scikit-learn transformation of the price column.
# Work on a copy so the original frame stays untouched.
df_sklearn = df.copy()

# Output column label -> transformer created earlier in the notebook.
# Insertion order is the column order of the resulting table.
# NOTE(review): 'Ronust_Scaling(sk)' looks like a typo for 'Robust_Scaling(sk)';
# the label is kept unchanged so it still matches the recorded output.
sk_transformers = {
    'Normalization(sk)': mms,
    'Standarization(sk)': ss,
    'Log_trans(sk)': fc,
    'Ronust_Scaling(sk)': RoSc,
    'Max_Abs(sk)': mx,
}

for column_label, transformer in sk_transformers.items():
    df_sklearn[column_label] = transformer.fit_transform(df[['price']])

df_sklearn

Unnamed: 0,price,Normalization(sk),Standarization(sk),Log_trans(sk),Ronust_Scaling(sk),Max_Abs(sk)
0,110,0.2,-0.416356,4.70953,0.0,0.733333
1,105,0.1,-0.757011,4.663439,-0.333333,0.7
2,115,0.3,-0.075701,4.75359,0.333333,0.766667
3,120,0.4,0.264954,4.795791,0.666667,0.8
4,110,0.2,-0.416356,4.70953,0.0,0.733333
5,130,0.6,0.946264,4.875197,1.333333,0.866667
6,150,1.0,2.308884,5.01728,2.666667,1.0
7,100,0.0,-1.097666,4.615121,-0.666667,0.666667
8,105,0.1,-0.757011,4.663439,-0.333333,0.7
