In [2]:
Price = [110, 105, 115, 120, 110, 130, 150, 100, 105]

#### Import pandas, create dataframe

In [3]:
import pandas as pd

In [20]:
df = pd.DataFrame({'price': Price})
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


### Create excel file

In [21]:
df.to_excel('feature_price.xlsx', index=False)
print('excel file created successfully.')

excel file created successfully.


In [22]:
df = pd.read_excel('feature_price.xlsx')
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


## Techniques to perform for Feature Transformation:
▪ Normalization  
▪ Standardization  
▪ Log Transformation  
▪ Robust Scaler  
▪ Max Absolute Scaler  

## Normalization

In [23]:
min_price = df.price.min()
max_price = df.price.max()

In [24]:
min_price

100

In [25]:
max_price

150

In [26]:
df['price_norm_manual'] = (df.price - min_price ) / (max_price - min_price)
df

Unnamed: 0,price,price_norm_manual
0,110,0.2
1,105,0.1
2,115,0.3
3,120,0.4
4,110,0.2
5,130,0.6
6,150,1.0
7,100,0.0
8,105,0.1


In [27]:
from sklearn.preprocessing import MinMaxScaler
mmx = MinMaxScaler()

In [28]:
df['price_norm_sklearn'] = mmx.fit_transform(df[['price']])
df.head()

Unnamed: 0,price,price_norm_manual,price_norm_sklearn
0,110,0.2,0.2
1,105,0.1,0.1
2,115,0.3,0.3
3,120,0.4,0.4
4,110,0.2,0.2


## Standardization

In [29]:
mean_price = df.price.mean()
std = df.price.std()

In [30]:
df['price_std_manual'] = (df['price'] - mean_price) / std
df

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual
0,110,0.2,0.2,-0.392544
1,105,0.1,0.1,-0.713717
2,115,0.3,0.3,-0.071372
3,120,0.4,0.4,0.249801
4,110,0.2,0.2,-0.392544
5,130,0.6,0.6,0.892146
6,150,1.0,1.0,2.176837
7,100,0.0,0.0,-1.03489
8,105,0.1,0.1,-0.713717


In [31]:
from sklearn.preprocessing import StandardScaler 
scaler = StandardScaler()

In [32]:
df['price_std_sklearn'] = scaler.fit_transform(df[['price']])
df.head()

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn
0,110,0.2,0.2,-0.392544,-0.416356
1,105,0.1,0.1,-0.713717,-0.757011
2,115,0.3,0.3,-0.071372,-0.075701
3,120,0.4,0.4,0.249801,0.264954
4,110,0.2,0.2,-0.392544,-0.416356


## Log Transformation

In [33]:
import numpy as np

In [44]:
df['price_log_manual'] = np.log10(df.price)
df.head()

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn,price_log_manual
0,110,0.2,0.2,-0.392544,-0.416356,2.041393
1,105,0.1,0.1,-0.713717,-0.757011,2.021189
2,115,0.3,0.3,-0.071372,-0.075701,2.060698
3,120,0.4,0.4,0.249801,0.264954,2.079181
4,110,0.2,0.2,-0.392544,-0.416356,2.041393


In [63]:
from sklearn.preprocessing import FunctionTransformer
log_10 = FunctionTransformer(np.log10, validate=True)

In [69]:
df['price_log_sklearn'] = log_10.transform(df[['price']])
df.head()



Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn,price_log_manual,price_log_sklearn
0,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393
1,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189
2,115,0.3,0.3,-0.071372,-0.075701,2.060698,2.060698
3,120,0.4,0.4,0.249801,0.264954,2.079181,2.079181
4,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393


## Robust Scaler

In [72]:
IQR = df.price.quantile(.75) - df.price.quantile(.25)
med = df.price.median()

In [74]:
df['price_robust_manual'] = (df.price - med) / IQR
df

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn,price_log_manual,price_log_sklearn,price_robust_manual
0,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0
1,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189,-0.333333
2,115,0.3,0.3,-0.071372,-0.075701,2.060698,2.060698,0.333333
3,120,0.4,0.4,0.249801,0.264954,2.079181,2.079181,0.666667
4,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0
5,130,0.6,0.6,0.892146,0.946264,2.113943,2.113943,1.333333
6,150,1.0,1.0,2.176837,2.308884,2.176091,2.176091,2.666667
7,100,0.0,0.0,-1.03489,-1.097666,2.0,2.0,-0.666667
8,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189,-0.333333


In [75]:
from sklearn.preprocessing import RobustScaler
robust_scaler = RobustScaler()

In [77]:
df['price_robust_sklearn'] = robust_scaler.fit_transform(df[['price']])
df.head()

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn,price_log_manual,price_log_sklearn,price_robust_manual,price_robust_sklearn
0,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0,0.0
1,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189,-0.333333,-0.333333
2,115,0.3,0.3,-0.071372,-0.075701,2.060698,2.060698,0.333333,0.333333
3,120,0.4,0.4,0.249801,0.264954,2.079181,2.079181,0.666667,0.666667
4,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0,0.0


## Max Absolute Scaler

In [79]:
df['price_max_manual'] = df.price / df.price.max()
df

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn,price_log_manual,price_log_sklearn,price_robust_manual,price_robust_sklearn,price_max_manual
0,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0,0.0,0.733333
1,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189,-0.333333,-0.333333,0.7
2,115,0.3,0.3,-0.071372,-0.075701,2.060698,2.060698,0.333333,0.333333,0.766667
3,120,0.4,0.4,0.249801,0.264954,2.079181,2.079181,0.666667,0.666667,0.8
4,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0,0.0,0.733333
5,130,0.6,0.6,0.892146,0.946264,2.113943,2.113943,1.333333,1.333333,0.866667
6,150,1.0,1.0,2.176837,2.308884,2.176091,2.176091,2.666667,2.666667,1.0
7,100,0.0,0.0,-1.03489,-1.097666,2.0,2.0,-0.666667,-0.666667,0.666667
8,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189,-0.333333,-0.333333,0.7


In [80]:
from sklearn.preprocessing import MaxAbsScaler
mx = MaxAbsScaler()

In [82]:
df['price_max_sklearn'] = mx.fit_transform(df[['price']])
df.head()

Unnamed: 0,price,price_norm_manual,price_norm_sklearn,price_std_manual,price_std_sklearn,price_log_manual,price_log_sklearn,price_robust_manual,price_robust_sklearn,price_max_manual,price_max_sklearn
0,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0,0.0,0.733333,0.733333
1,105,0.1,0.1,-0.713717,-0.757011,2.021189,2.021189,-0.333333,-0.333333,0.7,0.7
2,115,0.3,0.3,-0.071372,-0.075701,2.060698,2.060698,0.333333,0.333333,0.766667,0.766667
3,120,0.4,0.4,0.249801,0.264954,2.079181,2.079181,0.666667,0.666667,0.8,0.8
4,110,0.2,0.2,-0.392544,-0.416356,2.041393,2.041393,0.0,0.0,0.733333,0.733333
