In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw price data; each scaling section in this notebook starts from this file.
df = pd.read_csv(filepath_or_buffer='Feature_engineering.csv')

In [3]:
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


1. Normalization
2. Standardization
3. Log Transformation
4. Robust Scaler
5. Max Absolute Scaler


# Normalization Manual

In [4]:
# Column-wise extremes of the price feature. Selecting with [['price']] keeps a
# DataFrame, so each result is a one-element Series keyed by 'price'.
min_price, max_price = df[['price']].min(), df[['price']].max()

In [5]:
min_price

price    100
dtype: int64

In [6]:
max_price

price    150
dtype: int64

In [7]:
# Min-max normalization: (x - min) / (max - min), so the smallest price maps to 0
# and the largest to 1.
df['Price_scaled_manual'] = df[['price']].sub(min_price).div(max_price - min_price)

In [8]:
df

Unnamed: 0,price,Price_scaled_manual
0,110,0.2
1,105,0.1
2,115,0.3
3,120,0.4
4,110,0.2
5,130,0.6
6,150,1.0
7,100,0.0
8,105,0.1


In [9]:
# Persist the frame with the manually normalized column (the index is written as well).
df.to_csv(path_or_buf="price_scaled_manual(Normalization).csv")

# Normalization Sklearn

In [10]:
from sklearn.preprocessing import MinMaxScaler

In [11]:
# Min-max scaler targeting the [0, 1] interval (the library default, spelled out).
ps = MinMaxScaler(feature_range=(0, 1))

In [12]:
# scikit-learn transformers work on 2-D input, hence the double brackets that
# keep 'price' as a one-column DataFrame rather than a Series.
df['price-scaled_sklearn'] = ps.fit(df[['price']]).transform(df[['price']])

In [13]:
df

Unnamed: 0,price,Price_scaled_manual,price-scaled_sklearn
0,110,0.2,0.2
1,105,0.1,0.1
2,115,0.3,0.3
3,120,0.4,0.4
4,110,0.2,0.2
5,130,0.6,0.6
6,150,1.0,1.0
7,100,0.0,0.0
8,105,0.1,0.1


In [14]:
# Persist the frame, which now holds both the manual and the sklearn normalized columns.
df.to_csv(path_or_buf="price_scaled_sklearn(Normalization).csv")

# Standardization Manual

In [15]:
# Reload the raw data so the normalization columns added above do not carry over.
df = pd.read_csv(filepath_or_buffer='Feature_engineering.csv')

In [16]:
# NOTE(review): drop_duplicates() returns a new DataFrame and the result is not
# assigned, so this cell only previews the de-duplicated rows. `df` itself is
# unchanged, and the cells below still operate on every row, duplicates included.
df.drop_duplicates()

Unnamed: 0,price
0,110
1,105
2,115
3,120
5,130
6,150
7,100


In [17]:
# Column-wise mean of the price feature (a one-element Series keyed by 'price').
mn = df[['price']].mean(axis=0)

In [18]:
mn

price    116.111111
dtype: float64

In [19]:
# Population standard deviation: ddof=0 divides by N instead of pandas' default N - 1.
std = df[['price']].std(axis=0, ddof=0)
std

price    14.677605
dtype: float64

In [20]:
# Z-score standardization: (x - mean) / standard deviation.
# Reuse `std` from the previous cell instead of recomputing it. It was computed
# with ddof=0 (population standard deviation), which is the estimator
# StandardScaler uses, so the manual column matches the sklearn one.
df['price_scale_manual'] = (df[['price']] - mn) / std

In [21]:
df

Unnamed: 0,price,price_scale_manual
0,110,-0.416356
1,105,-0.757011
2,115,-0.075701
3,120,0.264954
4,110,-0.416356
5,130,0.946264
6,150,2.308884
7,100,-1.097666
8,105,-0.757011


In [22]:
# Persist the frame with the manually standardized column (the index is written as well).
df.to_csv(path_or_buf='price_scaled_manual(standard).csv')

# Standardization Sklearn

In [23]:
from sklearn.preprocessing import StandardScaler

In [24]:
# Standard scaler that both centers on the mean and scales to unit variance
# (the library defaults, spelled out).
price_stand = StandardScaler(with_mean=True, with_std=True)

In [25]:
# Fit on the one-column price frame and store the standardized values next to
# the manual column for comparison.
standardized = price_stand.fit_transform(df[['price']])
df['price_scaled_sklearn'] = standardized

In [26]:
df

Unnamed: 0,price,price_scale_manual,price_scaled_sklearn
0,110,-0.416356,-0.416356
1,105,-0.757011,-0.757011
2,115,-0.075701,-0.075701
3,120,0.264954,0.264954
4,110,-0.416356,-0.416356
5,130,0.946264,0.946264
6,150,2.308884,2.308884
7,100,-1.097666,-1.097666
8,105,-0.757011,-0.757011


In [27]:
# Persist the frame, which now holds both the manual and the sklearn standardized columns.
df.to_csv(path_or_buf='price_scaled_sklearn(standard).csv')

# Log Transformation Manual 

In [28]:
# Reload the raw data so the standardization columns added above do not carry over.
df = pd.read_csv(filepath_or_buffer='Feature_engineering.csv')

In [29]:
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


In [30]:
# Log transform via log1p, i.e. log(1 + x), applied element-wise to the prices.
df['price_scaled_manual'] = np.log1p(df['price'])

In [31]:
df

Unnamed: 0,price,price_scaled_manual
0,110,4.70953
1,105,4.663439
2,115,4.75359
3,120,4.795791
4,110,4.70953
5,130,4.875197
6,150,5.01728
7,100,4.615121
8,105,4.663439


In [32]:
# Persist the frame with the manually log-transformed column (the index is written as well).
df.to_csv(path_or_buf="price_scaled_manual(Log_trans).csv")

# Log Transformation sklearn

In [33]:
from sklearn.preprocessing import FunctionTransformer

In [34]:
# Wrap np.log1p in a transformer so it exposes the same fit/transform API as the scalers.
lt = FunctionTransformer(func=np.log1p)

In [35]:
# Apply the wrapped log1p to the one-column price frame and store the result
# next to the manual column for comparison.
log_prices = lt.fit_transform(df[['price']])
df['price_scaled_sklearn'] = log_prices

In [36]:
df

Unnamed: 0,price,price_scaled_manual,price_scaled_sklearn
0,110,4.70953,4.70953
1,105,4.663439,4.663439
2,115,4.75359,4.75359
3,120,4.795791,4.795791
4,110,4.70953,4.70953
5,130,4.875197,4.875197
6,150,5.01728,5.01728
7,100,4.615121,4.615121
8,105,4.663439,4.663439


In [37]:
# Persist the frame, which now holds both the manual and the sklearn log-transformed columns.
df.to_csv(path_or_buf='price_scaled_sklearn(Log_trans).csv')

# Robust Scaler Manual

In [38]:
# Reload the raw data so the log-transform columns added above do not carry over.
df = pd.read_csv(filepath_or_buffer='Feature_engineering.csv')

In [39]:
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


In [40]:
# First quartile (25th percentile) of the prices.
q1 = df['price'].quantile(0.25)

In [41]:
q1

105.0

In [42]:
# Median (50th percentile) and third quartile (75th percentile) of the prices.
q2, q3 = df['price'].quantile(0.50), df['price'].quantile(0.75)

In [43]:
q2

110.0

In [44]:
q3

120.0

In [45]:
# Robust scaling: center on the median (q2) and divide by the interquartile
# range (q3 - q1).
df['price_scaled_manual'] = df['price'].sub(q2).div(q3 - q1)

In [46]:
df

Unnamed: 0,price,price_scaled_manual
0,110,0.0
1,105,-0.333333
2,115,0.333333
3,120,0.666667
4,110,0.0
5,130,1.333333
6,150,2.666667
7,100,-0.666667
8,105,-0.333333


In [47]:
# Persist the frame with the manually robust-scaled column (the index is written as well).
df.to_csv(path_or_buf="price_scaled_manual(Robust).csv")

# Robust scaler sklearn

In [48]:
from sklearn.preprocessing import RobustScaler

In [49]:
# Robust scaler using the 25th-75th percentile range (the library default,
# spelled out), i.e. the same quartiles as the manual computation.
rs = RobustScaler(quantile_range=(25.0, 75.0))

In [50]:
# Fit on the one-column price frame and store the robust-scaled values next to
# the manual column for comparison.
df['price_scaled_sklearn'] = rs.fit(df[['price']]).transform(df[['price']])

In [51]:
df

Unnamed: 0,price,price_scaled_manual,price_scaled_sklearn
0,110,0.0,0.0
1,105,-0.333333,-0.333333
2,115,0.333333,0.333333
3,120,0.666667,0.666667
4,110,0.0,0.0
5,130,1.333333,1.333333
6,150,2.666667,2.666667
7,100,-0.666667,-0.666667
8,105,-0.333333,-0.333333


In [52]:
# Persist the frame, which now holds both the manual and the sklearn robust-scaled columns.
df.to_csv(path_or_buf='price_scaled_sklearn(Robust).csv')

# Max ABS Scaler sklearn

In [53]:
# Reload the raw data so the robust-scaling columns added above do not carry over.
df = pd.read_csv(filepath_or_buffer='Feature_engineering.csv')

In [54]:
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


In [55]:
from sklearn.preprocessing import MaxAbsScaler

In [56]:
# Largest price in the column.
# NOTE(review): the cells visible in this notebook only display this value; the
# scaler cells below do not read it.
max_price = df['price'].max()

In [57]:
max_price

150

In [58]:
# Max-abs scaler; copy=True is the library default, spelled out.
max_abs = MaxAbsScaler(copy=True)

In [59]:
# Fit on the one-column price frame and store the max-abs scaled values.
max_abs_scaled = max_abs.fit_transform(df[['price']])
df['price_scaled_sklearn'] = max_abs_scaled

In [60]:
df

Unnamed: 0,price,price_scaled_sklearn
0,110,0.733333
1,105,0.7
2,115,0.766667
3,120,0.8
4,110,0.733333
5,130,0.866667
6,150,1.0
7,100,0.666667
8,105,0.7


In [61]:
# Persist the frame with the sklearn max-abs scaled column (the index is written as well).
df.to_csv(path_or_buf='price_scaled_sklearn(MaxAbs).csv')

# Max ABS scaler manual

In [62]:
# Largest absolute price, the divisor for manual max-abs scaling.
# NOTE(review): this rebinds `max_abs`, which until now held the fitted
# MaxAbsScaler instance; from here on the name refers to a plain number.
max_abs = df['price'].abs().max()

In [63]:
max_abs

150

In [64]:
# Max-abs scaling: divide every price by the largest absolute price.
df['price_scaled_manual'] = df['price'] / max_abs

In [65]:
df

Unnamed: 0,price,price_scaled_sklearn,price_scaled_manual
0,110,0.733333,0.733333
1,105,0.7,0.7
2,115,0.766667,0.766667
3,120,0.8,0.8
4,110,0.733333,0.733333
5,130,0.866667,0.866667
6,150,1.0,1.0
7,100,0.666667,0.666667
8,105,0.7,0.7


In [66]:
# Persist the frame, which now holds both the sklearn and the manual max-abs scaled columns.
df.to_csv(path_or_buf="price_scaled_manual(MaxAbs).csv")