In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the price data from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="Transformation.csv")

In [3]:
# Peek at the first rows to sanity-check that the file loaded as expected.
df.head()

Unnamed: 0,Price
0,110
1,105
2,115
3,120
4,110


In [4]:
# Check row count, dtypes and non-null counts before computing any statistics.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Price   9 non-null      int64
dtypes: int64(1)
memory usage: 200.0 bytes


In [5]:
# Arithmetic mean of Price; reused later as the centre for manual standardization.
mean_df = df["Price"].mean()

In [6]:
# Display the mean price.
mean_df

116.11111111111111

In [7]:
# Median of Price; reused later as the centre for manual robust scaling.
median_df = df["Price"].median()

In [8]:
# Display the median price.
median_df

110.0

In [9]:
# Most frequent Price value(s); mode() returns a Series because ties are possible.
mode_df = df["Price"].mode()

In [10]:
# For this dataset two prices tie for most frequent, so the result holds
# both values rather than a single scalar.
mode_df

0    105
1    110
Name: Price, dtype: int64

In [11]:
# Standard deviation of Price using pandas' default settings (sample formula, ddof=1).
std_df = df["Price"].std()

In [12]:
# Display the sample standard deviation (the same figure describe() reports for Price).
std_df

15.567951410224504

In [13]:
# First quartile (25th percentile) of Price.
q1 = df["Price"].quantile(q=0.25)

In [14]:
# Display the first quartile.
q1

105.0

In [15]:
# Second quartile (50th percentile) of Price, i.e. the median.
q2 = df["Price"].quantile(q=0.5)

In [16]:
# Display the second quartile; it equals the median computed earlier.
q2

110.0

In [17]:
# Third quartile (75th percentile) of Price.
q3 = df["Price"].quantile(q=0.75)

In [18]:
# Display the third quartile.
q3

120.0

In [19]:
# Interquartile range: the spread of the middle 50% of prices. It is the
# denominator of the manual robust scaling further down.
iqr = q3 - q1

In [20]:
# Display the interquartile range.
iqr

15.0

### Standardization Auto

In [21]:
from sklearn.preprocessing import StandardScaler
# Scaler with default settings. The outputs below show it centres Price on its
# mean and divides by the population (ddof=0) standard deviation.
std_scaler = StandardScaler()

In [22]:
# Fit the scaler on the single-column Price frame and store the resulting
# z-scores next to the raw values.
df["Std_auto"] = std_scaler.fit_transform(df.loc[:, ["Price"]])

In [23]:
# Inspect the frame with the new Std_auto column next to the raw prices.
df

Unnamed: 0,Price,Std_auto
0,110,-0.416356
1,105,-0.757011
2,115,-0.075701
3,120,0.264954
4,110,-0.416356
5,130,0.946264
6,150,2.308884
7,100,-1.097666
8,105,-0.757011


In [24]:
# Summary statistics of the raw prices. The std reported here is the sample
# (ddof=1) value, identical to std_df.
df['Price'].describe()

count      9.000000
mean     116.111111
std       15.567951
min      100.000000
25%      105.000000
50%      110.000000
75%      120.000000
max      150.000000
Name: Price, dtype: float64

In [25]:
# After standardization the mean is ~0 (up to floating-point noise). The std
# shown is ~1.0607 rather than exactly 1 because describe() uses the sample
# formula (ddof=1) while StandardScaler divides by the population std (ddof=0);
# with 9 rows the ratio is sqrt(9/8) ~= 1.0607.
df['Std_auto'].describe()

count    9.000000e+00
mean    -2.343804e-16
std      1.060660e+00
min     -1.097666e+00
25%     -7.570112e-01
50%     -4.163561e-01
75%      2.649539e-01
max      2.308884e+00
Name: Std_auto, dtype: float64

### Standardization Manual

In [26]:
# Manual z-score: (x - mean) / population std.
# pandas' Series.std() defaults to ddof=1 (sample std); ddof=0 is passed
# explicitly so the result matches StandardScaler, which uses the population
# standard deviation.
df['std_manual'] = (df['Price'] - mean_df)/df.Price.std(ddof=0)

In [27]:
# Std_auto and std_manual should agree row for row, confirming the ddof=0 formula.
df

Unnamed: 0,Price,Std_auto,std_manual
0,110,-0.416356,-0.416356
1,105,-0.757011,-0.757011
2,115,-0.075701,-0.075701
3,120,0.264954,0.264954
4,110,-0.416356,-0.416356
5,130,0.946264,0.946264
6,150,2.308884,2.308884
7,100,-1.097666,-1.097666
8,105,-0.757011,-0.757011


### MinMaxScaling Auto

In [28]:
from sklearn.preprocessing import MinMaxScaler
# Scaler with default settings. The minMax_auto output below shows it maps the
# column linearly onto the range 0..1.
mmscaler = MinMaxScaler()

In [29]:
# Fit on the single-column Price frame and keep the min-max scaled values.
df["minMax_auto"] = mmscaler.fit_transform(df.loc[:, ["Price"]])

In [30]:
# minMax_auto maps the smallest price to 0 and the largest price to 1.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto
0,110,-0.416356,-0.416356,0.2
1,105,-0.757011,-0.757011,0.1
2,115,-0.075701,-0.075701,0.3
3,120,0.264954,0.264954,0.4
4,110,-0.416356,-0.416356,0.2
5,130,0.946264,0.946264,0.6
6,150,2.308884,2.308884,1.0
7,100,-1.097666,-1.097666,0.0
8,105,-0.757011,-0.757011,0.1


### MinMaxScaling Manual

In [31]:
# Manual min-max scaling: (x - min) / (max - min).
df["minMax_manual"] = (
    df["Price"]
    .sub(df["Price"].min())
    .div(df["Price"].max() - df["Price"].min())
)

In [32]:
# minMax_manual should reproduce minMax_auto exactly.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual
0,110,-0.416356,-0.416356,0.2,0.2
1,105,-0.757011,-0.757011,0.1,0.1
2,115,-0.075701,-0.075701,0.3,0.3
3,120,0.264954,0.264954,0.4,0.4
4,110,-0.416356,-0.416356,0.2,0.2
5,130,0.946264,0.946264,0.6,0.6
6,150,2.308884,2.308884,1.0,1.0
7,100,-1.097666,-1.097666,0.0,0.0
8,105,-0.757011,-0.757011,0.1,0.1


### Max Absolute Scaler Auto

In [33]:
from sklearn.preprocessing import MaxAbsScaler
# Scaler with default settings. The MaScaler_Auto output below shows each price
# divided by the largest price magnitude in the column.
MaScaler = MaxAbsScaler()

In [34]:
# Fit on the single-column Price frame and keep the max-abs scaled values.
df["MaScaler_Auto"] = MaScaler.fit_transform(df.loc[:, ["Price"]])

In [35]:
# MaScaler_Auto is Price divided by the largest magnitude in the column
# (150 for this data), so the largest price maps to exactly 1.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual,MaScaler_Auto
0,110,-0.416356,-0.416356,0.2,0.2,0.733333
1,105,-0.757011,-0.757011,0.1,0.1,0.7
2,115,-0.075701,-0.075701,0.3,0.3,0.766667
3,120,0.264954,0.264954,0.4,0.4,0.8
4,110,-0.416356,-0.416356,0.2,0.2,0.733333
5,130,0.946264,0.946264,0.6,0.6,0.866667
6,150,2.308884,2.308884,1.0,1.0,1.0
7,100,-1.097666,-1.097666,0.0,0.0,0.666667
8,105,-0.757011,-0.757011,0.1,0.1,0.7


### Max Absolute Scaler Manual

In [36]:
# Manual max-abs scaling: x / max(|x|).
# The divisor must be the largest absolute value in the column, not the
# absolute value of the maximum: for a column such as [-10, 5] abs(max) is 5,
# whereas MaxAbsScaler divides by 10. Taking abs() before max() keeps the
# manual column equal to MaxAbsScaler for data containing negative values.
df['MaScaler_Manual'] = df.Price / df.Price.abs().max()

In [37]:
# MaScaler_Manual should reproduce MaScaler_Auto exactly.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual,MaScaler_Auto,MaScaler_Manual
0,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333
1,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7
2,115,-0.075701,-0.075701,0.3,0.3,0.766667,0.766667
3,120,0.264954,0.264954,0.4,0.4,0.8,0.8
4,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333
5,130,0.946264,0.946264,0.6,0.6,0.866667,0.866667
6,150,2.308884,2.308884,1.0,1.0,1.0,1.0
7,100,-1.097666,-1.097666,0.0,0.0,0.666667,0.666667
8,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7


### Robust Scaler Auto

In [38]:
from sklearn.preprocessing import RobustScaler
# Scaler with default settings. The rScaler_Auto output below shows it centres
# Price on the median and divides by the interquartile range (q3 - q1).
rScaler = RobustScaler()

In [39]:
# Fit on the single-column Price frame and keep the robust-scaled values.
df['rScaler_Auto'] = rScaler.fit_transform(df.loc[:, ['Price']])

In [40]:
# rScaler_Auto is 0 at the median price (110) and moves by 1 for every
# interquartile range (15) away from it.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual,MaScaler_Auto,MaScaler_Manual,rScaler_Auto
0,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0
1,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333
2,115,-0.075701,-0.075701,0.3,0.3,0.766667,0.766667,0.333333
3,120,0.264954,0.264954,0.4,0.4,0.8,0.8,0.666667
4,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0
5,130,0.946264,0.946264,0.6,0.6,0.866667,0.866667,1.333333
6,150,2.308884,2.308884,1.0,1.0,1.0,1.0,2.666667
7,100,-1.097666,-1.097666,0.0,0.0,0.666667,0.666667,-0.666667
8,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333


### Robust Scaler Manual

In [41]:
# Manual robust scaling: (x - median) / IQR, using the statistics computed earlier.
df['rScaler_Manual'] = df['Price'].sub(median_df).div(iqr)

In [42]:
# rScaler_Manual should reproduce rScaler_Auto exactly.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual,MaScaler_Auto,MaScaler_Manual,rScaler_Auto,rScaler_Manual
0,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0,0.0
1,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333,-0.333333
2,115,-0.075701,-0.075701,0.3,0.3,0.766667,0.766667,0.333333,0.333333
3,120,0.264954,0.264954,0.4,0.4,0.8,0.8,0.666667,0.666667
4,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0,0.0
5,130,0.946264,0.946264,0.6,0.6,0.866667,0.866667,1.333333,1.333333
6,150,2.308884,2.308884,1.0,1.0,1.0,1.0,2.666667,2.666667
7,100,-1.097666,-1.097666,0.0,0.0,0.666667,0.666667,-0.666667,-0.666667
8,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333,-0.333333


### Log Transform Auto

In [43]:
from sklearn.preprocessing import FunctionTransformer
# Wrap numpy's log1p (log(1 + x)) so it can be used through the scikit-learn
# transformer interface.
LT = FunctionTransformer(np.log1p)

In [44]:
# Apply the log1p transformer to the single-column Price frame and store the result.
df["LT_1p_Auto"] = LT.fit_transform(df.loc[:, ["Price"]])

In [45]:
# LT_1p_Auto holds log(1 + Price) for every row.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual,MaScaler_Auto,MaScaler_Manual,rScaler_Auto,rScaler_Manual,LT_1p_Auto
0,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0,0.0,4.70953
1,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333,-0.333333,4.663439
2,115,-0.075701,-0.075701,0.3,0.3,0.766667,0.766667,0.333333,0.333333,4.75359
3,120,0.264954,0.264954,0.4,0.4,0.8,0.8,0.666667,0.666667,4.795791
4,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0,0.0,4.70953
5,130,0.946264,0.946264,0.6,0.6,0.866667,0.866667,1.333333,1.333333,4.875197
6,150,2.308884,2.308884,1.0,1.0,1.0,1.0,2.666667,2.666667,5.01728
7,100,-1.097666,-1.097666,0.0,0.0,0.666667,0.666667,-0.666667,-0.666667,4.615121
8,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333,-0.333333,4.663439


### Log Transform Manual

In [46]:
# Manual log transform: log(1 + x) applied directly with numpy.
df["LT_1p_Manual"] = np.log1p(df["Price"])

In [47]:
# LT_1p_Manual should reproduce LT_1p_Auto exactly.
df

Unnamed: 0,Price,Std_auto,std_manual,minMax_auto,minMax_manual,MaScaler_Auto,MaScaler_Manual,rScaler_Auto,rScaler_Manual,LT_1p_Auto,LT_1p_Manual
0,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0,0.0,4.70953,4.70953
1,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333,-0.333333,4.663439,4.663439
2,115,-0.075701,-0.075701,0.3,0.3,0.766667,0.766667,0.333333,0.333333,4.75359,4.75359
3,120,0.264954,0.264954,0.4,0.4,0.8,0.8,0.666667,0.666667,4.795791,4.795791
4,110,-0.416356,-0.416356,0.2,0.2,0.733333,0.733333,0.0,0.0,4.70953,4.70953
5,130,0.946264,0.946264,0.6,0.6,0.866667,0.866667,1.333333,1.333333,4.875197,4.875197
6,150,2.308884,2.308884,1.0,1.0,1.0,1.0,2.666667,2.666667,5.01728,5.01728
7,100,-1.097666,-1.097666,0.0,0.0,0.666667,0.666667,-0.666667,-0.666667,4.615121,4.615121
8,105,-0.757011,-0.757011,0.1,0.1,0.7,0.7,-0.333333,-0.333333,4.663439,4.663439
