# Feature Scaling methods

In [2]:
# library install
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp312-cp312-win_amd64.whl.metadata (13 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp312-cp312-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.0 MB 3.4 MB/s eta 0:00:04
   -- ------------------------------------- 0.8/11.0 MB 2.1 MB/s eta 0:00:05
   -- ------------------------------------- 0.8/11.0 MB 2.1 MB/s eta 0:00:05
   --- ------------------------------------ 1.0/11.0 MB 1.1 MB/s eta 0:00:09
   --- ------------------------------------ 1.0/11.0 MB 1.1 MB/s eta 0:00:09
   --- ------------------------------------ 1.0/11.0 MB 1.1 MB/s eta 0:00:09
   ---- ----------------------------------- 1.3/11.0 MB 849.0 kB/s eta 0:

## Min-Max Scaling

In [6]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Toy dataset: one "Value" column holding five evenly spaced integers (10..50).
data = {"Value": list(range(10, 60, 10))}
df = pd.DataFrame(data=data)
print(df)



   Value
0     10
1     20
2     30
3     40
4     50
   Value  Scaled Value
0     10          0.00
1     20          0.25
2     30          0.50
3     40          0.75
4     50          1.00


In [7]:
# Min-max scaling: fit on the "Value" column and store the rescaled result
# next to the original (the minimum maps to 0, the maximum to 1).
scaler = MinMaxScaler()
df["Scaled Value"] = scaler.fit(df[["Value"]]).transform(df[["Value"]])
print(df)

   Value  Scaled Value
0     10          0.00
1     20          0.25
2     30          0.50
3     40          0.75
4     50          1.00


## Standard Scaling (Z-Score Scaling)

In [9]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Rebuild the toy dataset from scratch so this section starts with a
# frame that contains only the raw "Value" column.
data = dict(Value=[10, 20, 30, 40, 50])
df = pd.DataFrame(data=data)
print(df)

   Value
0     10
1     20
2     30
3     40
4     50


In [11]:
# Standardization (z-score): centre "Value" on its mean and divide by its
# population standard deviation, so the middle value (30) maps to 0.
scaler = StandardScaler()
df["Scaled Value"] = scaler.fit(df[["Value"]]).transform(df[["Value"]])
print(df)

   Value  Scaled Value
0     10     -1.414214
1     20     -0.707107
2     30      0.000000
3     40      0.707107
4     50      1.414214


## Robust Scaling

In [12]:
from sklearn.preprocessing import RobustScaler

# Toy dataset: small values plus one extreme outlier (1000).
data = dict(Value=[10, 20, 30, 1000, 50])
df = pd.DataFrame(data=data)
print(df)

   Value
0     10
1     20
2     30
3   1000
4     50


In [13]:
# Robust scaling: centre "Value" on its median and divide by the
# interquartile range, so the single outlier (1000) does not compress
# the remaining values.
scaler = RobustScaler()
df["Scaled Value"] = scaler.fit(df[["Value"]]).transform(df[["Value"]])
print(df)

   Value  Scaled Value
0     10     -0.666667
1     20     -0.333333
2     30      0.000000
3   1000     32.333333
4     50      0.666667


## Logarithmic Scaling

In [14]:
import numpy as np
import pandas as pd

# Sample data (hard-coded, not random) spanning two orders of magnitude,
# from 1,000 up to 100,000.
data = {'Value': [1000, 20000, 30000, 100000, 50000]}
df = pd.DataFrame(data)
print(df)

# Log transformation: compresses the range of the values.
# The column is selected as a Series (df["Value"]) so each result is assigned
# as a plain column instead of relying on pandas to unwrap a one-column
# DataFrame. Logarithms are only defined for strictly positive values.
df["scaled log"] = np.log(df["Value"])      # natural log (base e)
df["scaled log2"] = np.log2(df["Value"])    # base 2
df["scaled log10"] = np.log10(df["Value"])  # base 10

print(df)

    Value
0    1000
1   20000
2   30000
3  100000
4   50000
    Value  scaled log  scaled log2  scaled log10
0    1000    6.907755     9.965784      3.000000
1   20000    9.903488    14.287712      4.301030
2   30000   10.308953    14.872675      4.477121
3  100000   11.512925    16.609640      5.000000
4   50000   10.819778    15.609640      4.698970
