<a href="https://colab.research.google.com/github/KrushnaTaur/ML-Practice/blob/main/02_Data_Preprocessing/04_Data_Scaling_and_Normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

In [2]:
# Create Sample Data
# One label column (Name) plus two numeric features (Age, Salary) whose
# magnitudes differ by orders of magnitude.

data = {}
data['Name'] = ['Amit', 'Riya', 'Karan', 'Neha', 'Vikas']
data['Age'] = [22, 25, 47, 35, 52]
data['Salary'] = [25000, 40000, 80000, 52000, 91000]

# Keys become the column names, in insertion order.
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Name,Age,Salary
0,Amit,22,25000
1,Riya,25,40000
2,Karan,47,80000
3,Neha,35,52000
4,Vikas,52,91000


In [3]:
# Summary statistics of the raw numeric features (Age, Salary). The text
# column Name is excluded; restricting to numeric dtypes is spelled out here,
# and is also what describe() does by default for a mixed-dtype frame.
df.describe(include=[np.number])

Unnamed: 0,Age,Salary
count,5.0,5.0
mean,36.2,57600.0
std,13.17953,27482.721845
min,22.0,25000.0
25%,25.0,40000.0
50%,35.0,52000.0
75%,47.0,80000.0
max,52.0,91000.0


In [4]:
# Standardization (Using StandardScaler)
# Rescales each feature to zero mean and unit variance.

# Work on a copy so the raw frame `df` stays available for the other scalers.
df_scaled = df.copy()

scaler = StandardScaler()
scaler.fit(df[['Age', 'Salary']])                        # learn the per-column statistics
scaled_values = scaler.transform(df[['Age', 'Salary']])  # apply them to the same rows

# Whole-column replacement: the integer columns become float columns.
df_scaled[['Age', 'Salary']] = scaled_values
df_scaled

Unnamed: 0,Name,Age,Salary
0,Amit,-1.204602,-1.326212
1,Riya,-0.950108,-0.715992
2,Karan,0.916176,0.911262
3,Neha,-0.101797,-0.227816
4,Vikas,1.340331,1.358757


In [5]:
# Sanity-check the standardized columns (rounded to 2 decimals).
# The mean comes out as 0. The reported std is about 1.12 rather than exactly 1
# because describe() uses the sample standard deviation (ddof=1), while
# StandardScaler scales by the population standard deviation (ddof=0);
# with 5 rows the ratio between the two is sqrt(5/4), roughly 1.118.
df_scaled.describe().round(decimals=2)

Unnamed: 0,Age,Salary
count,5.0,5.0
mean,-0.0,-0.0
std,1.12,1.12
min,-1.2,-1.33
25%,-0.95,-0.72
50%,-0.1,-0.23
75%,0.92,0.91
max,1.34,1.36


In [6]:
# Normalization (Using MinMaxScaler)
# Rescales each feature linearly so that its minimum maps to 0 and its maximum to 1.

df_minmax = df.copy()  # keep the raw frame `df` untouched
mm_scaler = MinMaxScaler()
# fit() returns the fitted scaler, so fit and transform can be chained.
df_minmax[['Age', 'Salary']] = mm_scaler.fit(df[['Age', 'Salary']]).transform(df[['Age', 'Salary']])
df_minmax

Unnamed: 0,Name,Age,Salary
0,Amit,0.0,0.0
1,Riya,0.1,0.227273
2,Karan,0.833333,0.833333
3,Neha,0.433333,0.409091
4,Vikas,1.0,1.0


In [7]:
# Robust Scaling (Using RobustScaler)
# Handy when the data contains outliers: extreme values influence the result less.
# With the default settings each column is centered on its median and divided
# by its interquartile range, so the median row maps to 0.

rb_scaler = RobustScaler().fit(df[['Age', 'Salary']])  # fit() returns the fitted scaler
df_robust = df.copy()                                  # keep the raw frame `df` untouched
df_robust[['Age', 'Salary']] = rb_scaler.transform(df[['Age', 'Salary']])
df_robust

Unnamed: 0,Name,Age,Salary
0,Amit,-0.590909,-0.675
1,Riya,-0.454545,-0.3
2,Karan,0.545455,0.7
3,Neha,0.0,0.0
4,Vikas,0.772727,0.975


In [8]:
# Mini Task: standardize Age and Salary with StandardScaler and print the data before and after scaling
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [9]:
# Re-create the sample data so the mini task starts from raw, unscaled values.
data = dict([
    ('Name', ['Amit', 'Riya', 'Karan', 'Neha', 'Vikas']),
    ('Age', [22, 25, 47, 35, 52]),
    ('Salary', [25000, 40000, 80000, 52000, 91000]),
])
df = pd.DataFrame(data=data)

# Plain-text snapshot of the frame before any scaling is applied.
print("Before Scaling:\n", df)

Before Scaling:
     Name  Age  Salary
0   Amit   22   25000
1   Riya   25   40000
2  Karan   47   80000
3   Neha   35   52000
4  Vikas   52   91000


In [10]:
# Prints the same "before" snapshot as the previous cell; `df` has not been
# modified in between, so the output is identical.
print("Before Scaling:\n", str(df))

Before Scaling:
     Name  Age  Salary
0   Amit   22   25000
1   Riya   25   40000
2  Karan   47   80000
3   Neha   35   52000
4  Vikas   52   91000


In [12]:
# Standard Scaling
# NOTE: this writes the scaled values back into `df` itself, so the raw
# Age/Salary columns are overwritten; re-create `df` to get them back.
# It also rebinds the name `scaler` to a new StandardScaler instance.
scaler = StandardScaler()
scaler.fit(df[['Age', 'Salary']])
df[['Age', 'Salary']] = scaler.transform(df[['Age', 'Salary']])

print("After Scaling (StandardScaler):\n", df)

After Scaling (StandardScaler):
     Name       Age    Salary
0   Amit -1.204602 -1.326212
1   Riya -0.950108 -0.715992
2  Karan  0.916176  0.911262
3   Neha -0.101797 -0.227816
4  Vikas  1.340331  1.358757
