In [1]:
import numpy as np
import pandas as pd


In [2]:
# Toy single-feature frame used throughout the scaling examples.
x_values = [1, 2, 3, 4, 5]
df = pd.DataFrame({'X': x_values})
df

Unnamed: 0,X
0,1
1,2
2,3
3,4
4,5


In [3]:
# Sample standard deviation: ddof=1 divides by (n - 1).
sample_std = df.std(ddof=1)
sample_std

X    1.581139
dtype: float64

In [4]:
# Population standard deviation: ddof=0 divides by n.
population_std = df.std(ddof=0)
population_std

X    1.414214
dtype: float64

In [5]:
# Z-score normalization (also called standard scaling), done by hand:
# centre X on its mean, then divide by the population std (ddof=0).
x_mean = df['X'].mean()
x_std = df['X'].std(ddof=0)
df['X_scp'] = (df['X'] - x_mean) / x_std
df

Unnamed: 0,X,X_scp
0,1,-1.414214
1,2,-0.707107
2,3,0.0
3,4,0.707107
4,5,1.414214


### StandardScaler 
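`StandardScaler` standardizes each feature independently (z-score normalization): it subtracts the feature's mean and divides by its standard deviation, so the transformed feature has a mean of zero and a standard deviation of one. It uses the population standard deviation (`ddof=0`), which is why its output matches the manual z-score computed above with `std(ddof=0)`. Putting all features on the same scale prevents any single feature from dominating the learning process merely because of its larger magnitude.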

In [6]:
# Start again from a fresh frame that holds only the raw X column.
df = pd.Series([1, 2, 3, 4, 5], name='X').to_frame()
df

Unnamed: 0,X
0,1
1,2
2,3
3,4
4,5


In [7]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on X and transform it in one call.
# df[['X']] (double brackets) passes a one-column 2-D frame rather than a 1-D Series.
sc = StandardScaler()
x_standardized = sc.fit_transform(df[['X']])
df['X_sc'] = x_standardized
df

Unnamed: 0,X,X_sc
0,1,-1.414214
1,2,-0.707107
2,3,0.0
3,4,0.707107
4,5,1.414214


### Why Use Standardization?
- ✅ Centers each feature at zero with unit variance, which often improves model training and performance.
- ✅ Useful for distance-based algorithms (e.g., KNN, SVM) and variance-based methods such as PCA.
- ✅ Prevents features with large scales from dominating those with smaller scales.

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaling: (x - min) / (max - min), which maps X onto [0, 1]
# with MinMaxScaler's default feature_range.
mn_mx = MinMaxScaler()
x_minmax = mn_mx.fit_transform(df[['X']])
df['X_mnmx'] = x_minmax
df

Unnamed: 0,X,X_sc,X_mnmx
0,1,-1.414214,0.0
1,2,-0.707107,0.25
2,3,0.0,0.5
3,4,0.707107,0.75
4,5,1.414214,1.0
