# Normalization

<img src="https://frenzy86.s3.eu-west-2.amazonaws.com/python/normalization.png " width="1200">


In [12]:
import pandas as pd
df = pd.DataFrame({
               'A':[1,2,3],
               'B':[100,300,500],
               'C':list('abc')
             })
df

Unnamed: 0,A,B,C
0,1,100,a
1,2,300,b
2,3,500,c


In [13]:
def normalize(df):
    result = df.copy()
    for feature_name in df.columns:
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result

In [14]:
normalize(df[['A','B']])

Unnamed: 0,A,B
0,0.0,0.0
1,0.5,0.5
2,1.0,1.0


In [15]:
import pandas as pd
df = pd.DataFrame({
               'A':[1,2,3],
               'B':[100,300,500],
             })

# Using sklearn
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler() 
arr_scaled = scaler.fit_transform(df) 

print(arr_scaled)

df_scaled = pd.DataFrame(arr_scaled, columns=df.columns,index=df.index)
df_scaled

[[0.  0. ]
 [0.5 0.5]
 [1.  1. ]]


Unnamed: 0,A,B
0,0.0,0.0
1,0.5,0.5
2,1.0,1.0


## Normalizing we simply subtract the mean and divide by standard deviation.

In [16]:
df.iloc[:,0:-1] = df.iloc[:,0:-1].apply(lambda x: (x-x.mean())/ x.std(), axis=0)
df

Unnamed: 0,A,B
0,-1.0,100
1,0.0,300
2,1.0,500


In [None]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()


df = pd.DataFrame({
               'A':[1,2,3],
               'B':[100,300,500],
               'C':list('abc')
             })
df.iloc[:,0:-1] = scaler.fit_transform(df.iloc[:,0:-1].to_numpy())
df

Unnamed: 0,A,B,C
0,-1.224745,-1.224745,a
1,0.0,0.0,b
2,1.224745,1.224745,c
