# <h1 align="center"> © RsydMathTech. </h1>

# <h1 align="center">Data Normalization Formula With Example. </h1>

In [17]:
import numpy as np
import pandas as pd

In [18]:
# Toy dataset: three numeric columns whose magnitudes differ widely, used as
# the common input for the normalization examples that follow.
data = dict(
    Salary=[10000, 15000, 20000, 30000, 50000],
    Age=[18, 25, 35, 45, 60],
    Rating=[2, 3, 4, 4, 5],
)

df = pd.DataFrame(data=data)

# Show the unscaled frame first ("Data Asli" is Indonesian for "original data").
print("=== Data Asli ===")
display(df)

=== Data Asli ===


Unnamed: 0,Salary,Age,Rating
0,10000,18,2
1,15000,25,3
2,20000,35,4
3,30000,45,4
4,50000,60,5


# 1. Simple Feature Scaling


In [4]:
# Simple feature scaling: each column is divided by that column's own maximum,
# so the maximum of every column maps to 1.
print("Formula: Xnew = Xold / Xmax")
df_simple = df.div(df.max(), axis="columns")
display(df_simple)

Formula: Xnew = Xold / Xmax


Unnamed: 0,Salary,Age,Rating
0,0.2,0.3,0.4
1,0.3,0.416667,0.6
2,0.4,0.583333,0.8
3,0.6,0.75,0.8
4,1.0,1.0,1.0


# 2. Min-Max Normalization


In [5]:
# Min-max normalization: shift each column by its minimum, then divide by the
# column's range (max - min), so every column spans 0 to 1.
print("Formula: Xnew = (Xold - Xmin) / (Xmax - Xmin)")
df_minmax = df.sub(df.min(), axis="columns").div(
    df.max().sub(df.min()), axis="columns"
)
display(df_minmax)

Formula: Xnew = (Xold - Xmin) / (Xmax - Xmin)


Unnamed: 0,Salary,Age,Rating
0,0.0,0.0,0.0
1,0.125,0.166667,0.333333
2,0.25,0.404762,0.666667
3,0.5,0.642857,0.666667
4,1.0,1.0,1.0


# 3. Z-score Normalization


In [6]:
# Z-score normalization: center each column on its mean and divide by its
# standard deviation. pandas' std() defaults to ddof=1 (sample standard
# deviation), which is what the values displayed below are based on.
print("Formula: Xnew = (Xold - mean) / std")
df_zscore = df.sub(df.mean(), axis="columns").div(df.std(), axis="columns")
display(df_zscore)

Formula: Xnew = (Xold - mean) / std


Unnamed: 0,Salary,Age,Rating
0,-0.948683,-1.121011,-1.403293
1,-0.632456,-0.699125,-0.526235
2,-0.316228,-0.096431,0.350823
3,0.316228,0.506263,0.350823
4,1.581139,1.410304,1.227881


# <h1 align="center">Binning. </h1>

In [9]:
import pandas as pd

# Example prices to be grouped into categories
price = [5000, 12000, 18000, 25000, 40000, 60000, 85000]

# Bin edges and the label for each of the four resulting intervals
bins = [0, 15000, 30000, 60000, 100000]
labels = ["Low", "Medium", "High", "Very High"]

# Build the frame and attach the category in one step. pd.cut uses
# right-closed intervals by default (so 60000 falls in "High"), and
# include_lowest=True makes the first interval also include its left edge.
df = pd.DataFrame({"price": price}).assign(
    price_category=lambda frame: pd.cut(
        frame["price"], bins=bins, labels=labels, include_lowest=True
    )
)

df

Unnamed: 0,price,price_category
0,5000,Low
1,12000,Low
2,18000,Medium
3,25000,Medium
4,40000,High
5,60000,High
6,85000,Very High


# <h1 align="center">Encoding categorical variables → numeric (Quantitative). </h1>

## Education and Salary Context

In [20]:
from sklearn.preprocessing import LabelEncoder

# Small example frame: one row per person, with a categorical education level.
df = pd.DataFrame(
    [
        ("John", "High School", 2500),
        ("Alice", "Bachelor", 4000),
        ("Bob", "Master", 6000),
        ("Sarah", "PhD", 8000),
    ],
    columns=["Name", "Education", "Salary"],
)

# Label Encoding: each distinct education string gets an integer code.
# As the displayed result shows, the codes follow the sorted order of the
# strings (Bachelor=0, High School=1, ...), not the level of education.
le = LabelEncoder().fit(df["Education"])
df["Education_Label"] = le.transform(df["Education"])

df

Unnamed: 0,Name,Education,Salary,Education_Label
0,John,High School,2500,1
1,Alice,Bachelor,4000,0
2,Bob,Master,6000,2
3,Sarah,PhD,8000,3


In [21]:
# One-hot encoding: replace the "Education" column with one boolean indicator
# column per distinct value; all other columns are carried over unchanged.
df_onehot = pd.get_dummies(data=df, columns=["Education"])

df_onehot

Unnamed: 0,Name,Salary,Education_Label,Education_Bachelor,Education_High School,Education_Master,Education_PhD
0,John,2500,1,False,True,False,False
1,Alice,4000,0,True,False,False,False
2,Bob,6000,2,False,False,True,False
3,Sarah,8000,3,False,False,False,True


In [22]:
# Ordinal encoding: rank the education levels explicitly, lowest (1) to
# highest (4), instead of relying on an automatically assigned code.
edu_map = {
    level: rank
    for rank, level in enumerate(["High School", "Bachelor", "Master", "PhD"], start=1)
}
education_rank = df["Education"].map(edu_map)
df["Education_Ordinal"] = education_rank

df

Unnamed: 0,Name,Education,Salary,Education_Label,Education_Ordinal
0,John,High School,2500,1,1
1,Alice,Bachelor,4000,0,2
2,Bob,Master,6000,2,3
3,Sarah,PhD,8000,3,4


## Vehicle Context

In [24]:
# Small example frame: one row per car, with a categorical fuel type.
df = pd.DataFrame(
    [
        ("A", "Gasoline", 20000),
        ("B", "Diesel", 25000),
        ("C", "Electric", 35000),
        ("D", "Gasoline", 22000),
    ],
    columns=["Car", "Fuel", "Price"],
)

# Label encoding: each distinct fuel string gets an integer code; rows with
# the same fuel (cars A and D) share the same code.
le = LabelEncoder().fit(df["Fuel"])
df["Fuel_Label"] = le.transform(df["Fuel"])

df

Unnamed: 0,Car,Fuel,Price,Fuel_Label
0,A,Gasoline,20000,2
1,B,Diesel,25000,0
2,C,Electric,35000,1
3,D,Gasoline,22000,2


In [27]:
# One-hot encoding: replace the "Fuel" column with one boolean indicator
# column per distinct fuel type; all other columns are carried over unchanged.
df_onehot = pd.get_dummies(data=df, columns=["Fuel"])

df_onehot

Unnamed: 0,Car,Price,Fuel_Label,Fuel_Ordinal,Fuel_Diesel,Fuel_Electric,Fuel_Gasoline
0,A,20000,2,2,False,False,True
1,B,25000,0,1,True,False,False
2,C,35000,1,3,False,True,False
3,D,22000,2,2,False,False,True


In [28]:
# Ordinal-style encoding via an explicit mapping (Diesel=1, Gasoline=2,
# Electric=3).
# NOTE(review): fuel type has no obvious natural order, so this ranking looks
# like an arbitrary choice made for the example - confirm the intent.
fuel_map = {
    fuel: rank
    for rank, fuel in enumerate(["Diesel", "Gasoline", "Electric"], start=1)
}
fuel_rank = df["Fuel"].map(fuel_map)
df["Fuel_Ordinal"] = fuel_rank

df

Unnamed: 0,Car,Fuel,Price,Fuel_Label,Fuel_Ordinal
0,A,Gasoline,20000,2,2
1,B,Diesel,25000,0,1
2,C,Electric,35000,1,3
3,D,Gasoline,22000,2,2
