# Example

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler

# Toy dataset: two categorical columns (Gender, Education) and two numeric
# columns (Age, Income); each list holds one value per row.
df = pd.DataFrame(
    dict(
        Gender=['Male', 'Female', 'Female', 'Male'],
        Education=['High School', 'Bachelor', 'Master', 'PhD'],
        Age=[18, 25, 32, 40],
        Income=[20000, 35000, 50000, 80000],
    )
)

# Show the untouched frame so later transformations can be compared against it.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
   Gender    Education  Age  Income
0    Male  High School   18   20000
1  Female     Bachelor   25   35000
2  Female       Master   32   50000
3    Male          PhD   40   80000


In [2]:
# --- Encoding ---
# Ordinal ("label") encoding of Education: each level is replaced by its rank,
# so the order High School < Bachelor < Master < PhD is preserved numerically.
edu_order = {'High School': 1, 'Bachelor': 2, 'Master': 3, 'PhD': 4}

# Series.map() silently produces NaN for any value that is not a key of the
# mapping, so fail loudly on unexpected categories instead. Values that are
# already missing (NaN) are ignored by the check and stay NaN after mapping.
unknown_levels = set(df['Education'].dropna()) - set(edu_order)
if unknown_levels:
    raise ValueError(
        f"Education values missing from edu_order: {sorted(map(str, unknown_levels))}"
    )

df['Education_encoded'] = df['Education'].map(edu_order)
print("\nDataFrame after Label Encoding (Education):")
print(df)


DataFrame after Label Encoding (Education):
   Gender    Education  Age  Income  Education_encoded
0    Male  High School   18   20000                  1
1  Female     Bachelor   25   35000                  2
2  Female       Master   32   50000                  3
3    Male          PhD   40   80000                  4


In [3]:
# One-Hot Encoding (Gender)
# get_dummies replaces 'Gender' with one indicator column per category
# (Gender_Female, Gender_Male) and the result is rebound to `df`. Because the
# source column is gone afterwards, re-running this cell unguarded would raise
# KeyError; only encode while 'Gender' is still present so the cell is
# safe to execute more than once.
if 'Gender' in df.columns:
    df = pd.get_dummies(df, columns=['Gender'])
print("\nDataFrame after One-Hot Encoding")
print(df)


DataFrame after One-Hot Encoding
     Education  Age  Income  Education_encoded  Gender_Female  Gender_Male
0  High School   18   20000                  1          False         True
1     Bachelor   25   35000                  2           True        False
2       Master   32   50000                  3           True        False
3          PhD   40   80000                  4          False         True


In [4]:
# --- Normalization ---
# Min-max scaling: Age is rescaled linearly so the smallest value becomes 0
# and the largest becomes 1.
# Double brackets keep the selection 2-D (a one-column frame) for the scaler.
scaler = MinMaxScaler().fit(df[['Age']])
df['Age_norm'] = scaler.transform(df[['Age']])
print("\nDataFrame after Normalization (Age):", df, sep="\n")


DataFrame after Normalization (Age):
     Education  Age  Income  Education_encoded  Gender_Female  Gender_Male  \
0  High School   18   20000                  1          False         True   
1     Bachelor   25   35000                  2           True        False   
2       Master   32   50000                  3           True        False   
3          PhD   40   80000                  4          False         True   

   Age_norm  
0  0.000000  
1  0.318182  
2  0.636364  
3  1.000000  


In [5]:
# --- Standardization ---
# Z-score scaling: Income is centred on its mean and divided by its standard
# deviation, giving a column with mean 0 and unit variance.
# Double brackets keep the selection 2-D (a one-column frame) for the scaler.
scaler_std = StandardScaler().fit(df[['Income']])
df['Income_std'] = scaler_std.transform(df[['Income']])
print("\nDataFrame after Standardization (Income):", df, sep="\n")


DataFrame after Standardization (Income):
     Education  Age  Income  Education_encoded  Gender_Female  Gender_Male  \
0  High School   18   20000                  1          False         True   
1     Bachelor   25   35000                  2           True        False   
2       Master   32   50000                  3           True        False   
3          PhD   40   80000                  4          False         True   

   Age_norm  Income_std  
0  0.000000   -1.183216  
1  0.318182   -0.507093  
2  0.636364    0.169031  
3  1.000000    1.521278  


In [6]:
# Final view of the frame, including every encoded and scaled column that the
# preceding cells added.
print("\nTransformed DataFrame:", df, sep="\n")


Transformed DataFrame:
     Education  Age  Income  Education_encoded  Gender_Female  Gender_Male  \
0  High School   18   20000                  1          False         True   
1     Bachelor   25   35000                  2           True        False   
2       Master   32   50000                  3           True        False   
3          PhD   40   80000                  4          False         True   

   Age_norm  Income_std  
0  0.000000   -1.183216  
1  0.318182   -0.507093  
2  0.636364    0.169031  
3  1.000000    1.521278  


# Tasks 
Load the Titanic dataset (or another dataset with both categorical and numerical features), then complete the following:
- Apply label encoding to Sex.
- Apply one-hot encoding to Embarked.
- Normalize the Age column.
- Standardize the Fare column.
- Compare the effect of normalization vs. standardization on a small dataset with highly varying scales. Visualize distributions before and after.