In [1]:
# One-Hot Encoding
# Expands a categorical feature into one indicator column per category;
# a row is flagged only in the column matching its own category.
# Suited to nominal (unordered) categories such as color or city.
import pandas as pd

# Toy dataset: a single categorical feature named "Color".
data = {'Color': ['Red', 'Green', 'Blue', 'Green', 'Red']}
df = pd.DataFrame(data)

# Show the input frame (heading and frame separated by a newline).
print("Original DataFrame:", df, sep="\n")

# Replace 'Color' with one 'Color_<value>' indicator column per distinct value.
# The source frame `df` is left unchanged; the result is a new frame.
df_encoded = pd.get_dummies(data=df, prefix='Color', columns=['Color'])

# Show the encoded frame, preceded by a blank line.
print("\nOne-Hot Encoded DataFrame:", df_encoded, sep="\n")


Original DataFrame:
   Color
0    Red
1  Green
2   Blue
3  Green
4    Red

One-Hot Encoded DataFrame:
   Color_Blue  Color_Green  Color_Red
0       False        False       True
1       False         True      False
2        True        False      False
3       False         True      False
4       False        False       True


In [2]:
# Label Encoding
# Assigns a unique integer to each category in a feature.
# NOTE: LabelEncoder numbers the categories in sorted order of their values
# (here alphabetical: Blue=0, Green=1, Red=2), so the integers carry no
# meaningful ranking. scikit-learn intends LabelEncoder for target labels (y);
# for ordinal input features (like low, medium, high) use OrdinalEncoder with
# an explicit `categories` order instead.
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Reuses the sample DataFrame `df` built in the one-hot encoding cell.
# Only the 'Color' column is shown so that re-running this cell still prints
# the untouched input rather than a frame that already has 'Color_Encoded'.
print("Original DataFrame:")
print(df[['Color']])

# Initialize the encoder that converts categorical labels into integers.
label_encoder = LabelEncoder()

# Fit on the 'Color' column and store the integer codes next to it.
# The column is written onto `df` itself, so cells that run after this one
# see 'Color_Encoded' on the shared frame.
df['Color_Encoded'] = label_encoder.fit_transform(df['Color'])

# Display the DataFrame with the encoded column
print("\nLabel Encoded DataFrame:")
print(df)

Original DataFrame:
   Color
0    Red
1  Green
2   Blue
3  Green
4    Red

Label Encoded DataFrame:
   Color  Color_Encoded
0    Red              2
1  Green              1
2   Blue              0
3  Green              1
4    Red              2
