In [1]:
# One-Hot Encoding:
# One-hot encoding transforms each category value into a new binary column and assigns a 1 or 0 (presence or
# absence) value to the column. This method is widely used for nominal categories without intrinsic ordering.
# • Advantages:
#      Eliminates any ordinal relationship, making it suitable for nominal data.
#      Easy to understand and implement.
# • Disadvantages:
#      Can lead to a high-dimensional feature space, increasing memory and computational costs.
#      Not suitable for high cardinality features (many unique values).

In [2]:
import pandas as pd

In [3]:
# Toy dataset: a single nominal column cycling through three levels (A, B, C), nine rows in total.
data = {'Category': ['A', 'B', 'C'] * 3}

In [4]:
# Wrap the raw dict in a DataFrame; each dict key becomes a column.
df = pd.DataFrame(data=data)
# Preview the first five rows (five is also the default for head()).
df.head(n=5)

Unnamed: 0,Category
0,A
1,B
2,C
3,A
4,B


In [5]:
# Expand 'Category' into one indicator column per level
# (Category_A, Category_B, Category_C); the original column is replaced.
one_hot_encoded_df = pd.get_dummies(
    data=df,
    columns=['Category'],
)
one_hot_encoded_df

Unnamed: 0,Category_A,Category_B,Category_C
0,True,False,False
1,False,True,False
2,False,False,True
3,True,False,False
4,False,True,False
5,False,False,True
6,True,False,False
7,False,True,False
8,False,False,True


In [10]:
# Same encoding with a custom column prefix and the first level dropped:
# three levels yield two indicator columns (Dummy_B, Dummy_C), and rows of
# the dropped level ('A') are False in both.
# Note: this rebinds one_hot_encoded_df, replacing the full encoding computed earlier.
one_hot_encoded_df = pd.get_dummies(
    data=df,
    columns=['Category'],
    prefix='Dummy',
    drop_first=True,
)
one_hot_encoded_df

Unnamed: 0,Dummy_B,Dummy_C
0,False,False
1,True,False
2,False,True
3,False,False
4,True,False
5,False,True
6,False,False
7,True,False
8,False,True
