# 🔰 Step 1: Import Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# 🔰 Step 2: Create a Small Categorical Dataset

In [2]:
# Toy dataset: two categorical features, five rows.
colors = ['Red', 'Green', 'Blue', 'Green', 'Red']
sizes = ['S', 'M', 'L', 'S', 'M']
df = pd.DataFrame({'Color': colors, 'Size': sizes})

# Show the raw frame so the encoded versions can be compared against it.
print("📋 Original Data:", df, sep="\n")


📋 Original Data:
   Color Size
0    Red    S
1  Green    M
2   Blue    L
3  Green    S
4    Red    M


# 🔰 Step 3: One Hot Encoding using sklearn

In [4]:
# Ask for a dense array (sparse_output=False) instead of the encoder's default
# sparse matrix, so the result can be wrapped directly in a pandas DataFrame.
# NOTE(review): the `sparse_output` keyword requires scikit-learn >= 1.2
# (older releases spell it `sparse`) — confirm the installed version.
encoder = OneHotEncoder(sparse_output=False)

In [5]:
# Learn the categories present in each column and encode every row in one call.
categorical_input = df[['Color', 'Size']]
encoded_array = encoder.fit_transform(categorical_input)

In [6]:
# Wrap the encoded array in a DataFrame whose column names come from the
# fitted encoder (one "<feature>_<category>" column per category).
# index=df.index keeps the encoded rows aligned with the source frame, so a
# later pd.concat([df, encoded_df], axis=1) stays correct even when df does
# not carry the default 0..n-1 index (e.g. after filtering or a train/test split).
encoded_df = pd.DataFrame(
    encoded_array,
    columns=encoder.get_feature_names_out(['Color', 'Size']),
    index=df.index,
)

print("\n🔵 One Hot Encoded Data (sklearn):")
print(encoded_df)


🔵 One Hot Encoded Data (sklearn):
   Color_Blue  Color_Green  Color_Red  Size_L  Size_M  Size_S
0         0.0          0.0        1.0     0.0     0.0     1.0
1         0.0          1.0        0.0     0.0     1.0     0.0
2         1.0          0.0        0.0     1.0     0.0     0.0
3         0.0          1.0        0.0     0.0     0.0     1.0
4         0.0          0.0        1.0     0.0     1.0     0.0


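# Dummy Variables

Dummy-variable encoding is one-hot encoding with one category per feature dropped: a feature with k categories becomes k − 1 columns, and the dropped category (here `Blue` and `L`) is represented by all of that feature's columns being zero/False.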

# 🔰 Step 4: Dummy Variables using pandas

In [8]:
# drop_first=True omits the first category of each encoded column, so that
# category is represented implicitly by all of its sibling columns being False.
dummy_df = pd.get_dummies(data=df, drop_first=True)

print("\n🟢 Dummy Variable Encoded Data (pandas):", dummy_df, sep="\n")


🟢 Dummy Variable Encoded Data (pandas):
   Color_Green  Color_Red  Size_M  Size_S
0        False       True   False    True
1         True      False    True   False
2        False      False   False   False
3         True      False   False    True
4        False       True    True   False
