## OneHotEncoder
One-hot encoding is a preprocessing technique used in machine learning to convert
categorical data into a binary (0 or 1) matrix representation, with one column per category.
scikit-learn provides it as the `OneHotEncoder` class, and pandas as `get_dummies`.
It is especially useful for algorithms that cannot handle categorical variables directly,
such as regression models or distance-based algorithms.

In [4]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

### Perform One-Hot Encoding in Pandas

In [6]:
# Toy dataset with two categorical columns, written as one (Color, Size) tuple per row.
data = pd.DataFrame(
    [
        ('Red', 'S'),
        ('Green', 'M'),
        ('Blue', 'L'),
        ('Red', 'XL'),
    ],
    columns=['Color', 'Size'],
)
print(data)


   Color Size
0    Red    S
1  Green    M
2   Blue    L
3    Red   XL


In [57]:
# Expand only Color into indicator columns; Size passes through unchanged.
# drop_first=False keeps one indicator column for every Color value.
encoded_data = pd.get_dummies(
    data=data,
    columns=['Color'],
    prefix='Color',
    drop_first=False,
)
print(encoded_data)


  Size  Color_Blue  Color_Green  Color_Red
0    S       False        False       True
1    M       False         True      False
2    L        True        False      False
3   XL       False        False       True


In [59]:
# Encode both categorical columns in one call; with no explicit prefix,
# each indicator column is named after its source column (Color_*, Size_*).
encoded_data = pd.get_dummies(
    data=data,
    columns=['Color', 'Size'],
    drop_first=False,
)
print(encoded_data)

   Color_Blue  Color_Green  Color_Red  Size_L  Size_M  Size_S  Size_XL
0       False        False       True   False   False    True    False
1       False         True      False   False    True   False    False
2        True        False      False    True   False   False    False
3       False        False       True   False   False   False     True


In [61]:
# drop_first=True omits the first Color level (Blue here), so a row with all
# remaining indicators False implicitly means that dropped level.
encoded_data = pd.get_dummies(
    data=data,
    columns=['Color'],
    drop_first=True,
)
print(encoded_data)


  Size  Color_Green  Color_Red
0    S        False       True
1    M         True      False
2    L        False      False
3   XL        False       True


### Perform One-Hot Encoding in sklearn

In [46]:
# sparse_output=False requests a dense result instead of a sparse matrix,
# so the encoded values can be printed and wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)

# Double brackets select a one-column DataFrame rather than a Series,
# giving the encoder 2-D input; fit and transform happen in one step.
encoded_data = encoder.fit_transform(data.loc[:, ['Color']])

In [48]:
# Show the encoder's output: one row per input row, one 0/1 column per distinct Color value.
print(encoded_data)

[[0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


### Add Encoded Data Back to DataFrame

In [51]:
# Column labels for the encoded matrix (e.g. "Color_Blue"), taken from the fitted encoder.
encoded_column = encoder.get_feature_names_out(['Color'])

# Reuse data's index for the encoded frame: pd.concat(axis=1) aligns rows by
# index label, so a default 0..n-1 index would misalign (and introduce NaNs)
# whenever `data` carries any other index, e.g. after filtering or shuffling.
encoded_df = pd.DataFrame(data=encoded_data, columns=encoded_column, index=data.index)

# Append the indicator columns and drop the original categorical column they replace.
final_data = pd.concat([data, encoded_df], axis=1).drop(columns=['Color'])
print(final_data)

  Size  Color_Blue  Color_Green  Color_Red
0    S         0.0          0.0        1.0
1    M         0.0          1.0        0.0
2    L         1.0          0.0        0.0
3   XL         0.0          0.0        1.0
