## One-hot Encoding

In [3]:
import pandas as pd

# Seven weekday abbreviations, kept in the order they should appear as rows.
weekday_names = ["Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri"]

# Single-column frame; "col1" is the categorical column one-hot encoded in the next cell.
df = pd.DataFrame({"col1": weekday_names})
print(df)

  col1
0  Sat
1  Sun
2  Mon
3  Tue
4  Wed
5  Thu
6  Fri


In [5]:
# Expand "col1" into one indicator column per distinct weekday value.
# Each generated column is named "Weekday_<value>".
df_new = pd.get_dummies(
    data=df,
    prefix="Weekday",
    columns=["col1"],
)
print(df_new)

   Weekday_Fri  Weekday_Mon  Weekday_Sat  Weekday_Sun  Weekday_Thu  \
0        False        False         True        False        False   
1        False        False        False         True        False   
2        False         True        False        False        False   
3        False        False        False        False        False   
4        False        False        False        False        False   
5        False        False        False        False         True   
6         True        False        False        False        False   

   Weekday_Tue  Weekday_Wed  
0        False        False  
1        False        False  
2        False        False  
3         True        False  
4        False         True  
5        False        False  
6        False        False  


### Example using pandas

In [8]:
import pandas as pd

# One id per city. Both lists must have the same length: building the frame
# from a dict raises ValueError on a mismatch instead of silently dropping
# the surplus entries the way zip() does.
ids = [11, 22, 33, 44, 55]
cities = ['Seattle', 'London', 'Lahore', 'Berlin', 'Abuja']

# Two-column frame: numeric 'Ids' and categorical 'Cities'
# ('Cities' is one-hot encoded in the next cell).
df = pd.DataFrame({'Ids': ids, 'Cities': cities})
print(df.head())

   Ids   Cities
0   11  Seattle
1   22   London
2   33   Lahore
3   44   Berlin
4   55    Abuja


In [14]:
# One-hot encode only the "Cities" column; "Ids" passes through unchanged.
# Each generated indicator column is named "City_<value>".
df_new = pd.get_dummies(
    data=df,
    prefix='City',
    columns=["Cities"],
)
print(df_new.head())

   Ids  City_Abuja  City_Berlin  City_Lahore  City_London  City_Seattle
0   11       False        False        False        False          True
1   22       False        False        False         True         False
2   33       False        False         True        False         False
3   44       False         True        False        False         False
4   55        True        False        False        False         False


### Example using sklearn

In [20]:
import sklearn.preprocessing as preprocessing
import numpy as np

# Six distinct colour names serve as the categorical values to encode.
targets = np.array(["red", "green", "blue", "yellow", "pink", "white"])
print("targets =", targets)

# Stage 1: map every string label to an integer code.
# (In the printed output the codes follow the alphabetical order of the labels.)
labelEnc = preprocessing.LabelEncoder()
new_target = labelEnc.fit_transform(targets)
print("new_targets (labels) =", new_target)

# Stage 2: one-hot encode the integer codes. The codes are reshaped into a
# single column (n_samples, 1) once and reused for both fitting and transforming.
label_column = new_target.reshape(-1, 1)
onehotEnc = preprocessing.OneHotEncoder()
targets_trans = onehotEnc.fit(label_column).transform(label_column)

# Convert the encoded result to a dense array so the full matrix is printed.
print("One-hot encoded targets =\n", targets_trans.toarray())

targets = ['red' 'green' 'blue' 'yellow' 'pink' 'white']
new_targets (labels) = [3 1 0 5 2 4]
One-hot encoded targets =
 [[0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]]
