In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder

In [2]:
data = {
    'Height': ['Short','Average','Tall','Short','Average','Tall','Short','Average','Tall'],
    'Gender': ['M','F','M','F','M','F','M','F','M'],
    'Color': ['White','Brown','Black','White','Brown','Black','White','Brown','Black'],
    'Review':['Poor', 'Nice','Excellent','Poor', 'Nice','Excellent','Poor', 'Nice','Excellent']
}

df = pd.DataFrame(data)

#display df
display(df)

Unnamed: 0,Height,Gender,Color,Review
0,Short,M,White,Poor
1,Average,F,Brown,Nice
2,Tall,M,Black,Excellent
3,Short,F,White,Poor
4,Average,M,Brown,Nice
5,Tall,F,Black,Excellent
6,Short,M,White,Poor
7,Average,F,Brown,Nice
8,Tall,M,Black,Excellent


### Way 1: get_dummies

In [3]:
df_dummies = df.copy()
df_dummies = pd.get_dummies(df_dummies)
display(df_dummies)

Unnamed: 0,Height_Average,Height_Short,Height_Tall,Gender_F,Gender_M,Color_Black,Color_Brown,Color_White,Review_Excellent,Review_Nice,Review_Poor
0,0,1,0,0,1,0,0,1,0,0,1
1,1,0,0,1,0,0,1,0,0,1,0
2,0,0,1,0,1,1,0,0,1,0,0
3,0,1,0,1,0,0,0,1,0,0,1
4,1,0,0,0,1,0,1,0,0,1,0
5,0,0,1,1,0,1,0,0,1,0,0
6,0,1,0,0,1,0,0,1,0,0,1
7,1,0,0,1,0,0,1,0,0,1,0
8,0,0,1,0,1,1,0,0,1,0,0


In [4]:
# To avoid dummies trap we generally drop first column pf dmmies variable for each category
# for e.g., Height column get 3 dummies cols (Height_Average	Height_Short	Height_Tall) so we drop first. 

df_dummies2 = df.copy()
df_dummies2 = pd.get_dummies(df_dummies2, drop_first= True)
display(df_dummies2)

Unnamed: 0,Height_Short,Height_Tall,Gender_M,Color_Brown,Color_White,Review_Nice,Review_Poor
0,1,0,1,0,1,0,1
1,0,0,0,1,0,1,0
2,0,1,1,0,0,0,0
3,1,0,0,0,1,0,1
4,0,0,1,1,0,1,0
5,0,1,0,0,0,0,0
6,1,0,1,0,1,0,1
7,0,0,0,1,0,1,0
8,0,1,1,0,0,0,0


### Way 2: Label Encode

In [5]:
# We know here in this data Height and review cols are example of ordinal data so, we can used label encoding to encode them in 
# in single column

cols = ['Height','Review']
df_lab_enc = pd.DataFrame()

#Label encoding
label_enc = LabelEncoder()
for col in cols:
    df_lab_enc[col] = label_enc.fit_transform(df[col])

display(df_lab_enc)

Unnamed: 0,Height,Review
0,1,2
1,0,1
2,2,0
3,1,2
4,0,1
5,2,0
6,1,2
7,0,1
8,2,0


### Way 3: Ordinal Encode

In [6]:
#Ordinal Encoding
df_ord_enc = OrdinalEncoder().fit_transform(df[cols])
df_ord_enc = pd.DataFrame(df_ord_enc, columns=cols)
#display df
display(df_ord_enc)

Unnamed: 0,Height,Review
0,1.0,2.0
1,0.0,1.0
2,2.0,0.0
3,1.0,2.0
4,0.0,1.0
5,2.0,0.0
6,1.0,2.0
7,0.0,1.0
8,2.0,0.0


### Way 4: OneHot Encode

In [7]:
#One Hot Encoding
cols =['Gender','Color']

onehot_enc = OneHotEncoder()
df_onehot_enc = onehot_enc.fit_transform(df[cols]).toarray()

cols = list(onehot_enc.categories_[0])+list(onehot_enc.categories_[1])
df_onehot_enc = pd.DataFrame(df_onehot_enc, columns= cols)

#display df
display(df_onehot_enc)

Unnamed: 0,F,M,Black,Brown,White
0,0.0,1.0,0.0,0.0,1.0
1,1.0,0.0,0.0,1.0,0.0
2,0.0,1.0,1.0,0.0,0.0
3,1.0,0.0,0.0,0.0,1.0
4,0.0,1.0,0.0,1.0,0.0
5,1.0,0.0,1.0,0.0,0.0
6,0.0,1.0,0.0,0.0,1.0
7,1.0,0.0,0.0,1.0,0.0
8,0.0,1.0,1.0,0.0,0.0


In [8]:
#One Hot Encoding
cols =['Gender','Color']

onehot_enc = OneHotEncoder(drop='first')
df_onehot_enc = onehot_enc.fit_transform(df[cols]).toarray()

cols = list(onehot_enc.categories_[0][1:])+list(onehot_enc.categories_[1][1:])
df_onehot_enc = pd.DataFrame(df_onehot_enc, columns= cols)

#display df
display(df_onehot_enc)

Unnamed: 0,M,Brown,White
0,1.0,0.0,1.0
1,0.0,1.0,0.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,1.0,1.0,0.0
5,0.0,0.0,0.0
6,1.0,0.0,1.0
7,0.0,1.0,0.0
8,1.0,0.0,0.0
