# Label Encoder
# Ordinal Encoder
# One Hot Encoder

In [17]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, OrdinalEncoder
import numpy as np
import pandas as pd

In [18]:
df = pd.DataFrame({
    'country': ['USA', 'UK', 'IND', 'BD', 'USA']
})

In [19]:
df

Unnamed: 0,country
0,USA
1,UK
2,IND
3,BD
4,USA


# Label Encoder

In [20]:
# Used for converting categorical data into numerical data, for independent and dependent variable
# It has some cons

le = LabelEncoder()
df['country_label_encoder'] = le.fit_transform(df['country'])

In [21]:
df

Unnamed: 0,country,country_label_encoder
0,USA,3
1,UK,2
2,IND,1
3,BD,0
4,USA,3


In [22]:
print(type(df['country_label_encoder']))

<class 'pandas.core.series.Series'>


In [23]:
df.describe()

Unnamed: 0,country_label_encoder
count,5.0
mean,1.8
std,1.30384
min,0.0
25%,1.0
50%,2.0
75%,3.0
max,3.0


In [24]:
df.columns

Index(['country', 'country_label_encoder'], dtype='object')

In [25]:
df

Unnamed: 0,country,country_label_encoder
0,USA,3
1,UK,2
2,IND,1
3,BD,0
4,USA,3


# Ordinal Encoder

In [31]:
# Used for dependent variable where there is an order

oe = OrdinalEncoder()
df['country_onehot_encoder'] = oe.fit_transform(df[['country']])

In [29]:
df

Unnamed: 0,country,country_label_encoder,country_onehot_encoder
0,USA,3,3.0
1,UK,2,2.0
2,IND,1,1.0
3,BD,0,0.0
4,USA,3,3.0


# OneHot Encoder

In [35]:
# Used for independent variable

ohe = OneHotEncoder()
country_onehot_encoder = ohe.fit_transform(df[['country']]).toarray()

In [36]:
country_onehot_encoder

array([[0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.]])

In [45]:
ohe.get_feature_names()

array(['x0_BD', 'x0_IND', 'x0_UK', 'x0_USA'], dtype=object)

In [46]:
ohe_df = pd.DataFrame(country_onehot_encoder, columns=ohe.get_feature_names())

In [47]:
ohe_df

Unnamed: 0,x0_BD,x0_IND,x0_UK,x0_USA
0,0.0,0.0,0.0,1.0
1,0.0,0.0,1.0,0.0
2,0.0,1.0,0.0,0.0
3,1.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0


In [48]:
df = pd.concat([df, ohe_df], axis=1)

In [49]:
df

Unnamed: 0,country,country_label_encoder,country_onehot_encoder,x0_BD,x0_IND,x0_UK,x0_USA
0,USA,3,3.0,0.0,0.0,0.0,1.0
1,UK,2,2.0,0.0,0.0,1.0,0.0
2,IND,1,1.0,0.0,1.0,0.0,0.0
3,BD,0,0.0,1.0,0.0,0.0,0.0
4,USA,3,3.0,0.0,0.0,0.0,1.0
