In [1]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import pandas as pd
import numpy as np
import seaborn as sns

### One-Hot Encoding

In [27]:
# Toy dataset for the one-hot encoding demo: a numeric ID plus two
# categorical string columns (Color, Country).
d = dict(
    ID=[1, 2, 3, 4, 5],
    Color=["Red", "Blue", "Green", "Blue", "Blue"],
    Country=["USA", "UK", "Canada", "USA", "USA"],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,ID,Color,Country
0,1,Red,USA
1,2,Blue,UK
2,3,Green,Canada
3,4,Blue,USA
4,5,Blue,USA


In [28]:
# Show the dtype of every column in df (Color and Country are the
# object-typed, i.e. string, columns).
df.dtypes

ID          int64
Color      object
Country    object
dtype: object

In [29]:
# Distinct Country values, in order of first appearance.
df["Country"].unique()

array(['USA', 'UK', 'Canada'], dtype=object)

In [30]:
# Distinct Color values, in order of first appearance.
df["Color"].unique()

array(['Red', 'Blue', 'Green'], dtype=object)

In [34]:
# Learn the category vocabulary of both categorical columns in one pass.
# fit() returns the encoder itself, so construction and fitting can be chained.
ohe = OneHotEncoder().fit(df[["Color", "Country"]])

In [36]:
# categories_ is a list with one array of category values per fitted column,
# in the same order the columns were passed to fit (Color first, then Country).
feature_labels = ohe.categories_
feature_labels

[array(['Blue', 'Green', 'Red'], dtype=object),
 array(['Canada', 'UK', 'USA'], dtype=object)]

In [37]:
# Flatten the per-column category arrays into one 1-D array of labels that
# lines up with the columns of the one-hot matrix.
#
# np.concatenate is used instead of np.array(...).ravel() because the latter
# only flattens correctly when every column has the same number of
# categories; with ragged category lists it raises (or produces an array of
# arrays on older NumPy). Reading from ohe.categories_ directly also keeps
# this cell safe to re-run.
feature_labels = np.concatenate(ohe.categories_)
feature_labels

array(['Blue', 'Green', 'Red', 'Canada', 'UK', 'USA'], dtype=object)

In [38]:
# Encode the two categorical columns with the fitted encoder, then convert
# the result to a dense NumPy array so it can be displayed and wrapped in a
# DataFrame.
categorical_cols = df[["Color", "Country"]]
feature_array = ohe.transform(categorical_cols).toarray()
feature_array

array([[0., 0., 1., 0., 0., 1.],
       [1., 0., 0., 0., 1., 0.],
       [0., 1., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 1.]])

In [39]:
# Wrap the dense one-hot matrix in a DataFrame, one column per category label.
# The frame reuses df's index so that the column-wise pd.concat with df later
# in the notebook pairs rows correctly even if df does not carry the default
# 0..n-1 index (concat along axis=1 aligns on index labels, not on position).
df2 = pd.DataFrame(feature_array, columns=feature_labels, index=df.index)
df2

Unnamed: 0,Blue,Green,Red,Canada,UK,USA
0,0.0,0.0,1.0,0.0,0.0,1.0
1,1.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,0.0,1.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,1.0
4,1.0,0.0,0.0,0.0,0.0,1.0


In [44]:
# Remove the raw categorical columns now that they are one-hot encoded in df2.
# errors="ignore" keeps this cell re-runnable: df is rebound here, so a second
# execution would otherwise raise KeyError because the columns are already gone.
df = df.drop(columns=["Color", "Country"], errors="ignore")
df

Unnamed: 0,ID
0,1
1,2
2,3
3,4
4,5


In [46]:
# Place the remaining ID column and the one-hot columns side by side;
# rows are matched on their index labels.
df_transformed = pd.concat(objs=[df, df2], axis="columns")
df_transformed

Unnamed: 0,ID,Blue,Green,Red,Canada,UK,USA
0,1,0.0,0.0,1.0,0.0,0.0,1.0
1,2,1.0,0.0,0.0,0.0,1.0,0.0
2,3,0.0,1.0,0.0,1.0,0.0,0.0
3,4,1.0,0.0,0.0,0.0,0.0,1.0
4,5,1.0,0.0,0.0,0.0,0.0,1.0


### Label Encoding

In [48]:
# Rebuild the original toy dataset for the label-encoding demo, since the
# earlier df had its categorical columns removed.
d = dict(
    ID=[1, 2, 3, 4, 5],
    Color=["Red", "Blue", "Green", "Blue", "Blue"],
    Country=["USA", "UK", "Canada", "USA", "USA"],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,ID,Color,Country
0,1,Red,USA
1,2,Blue,UK
2,3,Green,Canada
3,4,Blue,USA
4,5,Blue,USA


In [49]:
# Fit a label encoder on Country and overwrite the column with its integer
# codes (the output below shows Canada=0, UK=1, USA=2).
le = LabelEncoder()
country_codes = le.fit(df["Country"]).transform(df["Country"])
df["Country"] = country_codes
df

Unnamed: 0,ID,Color,Country
0,1,Red,2
1,2,Blue,1
2,3,Green,0
3,4,Blue,2
4,5,Blue,2


In [50]:
# Encode Color the same way, overwriting the column with integer codes.
# NOTE(review): this re-fits the same `le` object that was fitted on Country,
# so afterwards `le` only knows the Color classes. Use a separate encoder per
# column if the Country codes ever need to be decoded with inverse_transform.
color_codes = le.fit(df["Color"]).transform(df["Color"])
df["Color"] = color_codes
df

Unnamed: 0,ID,Color,Country
0,1,2,2
1,2,0,1
2,3,1,0
3,4,0,2
4,5,0,2
