# One-Hot and Ordinal Encoders

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [2]:
# Nominal feature: four shape names with no inherent ordering.
shape = pd.Series(
    [
        "Square",
        "Circle",
        "Triangle",
        "Rectangle",
    ]
)
shape

0       Square
1       Circle
2     Triangle
3    Rectangle
dtype: object

In [3]:
# Two label columns, one value per row, both stored as plain strings.
size = pd.Series(["S", "S", "L", "XL"])
ordinalNumber = pd.Series(["third", "first", "second", "second"])

In [8]:
# Combine the three Series into a single frame, one named column per Series.
df = pd.DataFrame(
    {
        "Shape": shape,
        "Class": ordinalNumber,
        "Size": size,
    }
)

In [9]:
# Display the assembled frame with its Shape, Class and Size columns.
df

Unnamed: 0,Shape,Class,Size
0,Square,third,S
1,Circle,first,S
2,Triangle,second,L
3,Rectangle,second,XL


In [13]:
# One indicator column per distinct shape, named "Shape-<value>".
# The dtype is pinned explicitly: pandas >= 2.0 returns boolean columns by
# default, whereas earlier versions returned uint8 0/1 indicators. Passing
# np.uint8 yields the same 0/1 table on both.
pd.get_dummies(
    df['Shape'],
    prefix='Shape',
    prefix_sep='-',
    drop_first=False,
    dtype=np.uint8,
)

Unnamed: 0,Shape-Circle,Shape-Rectangle,Shape-Square,Shape-Triangle
0,0,0,1,0
1,1,0,0,0
2,0,0,0,1
3,0,1,0,0


In [20]:
# Keep the encoder's default sparse output and densify it explicitly.
# The `sparse` constructor argument was deprecated in scikit-learn 1.2
# (renamed to `sparse_output`) and removed in 1.4, so `sparse=False` raises a
# TypeError on current releases. `.toarray()` works on every version and
# produces the same dense float ndarray.
ohe = OneHotEncoder()
# Double brackets select a one-column DataFrame, giving the encoder 2-D input.
shape_dummies = ohe.fit_transform(df[['Shape']]).toarray()

In [21]:
# Print the encoded matrix followed by its container type on the same line.
print(f"{shape_dummies} {type(shape_dummies)}")

[[0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]] <class 'numpy.ndarray'>


In [23]:
# Spell out the category order per column so each label is encoded as its
# position in the list, rather than by the encoder's automatically inferred order.
ode = OrdinalEncoder(
    categories=[
        ["first", "second", "third", "fourth"],  # order for the 'Class' column
        ["S", "M", "L", "XL"],  # order for the 'Size' column
    ]
)
class_size_dummies = ode.fit_transform(df[["Class", "Size"]])

In [24]:
# Check the encoded result's dimensions: one row per sample, one column per encoded feature.
class_size_dummies.shape

(4, 2)

In [25]:
# Show the ordinal codes; each value is the index of the label within the
# category list supplied to the encoder for that column.
class_size_dummies

array([[2., 0.],
       [0., 0.],
       [1., 2.],
       [1., 3.]])

In [41]:
# Toy samples: two string features and one integer feature per row.
X = [
    ["Circle", "Male", 1],
    ["Square", "Female", 2],
    ["Triangle", "Female", 3],
]
# drop="if_binary" keeps a single indicator column for any feature that has
# exactly two categories; features with more categories keep all columns.
ohe = OneHotEncoder(drop="if_binary").fit(X)
# Categories learned for each input column, in column order.
ohe.categories_

[array(['Circle', 'Square', 'Triangle'], dtype=object),
 array(['Female', 'Male'], dtype=object),
 array([1, 2, 3], dtype=object)]

In [42]:
# Encode two new rows with the fitted encoder and convert the sparse result
# to a dense array.
arr_t = ohe.transform(
    [
        ["Circle", "Male", 2],
        ["Square", "Female", 1],
    ]
).toarray()

In [43]:
# Map the encoded rows back to their original labels to check the round trip.
ohe.inverse_transform(arr_t)

array([['Circle', 'Male', 2],
       ['Square', 'Female', 1]], dtype=object)

In [44]:
# Inspect the encoded matrix: seven columns in total, because the
# two-category Male/Female feature is reduced to a single indicator column
# by drop='if_binary'.
arr_t

array([[1., 0., 0., 1., 0., 1., 0.],
       [0., 1., 0., 0., 1., 0., 0.]])