## Label Encoder

In [23]:
# The closed set of class names; each name's position in this list is the
# integer code the hand-rolled encoder assigns to it.
classes = [
    'ClassA',
    'ClassB',
    'ClassC',
    'ClassD',
]

# Ten sample labels that walk through `classes` in order, wrapping around
# after the last class (two full cycles, then 'ClassA' and 'ClassB').
instances = [classes[position % len(classes)] for position in range(10)]

In [24]:
# Build the label -> code table: a class's code is its position in `classes`.
label_to_int = dict(zip(classes, range(len(classes))))

# Translate every sample label to its code, keeping the original order.
# A label that is not in `classes` raises KeyError here.
encoded_labels = [label_to_int[name] for name in instances]

print("Encoded labels:", encoded_labels)

Encoded labels: [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]


In [25]:
# Invert the lookup table so each integer code points back at its class name.
int_to_label = dict(zip(label_to_int.values(), label_to_int.keys()))

# Decode the integer codes back to class names, preserving order.
decoded_labels = [int_to_label[code] for code in encoded_labels]

# Show both representations together so the round trip can be compared.
print("Encoded labels:", encoded_labels)
print("Decoded labels:", decoded_labels)

Encoded labels: [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]
Decoded labels: ['ClassA', 'ClassB', 'ClassC', 'ClassD', 'ClassA', 'ClassB', 'ClassC', 'ClassD', 'ClassA', 'ClassB']


## Sklearn - Label Encoder

In [26]:
from sklearn.preprocessing import LabelEncoder

In [27]:
# Learn the label vocabulary from the samples, then encode those same samples.
# `fit` returns the encoder itself, so the fitted instance is kept for the
# inverse transform in the next cell.
label_encoder = LabelEncoder().fit(instances)

# NOTE(review): this rebinds `encoded_labels`, which previously held the plain
# Python list produced by the manual encoder above.
encoded_labels = label_encoder.transform(instances)

print("Encoded labels:", encoded_labels)

Encoded labels: [0 1 2 3 0 1 2 3 0 1]


In [28]:
# Map the integer codes back to the string labels the encoder was fitted on.
original_labels = label_encoder.inverse_transform(encoded_labels)

# Print codes and recovered labels together so the round trip can be checked by eye.
print("Encoded labels:", encoded_labels)
print("Original labels:", original_labels)

Encoded labels: [0 1 2 3 0 1 2 3 0 1]
Original labels: ['ClassA' 'ClassB' 'ClassC' 'ClassD' 'ClassA' 'ClassB' 'ClassC' 'ClassD'
 'ClassA' 'ClassB']


## One-Hot Encoding

In [29]:
import pandas as pd

In [30]:
# A single categorical column: the values A, B, C repeated three times.
data = dict(Category=['A', 'B', 'C'] * 3)

In [31]:
# Turn the column dict into a DataFrame (keys become column names).
df = pd.DataFrame.from_dict(data)

# Preview the first five rows.
df.head(5)

Unnamed: 0,Category
0,A
1,B
2,C
3,A
4,B


In [32]:
# One-hot encode 'Category': one indicator column per distinct value,
# named '<column>_<value>' (Category_A, Category_B, Category_C).
one_hot_encoded_df = pd.get_dummies(
    data=df,
    columns=['Category'],
)
one_hot_encoded_df

Unnamed: 0,Category_A,Category_B,Category_C
0,True,False,False
1,False,True,False
2,False,False,True
3,True,False,False
4,False,True,False
5,False,False,True
6,True,False,False
7,False,True,False
8,False,False,True


In [33]:
# Same encoding, but name the indicator columns with a custom prefix
# ('Dummy_A', 'Dummy_B', 'Dummy_C') instead of the source column name.
one_hot_encoded_df = pd.get_dummies(
    data=df,
    prefix='Dummy',
    columns=['Category'],
)
one_hot_encoded_df

Unnamed: 0,Dummy_A,Dummy_B,Dummy_C
0,True,False,False
1,False,True,False
2,False,False,True
3,True,False,False
4,False,True,False
5,False,False,True
6,True,False,False
7,False,True,False
8,False,False,True


In [36]:
# Drop the first level ('A') so only Dummy_B and Dummy_C remain; a row with
# both indicators False then stands for the dropped level.
one_hot_encoded_df = pd.get_dummies(
    data=df,
    prefix='Dummy',
    columns=['Category'],
    drop_first=True,
)
one_hot_encoded_df

Unnamed: 0,Dummy_B,Dummy_C
0,False,False
1,True,False
2,False,True
3,False,False
4,True,False
5,False,True
6,False,False
7,True,False
8,False,True


In [37]:
# Re-display the source frame: it still holds only 'Category', so the
# get_dummies calls above returned new frames instead of modifying `df`.
df.head()

Unnamed: 0,Category
0,A
1,B
2,C
3,A
4,B


## Ordinal Encoder

In [38]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

In [39]:
# Ten review ratings, each wrapped in its own one-element row so the list
# can be loaded as a single-column table.
data = [
    [review]
    for review in (
        'good', 'bad', 'excellent', 'average',
        'good', 'average', 'excellent', 'bad',
        'average', 'good',
    )
]

In [40]:
# Load the rows into a single-column DataFrame named 'reviews'.
# NOTE(review): this rebinds `data` from a list of rows to a DataFrame; the
# cells below rely on the DataFrame form.
data = pd.DataFrame(data, columns=['reviews'])

# Preview the first five rows.
data.head(5)

Unnamed: 0,reviews
0,good
1,bad
2,excellent
3,average
4,good


In [41]:
# (rows, columns) of the reviews frame.
data.shape

(10, 1)

In [43]:
# Explicit rank order for the 'reviews' column, worst to best. The outer
# list holds one ordering per encoded column (only one column here).
categories = [
    [
        'bad',
        'average',
        'good',
        'excellent',
    ],
]

In [44]:
# Display the category ordering that is handed to the encoder below.
categories

[['bad', 'average', 'good', 'excellent']]

In [45]:
# Pass the explicit ordering so the codes follow bad < average < good < excellent.
encoder = OrdinalEncoder(categories=categories)

In [46]:
# Fit on the reviews, then encode them: each review becomes the position of
# its value in `categories` (bad -> 0., average -> 1., good -> 2., excellent -> 3.).
encoded_data = encoder.fit(data).transform(data)
encoded_data

array([[2.],
       [0.],
       [3.],
       [1.],
       [2.],
       [1.],
       [3.],
       [0.],
       [1.],
       [2.]])

In [47]:
# Map the numeric codes back to the review strings to confirm the encoding
# round-trips to the original values.
decoded_data = encoder.inverse_transform(encoded_data)
decoded_data

array([['good'],
       ['bad'],
       ['excellent'],
       ['average'],
       ['good'],
       ['average'],
       ['excellent'],
       ['bad'],
       ['average'],
       ['good']], dtype=object)