# Feature Encoding

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## One Hot Encoding

In [2]:
# Toy frame holding a single nominal (unordered) colour feature with six rows.
data = {
    'colors': ['Red', 'Green', 'Red', 'Blue', 'Green', 'Yellow'],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,colors
0,Red
1,Green
2,Red
3,Blue
4,Green
5,Yellow


In [3]:
# Expand 'colors' into one indicator column per distinct value.
# No dtype is passed here; the recorded output shows boolean indicators.
onehot = pd.get_dummies(data=df, columns=['colors'])
onehot

Unnamed: 0,colors_Blue,colors_Green,colors_Red,colors_Yellow
0,False,False,True,False
1,False,True,False,False
2,False,False,True,False
3,True,False,False,False
4,False,True,False,False
5,False,False,False,True


In [4]:
# One indicator column per distinct colour; dtype=int makes the indicators
# 0/1 integers rather than booleans.
onehot = pd.get_dummies(data=df, columns=['colors'], dtype=int)
onehot

Unnamed: 0,colors_Blue,colors_Green,colors_Red,colors_Yellow
0,0,0,1,0
1,0,1,0,0
2,0,0,1,0
3,1,0,0,0
4,0,1,0,0
5,0,0,0,1


## Label Encoding

In [5]:
from sklearn.preprocessing import LabelEncoder

In [6]:
# Toy frame holding a single categorical feature: six animal names, four distinct.
data = {
    'animal': ['Cat', 'Dog', 'Bull', 'Cow', 'Dog', 'Cat'],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,animal
0,Cat
1,Dog
2,Bull
3,Cow
4,Dog
5,Cat


In [8]:
# Fit the encoder on the animal names and store the integer codes in a new column.
# In the recorded output the codes follow the alphabetical order of the names
# (Bull=0, Cat=1, Cow=2, Dog=3), which carries no real meaning for a nominal feature.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# labels (y); OrdinalEncoder is its documented counterpart for input features.
lbe = LabelEncoder()
animal_codes = lbe.fit_transform(df['animal'])
df['animal_encoded'] = animal_codes
df

Unnamed: 0,animal,animal_encoded
0,Cat,1
1,Dog,3
2,Bull,0
3,Cow,2
4,Dog,3
5,Cat,1


## Ordinal Encoding

In [10]:
from sklearn.preprocessing import OrdinalEncoder

In [11]:
# Toy frame holding a single ordered categorical feature (garment-style sizes).
data = {
    'size': ['Small', 'Medium', 'Large', 'Small', 'Large'],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,size
0,Small
1,Medium
2,Large
3,Small
4,Large


In [13]:
# Spell out the category order so the codes respect Small < Medium < Large
# instead of an order inferred from the data.
size_order = ['Small', 'Medium', 'Large']
encoder = OrdinalEncoder(categories=[size_order])
# The encoder takes 2-D input, hence the double brackets; the recorded output
# shows the codes coming back as floats (0.0, 1.0, 2.0).
df['size_encoded_custom'] = encoder.fit_transform(df[['size']])
df

Unnamed: 0,size,size_encoded_custom
0,Small,0.0
1,Medium,1.0
2,Large,2.0
3,Small,0.0
4,Large,2.0


## Binary Encoding

In [None]:
!pip install category_encoders

In [21]:
import category_encoders as ce

In [22]:
# Binary encoding: each category gets an integer code that is then written out
# in binary digits, one column per digit. The recorded output has two columns
# (color_0, color_1) for the three distinct colours.
df = pd.DataFrame({'color': ['Red', 'Green', 'Blue', 'Green', 'Red']})

encoder = ce.BinaryEncoder(cols=['color'])
df_binary = encoder.fit_transform(df)

# End the cell with the frame itself (rich display), matching the other cells
# in this notebook, rather than print()-ing plain text.
df_binary

   color_0  color_1
0        0        1
1        1        0
2        1        1
3        1        0
4        0        1


## Frequency Encoding

In [26]:
# Frequency encoding: replace each category by its share of the rows.
df = pd.DataFrame({'color': ['Red', 'Green', 'Blue', 'Green', 'Red']})

# normalize=True returns relative frequencies directly (shares of the non-null
# values), replacing the manual division by len(df).
frequency = df['color'].value_counts(normalize=True)
df['color_encoded'] = df['color'].map(frequency)

# Caveat visible in the result: Red and Green both occur twice, so both map to
# 0.4 and can no longer be told apart after encoding.
# End the cell with the frame itself (rich display), matching the other cells
# in this notebook, rather than print()-ing plain text.
df

   color  color_encoded
0    Red            0.4
1  Green            0.4
2   Blue            0.2
3  Green            0.4
4    Red            0.4
