# <b><i>One Hot Encoding</i></b>

#### Importing Libraries

In [10]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder


In [11]:
# Build a one-column DataFrame holding the categorical feature 'color'
df = pd.DataFrame(
    data=dict(
        color=['red', 'blue', 'green', 'yellow', 'green', 'blue', 'red', 'yellow', 'red'],
    ),
)
df

Unnamed: 0,color
0,red
1,blue
2,green
3,yellow
4,green
5,blue
6,red
7,yellow
8,red


In [12]:
# Create a OneHotEncoder with default settings; it is fitted in a later cell.

encoder = OneHotEncoder()

In [13]:
# Fit the encoder on the 'color' column and encode it in one step.
# The double brackets select a one-column DataFrame (2-D) rather than a Series.

encoded = encoder.fit_transform(df[['color']])

In [39]:
# Show the names of the one-hot columns produced by the fitted encoder
encoder.get_feature_names_out()

array(['color_blue', 'color_green', 'color_red', 'color_yellow'],
      dtype=object)

In [40]:
# Converting the encoded values to a DataFrame.
# `encoded` is densified with .toarray() and labelled with the encoder's
# feature names. Reusing df's index keeps every encoded row aligned with its
# source row when the two frames are concatenated column-wise (pd.concat with
# axis=1 aligns on index), even if `df` does not have the default 0..n-1 index.

encoded_df = pd.DataFrame(
    encoded.toarray(),
    columns=encoder.get_feature_names_out(),
    index=df.index,
)
encoded_df

Unnamed: 0,color_blue,color_green,color_red,color_yellow
0,0.0,0.0,1.0,0.0
1,1.0,0.0,0.0,0.0
2,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,1.0
4,0.0,1.0,0.0,0.0
5,1.0,0.0,0.0,0.0
6,0.0,0.0,1.0,0.0
7,0.0,0.0,0.0,1.0
8,0.0,0.0,1.0,0.0


In [41]:
# Concatenating the one-hot columns with the original DataFrame, side by side

pd.concat(objs=[df, encoded_df], axis='columns')

Unnamed: 0,color,color_blue,color_green,color_red,color_yellow
0,red,0.0,0.0,1.0,0.0
1,blue,1.0,0.0,0.0,0.0
2,green,0.0,1.0,0.0,0.0
3,yellow,0.0,0.0,0.0,1.0
4,green,0.0,1.0,0.0,0.0
5,blue,1.0,0.0,0.0,0.0
6,red,0.0,0.0,1.0,0.0
7,yellow,0.0,0.0,0.0,1.0
8,red,0.0,0.0,1.0,0.0


# <b><i>Label Encoding</i></b>

In [42]:
from sklearn.preprocessing import LabelEncoder
# NOTE: this rebinds `encoder`, replacing the OneHotEncoder created earlier.
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels (y); it is used on a feature column here for demonstration.
encoder = LabelEncoder()

# Encode each color as an integer code; the output below shows
# blue -> 0, green -> 1, red -> 2, yellow -> 3.
encoder.fit_transform(df['color'])

array([2, 0, 1, 3, 1, 0, 2, 3, 2])

# <i><b>Ordinal Encoding</b></i>

In [49]:
from sklearn.preprocessing import OrdinalEncoder

# Spell out the category order explicitly so the codes follow
# small -> 0, medium -> 1, large -> 2 (as the output below shows).
# NOTE: this rebinds `encoder` once more.
encoder = OrdinalEncoder(categories=[['small', 'medium', 'large']])

# Toy data with an ordered categorical feature
df2 = pd.DataFrame(
    data=dict(size=['large', 'medium', 'small', 'small', 'large', 'medium']),
)

encoder.fit_transform(df2[['size']])

array([[2.],
       [1.],
       [0.],
       [0.],
       [2.],
       [1.]])

# <i><b>Target Guided Ordinal Encoding</b></i>

In [52]:
# Toy data: one (city, price) record per row.
# NOTE: this rebinds `df`, replacing the color DataFrame used in earlier cells.
df = pd.DataFrame(
    data=[
        ('New York', 200),
        ('London', 150),
        ('Paris', 300),
        ('Tokyo', 250),
        ('New York', 180),
        ('Paris', 320),
    ],
    columns=['city', 'price'],
)

df

Unnamed: 0,city,price
0,New York,200
1,London,150
2,Paris,300
3,Tokyo,250
4,New York,180
5,Paris,320


- Calculate mean of prices for each city

In [54]:
# Average price per city, collected into a plain {city: mean price} dict
mean_price = df['price'].groupby(df['city']).mean().to_dict()
mean_price

{'London': 150.0, 'New York': 190.0, 'Paris': 310.0, 'Tokyo': 250.0}

- Replace each city with its mean price

In [55]:
# Add a 'city_mean' column: each row gets the mean price looked up for its city
df['city_mean'] = df['city'].map(mean_price)

In [56]:
# Display the DataFrame, now including the 'city_mean' column
df

Unnamed: 0,city,price,city_mean
0,New York,200,190.0
1,London,150,150.0
2,Paris,300,310.0
3,Tokyo,250,250.0
4,New York,180,190.0
5,Paris,320,310.0


# END