### Target Guided Ordinal Encoding

##### Target Guided Ordinal Encoding is a technique for encoding categorical variables based on their relationship with the target variable. It is useful when a categorical variable has a large number of unique categories and we want to use that variable as a feature in our machine learning model.

##### In Target Guided Ordinal Encoding we replace each category in the categorical variable with a numerical value based on the mean or median of the target variable for that category. This creates a monotonic relationship between the categorical variable and the target variable, which can improve the predictive power of our model.

In [4]:
import pandas as pd

# Toy dataset: 'city' is the categorical feature and 'price' is the numeric
# target. The four cities repeat in the same order, so each appears twice.
df = pd.DataFrame(
    {
        'city': ['London', 'Manchester', 'Liverpool', 'Bristol'] * 2,
        'price': [1000, 2000, 3000, 4000, 500, 600, 700, 800],
    }
)

In [5]:
# Display the sample frame ('city' feature, 'price' target) before encoding.
df

Unnamed: 0,city,price
0,London,1000
1,Manchester,2000
2,Liverpool,3000
3,Bristol,4000
4,London,500
5,Manchester,600
6,Liverpool,700
7,Bristol,800


In [7]:
mean_price = df.groupby('city')['price'].mean().to_dict()
mean_price

{'Bristol': 2400.0, 'Liverpool': 1850.0, 'London': 750.0, 'Manchester': 1300.0}

In [8]:
# Swap each city label for that city's mean price and store the result
# as a new column next to the original one.
city_means = df['city'].map(mean_price)
df['city_encoded'] = city_means
df

Unnamed: 0,city,price,city_encoded
0,London,1000,750.0
1,Manchester,2000,1300.0
2,Liverpool,3000,1850.0
3,Bristol,4000,2400.0
4,London,500,750.0
5,Manchester,600,1300.0
6,Liverpool,700,1850.0
7,Bristol,800,2400.0


In [9]:
# Show the encoded feature side by side with the target it was derived from.
df[['city_encoded','price']]

Unnamed: 0,city_encoded,price
0,750.0,1000
1,1300.0,2000
2,1850.0,3000
3,2400.0,4000
4,750.0,500
5,1300.0,600
6,1850.0,700
7,2400.0,800


In [10]:
import seaborn as sns

# Load seaborn's example 'tips' dataset to repeat the encoding on other data.
tips_df = sns.load_dataset('tips')

In [11]:
# Display the loaded tips data.
tips_df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [12]:
# Inspect the categorical feature ('time') next to the target ('total_bill').
tips_df[['time','total_bill']]

Unnamed: 0,time,total_bill
0,Dinner,16.99
1,Dinner,10.34
2,Dinner,21.01
3,Dinner,23.68
4,Dinner,24.59
...,...,...
239,Dinner,29.03
240,Dinner,27.18
241,Dinner,22.67
242,Dinner,17.82


In [22]:
tips_mean_price = tips_df.groupby('time')['total_bill'].mean().round(2).to_dict()
tips_mean_price

  tips_mean_price = tips_df.groupby('time')['total_bill'].mean().round(2).to_dict()


{'Lunch': 17.17, 'Dinner': 20.8}

In [23]:
# Replace each 'time' label with the mean total_bill for that label.
#
# Mapping a categorical column through a one-to-one dict yields another
# categorical column (its categories become the mapped values), not a numeric
# one. Cast to float so the encoded feature supports arithmetic and is
# treated as a number by downstream code.
tips_df['time_encoded'] = tips_df['time'].map(tips_mean_price).astype(float)
tips_df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,time_encoded
0,16.99,1.01,Female,No,Sun,Dinner,2,20.8
1,10.34,1.66,Male,No,Sun,Dinner,3,20.8
2,21.01,3.50,Male,No,Sun,Dinner,3,20.8
3,23.68,3.31,Male,No,Sun,Dinner,2,20.8
4,24.59,3.61,Female,No,Sun,Dinner,4,20.8
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,20.8
240,27.18,2.00,Female,Yes,Sat,Dinner,2,20.8
241,22.67,2.00,Male,Yes,Sat,Dinner,2,20.8
242,17.82,1.75,Male,No,Sat,Dinner,2,20.8


In [25]:
# Show the encoded feature side by side with the target it was derived from.
tips_df[['time_encoded','total_bill']]

Unnamed: 0,time_encoded,total_bill
0,20.8,16.99
1,20.8,10.34
2,20.8,21.01
3,20.8,23.68
4,20.8,24.59
...,...,...
239,20.8,29.03
240,20.8,27.18
241,20.8,22.67
242,20.8,17.82
