In [468]:
import random

random.seed(42)

import pandas as pd

In [469]:
data = {
    "Country": [
        "USA", "USA", "UK", "Canada", "India", "USA", "France", "India",
        "UK", "Germany", "Canada", "France", "Australia", "India", "UK",
        "France", "Australia", "Germany", "Canada", "Australia"
    ],
    "Price": [random.randint(500, 1500) for _ in range(20)]
}

In [470]:
df = pd.DataFrame(data)

In [471]:
df.head()

Unnamed: 0,Country,Price
0,USA,1154
1,USA,614
2,UK,525
3,Canada,1259
4,India,781


In [472]:
mean_price = df.groupby("Country")["Price"].mean().round(decimals=2).to_dict()

In [473]:
mean_price

{'Australia': 980.0,
 'Canada': 993.67,
 'France': 1030.0,
 'Germany': 568.0,
 'India': 827.0,
 'UK': 789.33,
 'USA': 839.33}

In [474]:
df["Price_Encoded"] = df['Country'].map(mean_price)

In [475]:
df

Unnamed: 0,Country,Price,Price_Encoded
0,USA,1154,839.33
1,USA,614,839.33
2,UK,525,789.33
3,Canada,1259,993.67
4,India,781,827.0
5,USA,750,839.33
6,France,728,1030.0
7,India,642,827.0
8,UK,1254,789.33
9,Germany,604,568.0


In [476]:
# Applied Target Encoding by mapping each country to its average Price.
# Result: "Price_Encoded" is the feature, and "Price" is the target for regression.

In [477]:
import seaborn as sns

In [478]:
df = sns.load_dataset('tips')
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [479]:
df['time'].unique()

['Dinner', 'Lunch']
Categories (2, object): ['Lunch', 'Dinner']

In [480]:
# We will apply Target Encoding to the "time" column by replacing "Dinner" and "Lunch"
# with the average "total_bill" for each category.
# This helps the model use a numeric representation of "time" based on its relation to the target.

In [481]:
time_encoded_mean = df.groupby("time", observed=True)['total_bill'].mean().round(decimals=2)

In [482]:
df['time_encoded'] = df['time'].map(time_encoded_mean)

In [483]:
df.columns = ['time_encoded', 'tip', 'sex', 'smoker', 'day', 'time', 'size', 'total_bill']

In [484]:
df.head()

Unnamed: 0,time_encoded,tip,sex,smoker,day,time,size,total_bill
0,16.99,1.01,Female,No,Sun,Dinner,2,20.8
1,10.34,1.66,Male,No,Sun,Dinner,3,20.8
2,21.01,3.5,Male,No,Sun,Dinner,3,20.8
3,23.68,3.31,Male,No,Sun,Dinner,2,20.8
4,24.59,3.61,Female,No,Sun,Dinner,4,20.8
