In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy dataset: two categorical features ('Temperature', 'Color') and a
# binary 'Target', ten rows in total.
data = dict(
    Temperature=['Hot', 'Cold', 'Very Hot', 'Warm', 'Hot',
                 'Warm', 'Warm', 'Hot', 'Hot', 'Cold'],
    Color=['Red', 'Yellow', 'Blue', 'Blue', 'Red',
           'Yellow', 'Red', 'Yellow', 'Yellow', 'Yellow'],
    Target=[1, 1, 1, 0, 1, 0, 1, 0, 1, 1],
)
df = pd.DataFrame(data)
df

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


In [3]:
### One Hot Encoding

# Work on a copy of the source frame.
df_ohe = df.copy()

# Expand 'Color' into integer indicator columns, dropping the first level.
# The prefix 'C_' is joined with the default '_' separator, which is why the
# resulting columns are named 'C__Red' and 'C__Yellow'.
ohe = pd.get_dummies(
    data=df_ohe,
    columns=['Color'],
    prefix='C_',
    drop_first=True,
    dtype='int',
)

# Put the original 'Color' values back at position 2 so the raw category
# can be compared with its indicator columns.
ohe.insert(2, 'Color', df['Color'].values)

ohe

Unnamed: 0,Temperature,Target,Color,C__Red,C__Yellow
0,Hot,1,Red,1,0
1,Cold,1,Yellow,0,1
2,Very Hot,1,Blue,0,0
3,Warm,0,Blue,0,0
4,Hot,1,Red,1,0
5,Warm,0,Yellow,0,1
6,Warm,1,Red,1,0
7,Hot,0,Yellow,0,1
8,Hot,1,Yellow,0,1
9,Cold,1,Yellow,0,1


In [4]:
# using OneHotEncoder

from sklearn.preprocessing import OneHotEncoder

# create ohe instance: dense (non-sparse) output, first category dropped
encoder = OneHotEncoder(sparse_output=False,drop='first')

# fit and transform
encoded_data = encoder.fit_transform(df[['Color']])

# create the dataframe
# Reuse df's index: pd.concat(axis=1) aligns rows on index labels, not on
# position, so a default RangeIndex here would misalign (and introduce NaN
# rows) whenever df is not indexed 0..n-1.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(['Color']),
    index=df.index,
)

# concat the data
df_encoded = pd.concat([df, encoded_df], axis=1)

df_encoded

Unnamed: 0,Temperature,Color,Target,Color_Red,Color_Yellow
0,Hot,Red,1,1.0,0.0
1,Cold,Yellow,1,0.0,1.0
2,Very Hot,Blue,1,0.0,0.0
3,Warm,Blue,0,0.0,0.0
4,Hot,Red,1,1.0,0.0
5,Warm,Yellow,0,0.0,1.0
6,Warm,Red,1,1.0,0.0
7,Hot,Yellow,0,0.0,1.0
8,Hot,Yellow,1,0.0,1.0
9,Cold,Yellow,1,0.0,1.0


In [5]:
### binary encoding
import category_encoders as ce

# Encoder restricted to the 'Temperature' column.
be = ce.BinaryEncoder(cols=['Temperature'])

# Fit on 'Temperature' and get its binary-coded columns.
be_df = be.fit_transform(df.Temperature)

# Put the encoded columns next to the original data.
binary_df = pd.concat(objs=[df, be_df], axis='columns')

binary_df

Unnamed: 0,Temperature,Color,Target,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,0,0,1
1,Cold,Yellow,1,0,1,0
2,Very Hot,Blue,1,0,1,1
3,Warm,Blue,0,1,0,0
4,Hot,Red,1,0,0,1
5,Warm,Yellow,0,1,0,0
6,Warm,Red,1,1,0,0
7,Hot,Yellow,0,0,0,1
8,Hot,Yellow,1,0,0,1
9,Cold,Yellow,1,0,1,0


In [6]:
## mean encoding / target encoding

# Separate working copy for the mean/target-encoding examples.
df3 = df.copy(deep=True)

df3

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


In [7]:
# mean encoding using pandas

# Average 'Target' value within each 'Temperature' category.
mean_enc = df3['Target'].groupby(df3['Temperature']).mean()

mean_enc

Temperature
Cold        1.000000
Hot         0.750000
Very Hot    1.000000
Warm        0.333333
Name: Target, dtype: float64

In [8]:
# Replace each 'Temperature' value with its category's mean target.
# Read from df3 (the frame being written to) rather than df, so the cell
# does not rely on the two frames sharing the same rows and index.
df3['temp_mean_enc'] = df3['Temperature'].map(mean_enc)

In [9]:
# Display df3, which now carries the 'temp_mean_enc' column.
df3

Unnamed: 0,Temperature,Color,Target,temp_mean_enc
0,Hot,Red,1,0.75
1,Cold,Yellow,1,1.0
2,Very Hot,Blue,1,1.0
3,Warm,Blue,0,0.333333
4,Hot,Red,1,0.75
5,Warm,Yellow,0,0.333333
6,Warm,Red,1,0.333333
7,Hot,Yellow,0,0.75
8,Hot,Yellow,1,0.75
9,Cold,Yellow,1,1.0


In [11]:
# target encoding
import category_encoders as ce

te = ce.TargetEncoder(cols=['Temperature'])

# Take both the feature and the target from df3 so the encoder sees one
# consistent frame. fit_transform returns a DataFrame, so select the encoded
# 'Temperature' column explicitly instead of assigning a whole DataFrame to a
# single column.
# NOTE: the encoder is fitted on the same rows it encodes; in a real
# modelling workflow fit it on the training split only to avoid target leakage.
df3['temp_tar_enc'] = te.fit_transform(df3[['Temperature']], df3['Target'])['Temperature']


df3

Unnamed: 0,Temperature,Color,Target,temp_mean_enc,temp_tar_enc
0,Hot,Red,1,0.75,0.708399
1,Cold,Yellow,1,1.0,0.742555
2,Very Hot,Blue,1,1.0,0.739033
3,Warm,Blue,0,0.333333,0.643363
4,Hot,Red,1,0.75,0.708399
5,Warm,Yellow,0,0.333333,0.643363
6,Warm,Red,1,0.333333,0.643363
7,Hot,Yellow,0,0.75,0.708399
8,Hot,Yellow,1,0.75,0.708399
9,Cold,Yellow,1,1.0,0.742555


In [13]:
## label encoding

# Fresh working copy for the label/ordinal-encoding examples.
df4 = df.copy(deep=True)

df4

# Color -> integer code: Blue = 0, Red = 1, Yellow = 2

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


In [14]:
## label encoding

from sklearn.preprocessing import LabelEncoder

# Initialize the encoder.
# NOTE: scikit-learn documents LabelEncoder as intended for target labels;
# it is used on a feature column here for demonstration.
le = LabelEncoder()

# Fit on 'Color' and store the resulting integer codes as a new column.
color_codes = le.fit_transform(df4['Color'])
df4['color_enc'] = color_codes

df4

Unnamed: 0,Temperature,Color,Target,color_enc
0,Hot,Red,1,1
1,Cold,Yellow,1,2
2,Very Hot,Blue,1,0
3,Warm,Blue,0,0
4,Hot,Red,1,1
5,Warm,Yellow,0,2
6,Warm,Red,1,1
7,Hot,Yellow,0,2
8,Hot,Yellow,1,2
9,Cold,Yellow,1,2


In [20]:
### ordinal encoding

from sklearn.preprocessing import OrdinalEncoder

# Explicit category order; each value is encoded as its position in this
# list: 'Very Hot' -> 0, 'Hot' -> 1, 'Warm' -> 2, 'Cold' -> 3.
cate = [['Very Hot','Hot','Warm','Cold']]

# initialize
enc = OrdinalEncoder(categories=cate)

# fit transform
enc_data = enc.fit_transform(df4[['Temperature']])

# convert array to dataframe
# Reuse df4's index: pd.concat(axis=1) aligns rows on index labels, so a
# default RangeIndex would misalign the codes if df4 were not indexed 0..n-1.
df_enc = pd.DataFrame(enc_data, columns=['Temp_ord'], index=df4.index)

# concat the dataframe

ord_enc = pd.concat([df4, df_enc], axis=1)

ord_enc

Unnamed: 0,Temperature,Color,Target,color_enc,Temp_ord
0,Hot,Red,1,1,1.0
1,Cold,Yellow,1,2,3.0
2,Very Hot,Blue,1,0,0.0
3,Warm,Blue,0,0,2.0
4,Hot,Red,1,1,1.0
5,Warm,Yellow,0,2,2.0
6,Warm,Red,1,1,2.0
7,Hot,Yellow,0,2,1.0
8,Hot,Yellow,1,2,1.0
9,Cold,Yellow,1,2,3.0
