### Label Encoder

In [1]:
import pandas as pd

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Toy nominal feature: nine observations drawn from three color labels.
color = pd.Series(
    ['red', 'blue', 'green', 'red', 'red', 'blue', 'blue', 'green', 'red'],
    name='color',
).to_frame()

In [4]:
# Preview the first rows of the frame (head() shows five by default).
color.head()

Unnamed: 0,color
0,red
1,blue
2,green
3,red
4,red


In [5]:
from sklearn.preprocessing import LabelEncoder

In [6]:
# Single LabelEncoder instance, used for both the color and the size examples.
# Each fit_transform call refits it, so only the most recent mapping is kept.
le = LabelEncoder()

In [7]:
# LabelEncoder expects a 1-D array of labels, so pass the column as a Series
# (color['color']) rather than a one-column DataFrame (color[['color']]).
# The 2-D form only works through an implicit ravel that raises a
# DataConversionWarning. The resulting codes are the same either way.
color_encoded = le.fit_transform(color['color'])

In [8]:
# Show the integer codes; per the output: blue -> 0, green -> 1, red -> 2.
color_encoded

array([2, 0, 1, 2, 2, 0, 0, 1, 2])

In [9]:
# Store the codes next to the original labels so the mapping can be read row by row.
color['color_encoded'] = color_encoded

In [10]:
# Display the whole (nine-row) frame: label and code side by side.
color

Unnamed: 0,color,color_encoded
0,red,2
1,blue,0
2,green,1
3,red,2
4,red,2
5,blue,0
6,blue,0
7,green,1
8,red,2


In [11]:
# Toy ordinal feature: each row is (size label, numeric size), where the
# numeric column grows with the label (small=10 ... xl=40).
size = pd.DataFrame(
    [
        ('small', 10),
        ('large', 30),
        ('medium', 20),
        ('medium', 20),
        ('medium', 20),
        ('small', 10),
        ('large', 30),
        ('xl', 40),
        ('small', 10),
    ],
    columns=['size', 'size_in_number'],
)

In [12]:
# Preview the first rows of the size frame (head() shows five by default).
size.head()

Unnamed: 0,size,size_in_number
0,small,10
1,large,30
2,medium,20
3,medium,20
4,medium,20


In [13]:
# Refit the shared `le` on the size labels, replacing the color mapping learned
# earlier. The resulting codes are alphabetical (large=0, medium=1, small=2,
# xl=3), so they do NOT reflect the natural small < medium < large < xl order.
size_encoded = le.fit_transform(
    size['size']
)

size_encoded

array([2, 0, 1, 1, 1, 2, 0, 3, 2])

In [14]:
# Spot-check the fitted mapping for two labels, printing one result per line.
print(le.transform(['large']), le.transform(['xl']), sep='\n')

[0]
[3]


In [15]:
# Store the label-encoder codes as a new column for comparison with size_in_number.
size['size_encoded'] = size_encoded

In [16]:
# Display the whole frame; compare size_encoded against size_in_number to see
# that the alphabetical codes do not follow the numeric size.
size

Unnamed: 0,size,size_in_number,size_encoded
0,small,10,2
1,large,30,0
2,medium,20,1
3,medium,20,1
4,medium,20,1
5,small,10,2
6,large,30,0
7,xl,40,3
8,small,10,2


### Ordinal Encoder

In [17]:
# Rebuild the size frame from scratch so the ordinal-encoder example starts
# without the `size_encoded` column added in the label-encoder section.
size = pd.DataFrame.from_dict(
    {
        'size': [
            'small', 'large', 'medium',
            'medium', 'medium', 'small',
            'large', 'xl', 'small',
        ],
        'size_in_number': [
            10, 30, 20,
            20, 20, 10,
            30, 40, 10,
        ],
    }
)

In [18]:
# Preview the freshly rebuilt frame (first five rows).
size.head()

Unnamed: 0,size,size_in_number
0,small,10
1,large,30
2,medium,20
3,medium,20
4,medium,20


In [19]:
from sklearn.preprocessing import OrdinalEncoder

In [20]:
# Distinct size labels in order of first appearance; this is the set of
# categories the explicit ordering for the encoder has to cover.
unique = pd.unique(size['size'])

unique

array(['small', 'large', 'medium', 'xl'], dtype=object)

### Specify the category order

In [21]:
# Spell out the category order explicitly (small < medium < large < xl) so the
# integer codes follow that order rather than the alphabetical one the
# LabelEncoder produced earlier. `categories` takes one list per input column;
# dtype=int requests integer codes in the output.
oe = OrdinalEncoder(
    categories= [ ['small', 'medium', 'large', 'xl'] ] ,
    dtype=int
)

In [22]:
# OrdinalEncoder operates on 2-D input, hence the double brackets selecting a
# one-column DataFrame. Despite its name, `size_encoder` holds the encoded
# values (an (n, 1) array), not an encoder object.
size_encoder = oe.fit_transform(
    size[['size']]
)

In [23]:
# Inspect the encoded values: a single-column 2-D array, one row per input row.
size_encoder

array([[0],
       [2],
       [1],
       [1],
       [1],
       [0],
       [2],
       [3],
       [0]])

In [24]:
# `size_encoder` is an (n, 1) array; flatten it explicitly to one value per
# row before storing it as a column.
size['size_encoder'] = size_encoder.ravel()

In [25]:
# Display the whole frame; size_encoder now increases together with size_in_number.
size

Unnamed: 0,size,size_in_number,size_encoder
0,small,10,0
1,large,30,2
2,medium,20,1
3,medium,20,1
4,medium,20,1
5,small,10,0
6,large,30,2
7,xl,40,3
8,small,10,0


#### Target-guided encoding

In [26]:
# Toy data for target-guided encoding: one (city, price) observation per row.
# NOTE(review): 'kakinana' (row 6) looks like a misspelling of 'kakinada'; as
# written it forms its own category in the per-city group-by that follows.
# Confirm whether that is intended.
df = pd.DataFrame(
    [
        ('vijayawada', 100),
        ('vizag', 120),
        ('kakinada', 130),
        ('vizag', 100),
        ('machilipatnam', 250),
        ('vijayawada', 140),
        ('kakinana', 200),
        ('vizag', 220),
        ('vijayawada', 160),
        ('kakinada', 240),
        ('eleru', 250),
        ('machilipatnam', 300),
    ],
    columns=['city', 'price'],
)

In [27]:
# Inspect the raw city/price frame before any encoding is applied.
df

Unnamed: 0,city,price
0,vijayawada,100
1,vizag,120
2,kakinada,130
3,vizag,100
4,machilipatnam,250
5,vijayawada,140
6,kakinana,200
7,vizag,220
8,vijayawada,160
9,kakinada,240


In [28]:
# Mean price per city as a plain {city: mean_price} lookup table.
# NOTE(review): the means are computed over every row of `df`; if this were
# used for modelling, they should presumably come from training rows only to
# avoid leaking the target. Confirm against the intended workflow.
city_mean = df.groupby('city')['price'].mean().to_dict()

city_mean

{'eleru': 250.0,
 'kakinada': 185.0,
 'kakinana': 200.0,
 'machilipatnam': 275.0,
 'vijayawada': 133.33333333333334,
 'vizag': 146.66666666666666}

In [29]:
# Replace each city by its mean price from `city_mean`. Every city in the
# frame is a key of that dict (it was built from this same frame), so the
# mapping leaves no missing values.
df['city_encoded'] = df['city'].map( city_mean )

In [30]:
# Display the frame with the new mean-encoded city column.
df

Unnamed: 0,city,price,city_encoded
0,vijayawada,100,133.333333
1,vizag,120,146.666667
2,kakinada,130,185.0
3,vizag,100,146.666667
4,machilipatnam,250,275.0
5,vijayawada,140,133.333333
6,kakinana,200,200.0
7,vizag,220,146.666667
8,vijayawada,160,133.333333
9,kakinada,240,185.0


In [31]:
# Show only the target and its encoded feature side by side.
df.loc[:, ['price', 'city_encoded']]

Unnamed: 0,price,city_encoded
0,100,133.333333
1,120,146.666667
2,130,185.0
3,100,146.666667
4,250,275.0
5,140,133.333333
6,200,200.0
7,220,146.666667
8,160,133.333333
9,240,185.0
