# Ordinal Encoding

In [1]:
import pandas as pd

### 1. Cycle Line

In [2]:
df = pd.DataFrame({"Size" :['s','m','l','xl','xxl','xs','s','s','xl','m','l','l'] })
df.head()

Unnamed: 0,Size
0,s
1,m
2,l
3,xl
4,xxl


In [3]:
ord_data = [['s','m','l','xl','xxl','xs']]

In [5]:
from sklearn.preprocessing import OrdinalEncoder

In [6]:
oe =OrdinalEncoder(categories = ord_data)
oe.fit(df[["Size"]])

In [8]:
df["Size_en"] = oe.transform(df[["Size"]])
df

Unnamed: 0,Size,Size_en
0,s,0.0
1,m,1.0
2,l,2.0
3,xl,3.0
4,xxl,4.0
5,xs,5.0
6,s,0.0
7,s,0.0
8,xl,3.0
9,m,1.0


### 2. Map Line

In [9]:
ord_data1 = {'s':0, 'm':1, 'l':2, 'xl':3, "xxl":4, "xs":5}
df["Size_en_map"] = df["Size"].map(ord_data1)
df

Unnamed: 0,Size,Size_en,Size_en_map
0,s,0.0,0
1,m,1.0,1
2,l,2.0,2
3,xl,3.0,3
4,xxl,4.0,4
5,xs,5.0,5
6,s,0.0,0
7,s,0.0,0
8,xl,3.0,3
9,m,1.0,1


In [10]:
#u can assign number by your choice in map line

### ordinal encoding on bigger dataset

In [11]:
dataset = pd.read_csv("Online Sales Data.csv")
dataset.head(4)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001.0,01-01-2024,,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002.0,02-01-2024,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003.0,03-01-2024,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004.0,04-01-2024,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card


In [12]:
dataset["Payment Method"].unique()

array(['Credit Card', 'PayPal', 'Debit Card', nan], dtype=object)

In [13]:
#we dont need nan so lets fill this values
dataset["Payment Method"].fillna(dataset["Payment Method"].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset["Payment Method"].fillna(dataset["Payment Method"].mode()[0], inplace=True)


In [15]:
dataset["Payment Method"].unique()       #hence we had fill the data

array(['Credit Card', 'PayPal', 'Debit Card'], dtype=object)

In [16]:
en_data_ord = [['Credit Card', 'PayPal', 'Debit Card']]

In [17]:
from sklearn.preprocessing import OrdinalEncoder

In [18]:
oen = OrdinalEncoder(categories=en_data_ord)
oen.fit_transform(dataset[["Payment Method"]])

array([[0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [2.],
       [0.],
       [1.],

In [19]:
oen = OrdinalEncoder(categories=en_data_ord)
dataset["Payment Method"] = oen.fit_transform(dataset[["Payment Method"]])
dataset.head(10)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001.0,01-01-2024,,iPhone 14 Pro,2,999.99,1999.98,North America,0.0
1,10002.0,02-01-2024,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,1.0
2,10003.0,03-01-2024,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,2.0
3,10004.0,04-01-2024,Books,The Da Vinci Code,4,15.99,63.96,North America,0.0
4,10005.0,05-01-2024,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,1.0
5,10006.0,06-01-2024,Sports,Wilson Evolution Basketball,5,29.99,149.95,Asia,0.0
6,10007.0,07-01-2024,Electronics,MacBook Pro 16-inch,1,2499.99,2499.99,North America,0.0
7,10008.0,08-01-2024,Home Appliances,Blueair Classic 480i,2,599.99,1199.98,Europe,1.0
8,10009.0,09-01-2024,Clothing,Nike Air Force 1,6,89.99,539.94,Asia,2.0
9,10010.0,10-01-2024,Books,Dune by Frank Herbert,2,25.99,51.98,North America,0.0
