First we will apply in Ordinal encoding on a small dataset for Understanding

In [1]:
import pandas as pd

In [4]:
df = pd.DataFrame({"Size": ["s", "m", "l", "xl", "s", "m", "l", "s", "s", "l", "xl", "m"]}) # creating a Dataframe first as a dictionary form
df.head(3)

Unnamed: 0,Size
0,s
1,m
2,l


In [9]:
ordered_data = [
    ["s", "m", "l", "xl"]
]  # creating a list of lists to define the order of the categories

In [10]:
from sklearn.preprocessing import OrdinalEncoder

In [14]:
oe = OrdinalEncoder(categories=ordered_data)  # initializing the OrdinalEncoder with the specified order. Otherwise by default it is set to auto that means it will sort the categories in alphabetical order
oe.fit(df[["Size"]]) # fitting the encoder to the DataFrame

0,1,2
,categories,"[['s', 'm', ...]]"
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,unknown_value,
,encoded_missing_value,
,min_frequency,
,max_categories,


In [15]:
df["size_encoded"] = oe.transform(
    df[["Size"]]
)  # transforming the DataFrame to encode the "Size" column

In [12]:
df

Unnamed: 0,Size,Size_Encoded
0,s,0.0
1,m,1.0
2,l,2.0
3,xl,3.0
4,s,0.0
5,m,1.0
6,l,2.0
7,s,0.0
8,s,0.0
9,l,2.0


Now Doing it with the help of map function 

In [26]:
ordered_data1 = {"s": 1, "m": 2, "l": 3, "xl": 4}  # creating a dictionary to define the order of the categories

In [27]:
df["Size"].map(ordered_data1)  # mapping the "Size" column to the defined order using the dictionary

0     1
1     2
2     3
3     4
4     1
5     2
6     3
7     1
8     1
9     3
10    4
11    2
Name: Size, dtype: int64

In [28]:
df["size_encoded_map"] = df["Size"].map(ordered_data1)  # storing the mapped values in a new column

In [29]:
df

Unnamed: 0,Size,Size_Encoded,size_encoded,size_encoded1,size_encoded_map
0,s,0.0,0.0,1,1
1,m,1.0,1.0,2,2
2,l,2.0,2.0,3,3
3,xl,3.0,3.0,4,4
4,s,0.0,0.0,1,1
5,m,1.0,1.0,2,2
6,l,2.0,2.0,3,3
7,s,0.0,0.0,1,1
8,s,0.0,0.0,1,1
9,l,2.0,2.0,3,3


Now we will work with our actual dataset and apply Ordinal encoding on it 

In [30]:
dataset = pd.read_csv("loan.csv")
dataset.head(3)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0.0,Graduate,No,5849.0,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1.0,Graduate,No,4583.0,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0.0,Graduate,Yes,3000.0,0.0,66.0,360.0,1.0,Urban,Y


we will apply Ordinal encoding on "Property_Area"

In [31]:
dataset[
    "Property_Area"
].unique()  # check how many unique values are present in the column

array(['Urban', 'Rural', 'Semiurban', nan], dtype=object)

In [32]:
# lets apply mode filling on the column to fill the missing values first
dataset["Property_Area"].fillna(
    dataset["Property_Area"].mode()[0], inplace=True
)  # filling missing values with the mode of the column

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset["Property_Area"].fillna(


In [33]:
# creating a list of lists to define the order of the categories for "Property_Area"
loan_ordered_data = [
    ["Urban", "Semiurban", "Rural"]
]

In [34]:
from sklearn.preprocessing import OrdinalEncoder

In [35]:
oen = OrdinalEncoder(categories=loan_ordered_data)  # initializing the OrdinalEncoder with the specified order

In [36]:
oen.fit(dataset[["Property_Area"]])  # fitting the encoder to the DataFrame

0,1,2
,categories,"[['Urban', 'Semiurban', ...]]"
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,unknown_value,
,encoded_missing_value,
,min_frequency,
,max_categories,


In [39]:
dataset["Property_Area"] = oen.transform(
    dataset[["Property_Area"]]
  )  # transforming the DataFrame to encode the "Property_Area" column

In [40]:
dataset.head(3)  # displaying the first 3 rows of the DataFrame with the new encoded column

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,Property_Area_encoded
0,LP001002,Male,No,0.0,Graduate,No,5849.0,0.0,,360.0,1.0,0.0,Y,0.0
1,LP001003,Male,Yes,1.0,Graduate,No,4583.0,1508.0,128.0,360.0,1.0,2.0,N,2.0
2,LP001005,Male,Yes,0.0,Graduate,Yes,3000.0,0.0,66.0,360.0,1.0,0.0,Y,0.0
