Ordinal encoding is a method of converting categorical data into numerical values by assigning a unique integer to each category based on a specific order. This technique is useful when the categories have a meaningful order or rank, allowing the model to capture this ordinal relationship.

In [114]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [115]:
df = pd.read_csv('customer.csv')

In [116]:
df.head()

Unnamed: 0,age,gender,review,education,purchase
0,69,Male,Good,PhD,No
1,49,Male,Bad,High School,No
2,43,Female,Good,PhD,No
3,39,Female,Good,High School,No
4,54,Male,Good,Bachelor's,No


In [117]:
df.sample()

Unnamed: 0,age,gender,review,education,purchase
82,31,Female,Good,High School,Yes


In [118]:
df = df.iloc[:,2:]

In [119]:
df

Unnamed: 0,review,education,purchase
0,Good,PhD,No
1,Bad,High School,No
2,Good,PhD,No
3,Good,High School,No
4,Good,Bachelor's,No
...,...,...,...
95,Bad,Bachelor's,No
96,Good,Bachelor's,No
97,Good,Master's,Yes
98,Bad,High School,No


In [120]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(df.iloc[:,0:2],df.iloc[:,-1],test_size=0.2)

In [121]:
from sklearn.preprocessing import OrdinalEncoder

In [122]:
x_train

Unnamed: 0,review,education
72,Bad,High School
83,Good,High School
99,Bad,Master's
7,Bad,PhD
67,Good,PhD
...,...,...
21,Bad,Master's
20,Bad,High School
45,Bad,High School
76,Bad,PhD


In [123]:
y_test.describe()

count     20
unique     2
top       No
freq      10
Name: purchase, dtype: object

In [124]:
y_train.describe()

count      80
unique      2
top       Yes
freq       41
Name: purchase, dtype: object

In [125]:
x_train.describe()

Unnamed: 0,review,education
count,80,80
unique,2,4
top,Bad,High School
freq,45,21


In [126]:
y_test.describe()

count     20
unique     2
top       No
freq      10
Name: purchase, dtype: object

In [127]:
oc = OrdinalEncoder(categories=[["Good", "Bad"], ["High School", "Bachelor's", "Master's", "PhD"]])

In [128]:
oc.fit(x_train)

In [129]:
x_train = oc.transform(x_train)

In [130]:
oc.fit(x_test)

In [131]:
x_train

array([[1., 0.],
       [0., 0.],
       [1., 2.],
       [1., 3.],
       [0., 3.],
       [0., 0.],
       [0., 1.],
       [0., 2.],
       [0., 1.],
       [0., 1.],
       [0., 3.],
       [1., 0.],
       [1., 1.],
       [1., 3.],
       [1., 2.],
       [1., 2.],
       [0., 2.],
       [1., 2.],
       [1., 3.],
       [0., 2.],
       [0., 3.],
       [1., 0.],
       [0., 2.],
       [1., 2.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 0.],
       [1., 0.],
       [1., 2.],
       [1., 3.],
       [0., 3.],
       [1., 2.],
       [0., 0.],
       [0., 3.],
       [1., 1.],
       [1., 3.],
       [1., 0.],
       [0., 3.],
       [0., 1.],
       [0., 0.],
       [1., 2.],
       [0., 2.],
       [1., 1.],
       [0., 1.],
       [0., 0.],
       [1., 0.],
       [0., 3.],
       [0., 0.],
       [1., 1.],
       [1., 3.],
       [0., 3.],
       [1., 1.],
       [1., 2.],
       [1., 0.],
       [0., 2.],
       [0., 3.],
       [0., 0.],
       [0., 1.

In [132]:
x_test = oc.transform(x_test)

In [135]:
x_test

array([[0., 2.],
       [1., 3.],
       [1., 2.],
       [1., 3.],
       [1., 1.],
       [1., 2.],
       [1., 3.],
       [1., 1.],
       [0., 0.],
       [1., 2.],
       [0., 0.],
       [0., 0.],
       [0., 3.],
       [1., 3.],
       [1., 3.],
       [1., 0.],
       [1., 1.],
       [1., 2.],
       [0., 0.],
       [0., 0.]])

In [136]:
oc.categories

[['Good', 'Bad'], ['High School', "Bachelor's", "Master's", 'PhD']]

In [137]:
x_train

array([[1., 0.],
       [0., 0.],
       [1., 2.],
       [1., 3.],
       [0., 3.],
       [0., 0.],
       [0., 1.],
       [0., 2.],
       [0., 1.],
       [0., 1.],
       [0., 3.],
       [1., 0.],
       [1., 1.],
       [1., 3.],
       [1., 2.],
       [1., 2.],
       [0., 2.],
       [1., 2.],
       [1., 3.],
       [0., 2.],
       [0., 3.],
       [1., 0.],
       [0., 2.],
       [1., 2.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 0.],
       [1., 0.],
       [1., 2.],
       [1., 3.],
       [0., 3.],
       [1., 2.],
       [0., 0.],
       [0., 3.],
       [1., 1.],
       [1., 3.],
       [1., 0.],
       [0., 3.],
       [0., 1.],
       [0., 0.],
       [1., 2.],
       [0., 2.],
       [1., 1.],
       [0., 1.],
       [0., 0.],
       [1., 0.],
       [0., 3.],
       [0., 0.],
       [1., 1.],
       [1., 3.],
       [0., 3.],
       [1., 1.],
       [1., 2.],
       [1., 0.],
       [0., 2.],
       [0., 3.],
       [0., 0.],
       [0., 1.

In [138]:
from sklearn.preprocessing import LabelEncoder

In [139]:
le = LabelEncoder()

In [140]:
le.fit(y_train)

In [141]:
le.classes_

array(['No', 'Yes'], dtype=object)

In [142]:
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [143]:
y_train

array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0])

In [144]:
y_test

array([0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1])