# Import the Dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

# Load the Dataset

In [2]:
# Read the customer data from a CSV file in the notebook's working directory.
df = pd.read_csv("customer.csv")

# Preview five random rows; the fixed seed keeps the preview identical across re-runs.
df.sample(5, random_state=42)

Unnamed: 0,age,gender,review,education,purchased
18,19,Male,Good,School,No
44,77,Female,Average,UG,No
11,74,Male,Good,UG,Yes
2,70,Female,Good,PG,No
17,22,Female,Poor,UG,Yes


In [3]:
# Keep only the categorical columns this notebook encodes (plus the target).
# errors="ignore" makes the cell safe to re-run: on a second run the columns are
# already gone, and df is left unchanged instead of raising KeyError.
df = df.drop(columns=["age", "gender"], errors="ignore")
df.head()

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


# Split the Dataset into Features and Target

In [4]:
# Target: the "purchased" column; features: every remaining column.
y = df["purchased"]
X = df.drop(columns=["purchased"])

In [5]:
# Dimensions of the feature frame as (rows, columns).
X.shape

(50, 2)

In [6]:
# Length of the target Series; it should equal the number of rows in X.
y.shape

(50,)

# Split the Dataset into Training and Testing Data

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Compare the full feature frame with the train and test partitions to confirm the split sizes.
X.shape, X_train.shape, X_test.shape

((50, 2), (40, 2), (10, 2))

# Load Ordinal Encoder for Feature Columns (X)

In [9]:
# Create the ordinal encoder with an explicit lowest-to-highest order per feature.
# The inner lists must follow the column order of X (review, then education).
oe = OrdinalEncoder(
    categories=[
        ["Poor", "Average", "Good"],  # review
        ["School", "UG", "PG"],  # education
    ]
)

In [10]:
# Fit the encoder on the training rows only and encode them in the same step,
# then encode the test rows with that already-fitted mapping.
# NOTE: X_train / X_test are rebound from DataFrames to NumPy arrays here, so
# re-run the train/test split cell before executing this cell a second time.
X_train = oe.fit_transform(X_train)
X_test = oe.transform(X_test)

In [11]:
# Inspect the encoded training features (columns: review, education).
X_train

array([[0., 0.],
       [1., 1.],
       [1., 2.],
       [1., 1.],
       [2., 2.],
       [2., 0.],
       [2., 2.],
       [0., 2.],
       [2., 2.],
       [0., 1.],
       [2., 1.],
       [0., 1.],
       [1., 2.],
       [1., 0.],
       [0., 0.],
       [1., 0.],
       [1., 1.],
       [0., 2.],
       [2., 2.],
       [1., 0.],
       [1., 1.],
       [2., 1.],
       [2., 1.],
       [0., 1.],
       [1., 2.],
       [2., 2.],
       [0., 2.],
       [0., 0.],
       [2., 0.],
       [2., 0.],
       [2., 1.],
       [0., 2.],
       [2., 0.],
       [2., 1.],
       [1., 0.],
       [0., 0.],
       [2., 2.],
       [0., 2.],
       [0., 0.],
       [2., 0.]])

In [12]:
# Inspect the encoded test features (columns: review, education).
X_test

array([[1., 0.],
       [0., 2.],
       [1., 1.],
       [0., 2.],
       [0., 1.],
       [2., 1.],
       [0., 2.],
       [2., 0.],
       [1., 1.],
       [0., 2.]])

In [13]:
# Show the category order held by the fitted encoder, one array per feature column;
# a category's position in its array is the value it is encoded to.
oe.categories_

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]

# Load Label Encoder for Target Column (y)

In [14]:
# Encoder that maps the target labels to integer codes.
le = LabelEncoder()

In [15]:
# Fit the label encoder on the training targets and encode them in one step,
# then encode the test targets with the same fitted classes.
# NOTE: y_train / y_test are rebound to integer arrays here; re-run the split
# cell before executing this cell again, otherwise the encoder would be refit
# on the integer codes instead of the original labels.
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [16]:
# Inspect the integer-encoded training targets.
y_train

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0])

In [17]:
# Inspect the integer-encoded test targets.
y_test

array([0, 0, 0, 1, 1, 1, 0, 0, 1, 1])