In [1]:
# import the 'os' module to handle folder and file operations
import os

# import 'pickle' to save and load Python objects (like models, encoders)
import pickle

# import 'numpy' for working with numerical arrays and data
import numpy as np

# make sure the 'models' directory is present before anything reads from it
# exist_ok=True turns the call into a no-op when the directory already exists
os.makedirs(name='models', exist_ok=True)

In [2]:
def load_pickle(path):
    """Unpickle and return the object stored in the file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the object has been read, even if unpickling raises.

    NOTE(security): pickle.load can execute arbitrary code while loading;
    only use this on files that come from a trusted source.
    """
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# load the encoder saved for the 'Sex' column from the models folder
ohe_sex = load_pickle('models/ohe_sex.pkl')

# load the encoder saved for the 'Embarked' column from the models folder
ohe_embarked = load_pickle('models/ohe_embarked.pkl')

# load the trained classifier model from the models folder
clf = load_pickle('models/clf.pkl')

In [3]:
# build a single test sample as a (1, 7) array: one row, seven raw features
# column order: Pclass, Sex, Age, SibSp, <index 4>, Fare, Embarked
# NOTE(review): index 4 was not named in the original comment; presumably it is
# Parch (standard Titanic column order) - confirm against the training notebook
# dtype=object keeps the ints, floats and strings as they are instead of
# letting numpy coerce the whole row to strings
test_input = np.array(
    [[2, 'male', 31.0, 0, 0, 10.5, 'S']],
    dtype=object,
)

In [4]:
# display the test input sample
# (as the last expression in the cell, its repr is rendered as the cell output)
test_input

array([[2, 'male', 31.0, 0, 0, 10.5, 'S']], dtype=object)

In [5]:
# one-hot encode the 'Sex' column ('male') using the loaded encoder
# test_input[:, [1]] selects column 1 with a list index, which keeps the
# column axis and yields a 2D array of shape (n_rows, 1) for the encoder
# unlike a hard-coded reshape(1, 1), this also works when test_input holds
# more than one row
test_input_sex = ohe_sex.transform(test_input[:, [1]])



In [6]:
# display the one-hot encoded value of the 'Sex' column for the test input
# (the recorded output for this sample is one row with two indicator columns)
test_input_sex

array([[0., 1.]])

In [7]:
# one-hot encode the 'Embarked' column ('S') using the loaded encoder
# test_input[:, [-1]] selects the last column with a list index, which keeps
# the column axis and yields a 2D array of shape (n_rows, 1) for the encoder
# unlike a hard-coded reshape(1, 1), this also works when test_input holds
# more than one row
test_input_embarked = ohe_embarked.transform(test_input[:, [-1]])

In [8]:
# display the one-hot encoded value of the 'Embarked' column for the test input
# (the recorded output for this sample is one row with three indicator columns)
test_input_embarked

array([[0., 0., 1.]])

In [9]:
# extract the 'Age' column (index 2) from the test input as a 2D column
# the list index [2] keeps the column axis, giving shape (n_rows, 1) directly,
# so no hard-coded reshape(1, 1) is needed and multi-row inputs also work
test_input_age = test_input[:, [2]]

In [10]:
# assemble the final model input by stacking every processed piece side by side
# resulting column layout: 4 untouched columns | Age | one-hot 'Sex' | one-hot 'Embarked'
# NOTE(review): this order presumably has to match the column order the model
# was trained with - confirm against the training notebook
test_input_transformed = np.hstack([
    # untouched columns 0, 3, 4, 5: Pclass, SibSp, <index 4>, Fare
    # (index 4 is presumably Parch - confirm; 'Embarked' is index 6 and is
    # NOT part of this selection, it is added one-hot encoded below)
    test_input[:, [0, 3, 4, 5]],
    test_input_age,        # Age as a 2D column
    test_input_sex,        # one-hot encoded 'Sex'
    test_input_embarked,   # one-hot encoded 'Embarked'
])

In [11]:
# check the shape of the final transformed test input
# for this sample the recorded result is (1, 10): 4 untouched columns + 1 Age
# + 2 'Sex' indicator columns + 3 'Embarked' indicator columns
test_input_transformed.shape

(1, 10)

In [12]:
# use the trained Decision Tree classifier to predict the outcome for the test input
# the result is an array of predicted labels (the recorded output for this sample is [1])
# NOTE(review): the meaning of each label depends on the training target, which is
# not visible in this notebook - presumably 1 = survived; confirm
clf.predict(test_input_transformed)

array([1])