## Set working directory

In [1]:
import os

# Set working directory to where the artifacts folder is
os.chdir(r"C:\Users\mbuzii\Desktop\Projects")

# Confirm current directory
print(os.getcwd())

C:\Users\mbuzii\Desktop\Projects


## Load Libraries

In [2]:
import pandas as pd
import joblib
import json

## Load artifacts

In [3]:
# Load scaler and model
scaler_loaded = joblib.load("artifacts/preprocessor.pkl")
model_loaded = joblib.load("artifacts/model.pkl")

# Load schema
with open("artifacts/schema.json", "r") as f:
    schema = json.load(f)

print("Loaded artifacts successfully")
print("Numerical columns:", schema['num_cols'])
print("All columns:", schema['all_cols'])

Loaded artifacts successfully
Numerical columns: ['amount', 'customer_age', 'minute_of_day', 'to_acc_volume', 'session_duration']
All columns: ['amount', 'customer_age', 'minute_of_day', 'to_acc_volume', 'session_duration', 'hour_of_day', 'day_of_week']


## Load the sample dataset

In [4]:
X_sample = pd.read_csv("artifacts/sample_dataset.csv")
print("Sample dataset loaded:")
print(X_sample.head())

Sample dataset loaded:
     amount  customer_age  minute_of_day  to_acc_volume  session_duration  \
0 -0.113280     -0.303205       0.376880       2.270856         -0.579938   
1 -0.118548      0.108333      -0.746762       0.858012         -0.345664   
2 -0.098032     -0.452855       0.742605      -0.188584          0.137527   
3 -0.134065     -0.639918       0.903813       1.545054         -0.584121   
4 -0.137513      0.407633       0.995245      -0.287753         -0.584121   

   hour_of_day  day_of_week  
0           18            3  
1           12            1  
2           11            3  
3           23            4  
4            8            0  


## Scale numerical columns

In [5]:
X_sample[schema['num_cols']] = scaler_loaded.transform(X_sample[schema['num_cols']])

## One-hot encode categorical columns

In [6]:
X_sample = pd.get_dummies(X_sample, columns=['hour_of_day','day_of_week'], drop_first=True)

## Align columns with training data

In [7]:
# Get training columns used by the model
training_columns = model_loaded.feature_names_in_  

# Add missing columns in the sample with 0
for col in training_columns:
    if col not in X_sample.columns:
        X_sample[col] = 0

# Reorder columns to match training
X_sample = X_sample[training_columns]

print("Columns aligned with training data")

Columns aligned with training data


## Make predictions

In [8]:
y_pred = model_loaded.predict(X_sample)
print("Predictions for the sample dataset:", y_pred)

Predictions for the sample dataset: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [9]:
y_proba = model_loaded.predict_proba(X_sample)[:, 1]
print("Predicted probabilities:", y_proba)

Predicted probabilities: [0.02 0.03 0.06 0.05 0.05 0.03 0.06 0.02 0.02 0.06]
