In [1]:
import pickle

input_file = 'XGBoost_model.bin'

with open(input_file, 'rb') as f_in: 
    dv, model = pickle.load(f_in)


dv, model

(DictVectorizer(sparse=False), <xgboost.core.Booster at 0x75c10f20ed20>)

In [2]:
feature_names = dv.get_feature_names_out()
print(feature_names)

['amount' 'card_present' 'card_type=Basic Credit' 'card_type=Basic Debit'
 'card_type=Gold Credit' 'card_type=Platinum Credit'
 'card_type=Premium Debit' 'channel=mobile' 'channel=pos' 'channel=web'
 'city=Chicago' 'city=Dallas' 'city=Houston' 'city=Los Angeles'
 'city=New York' 'city=Philadelphia' 'city=Phoenix' 'city=San Antonio'
 'city=San Diego' 'city=San Jose' 'city=Unknown City' 'city_size=large'
 'city_size=medium' 'country=Australia' 'country=Brazil' 'country=Canada'
 'country=France' 'country=Germany' 'country=Japan' 'country=Mexico'
 'country=Nigeria' 'country=Russia' 'country=Singapore' 'country=UK'
 'country=USA' 'day' 'device=Android App' 'device=Chip Reader'
 'device=Chrome' 'device=Edge' 'device=Firefox' 'device=Magnetic Stripe'
 'device=NFC Payment' 'device=Safari' 'device=iOS App'
 'distance_from_home' 'high_risk_merchant' 'hour'
 'max_single_amount_in_last_hour' 'merchant=AMC Theaters'
 'merchant=Airbnb' 'merchant=Aldi' 'merchant=AliExpress' 'merchant=Amazon'
 'mercha

In [3]:
test_record1 = {'amount': 66.98,
 'card_present': 0,
 'card_type': 'Platinum Credit',
 'channel': 'POS',
 'city': 'Chicago',
 'city_size': 'medium',
 'country': 'USA',
 'device': 'Chrome',
 'distance_from_home': 100.51,
 'high_risk_merchant': 1,
 'hour': 3,
 'max_single_amount_in_last_hour': 90.48,
 'merchant': 'Netflix',
 'merchant_category': 'Entertainment',
 'merchant_type': 'online',
 'num_transactions_in_last_hour': 4,
 'total_amount_in_last_hour': 965.14,
 'transaction_hour': 9,
 'unique_countries_in_last_hour': 1,
 'unique_merchants_in_last_hour': 2,
 'weekend_transaction': 0,
 'year': 2021,
 'month': 6,
 'day': 13}

In [4]:
test_record2 = {'amount': 66.98,
 'card_present': 1,
 'card_type': 'Platinum Credit',
 'channel': 'POS',
 'city': 'Chicago',
 'city_size': 'medium',
 'country': 'USA',
 'device': 'Chip Reader',
 'distance_from_home': 10.51,
 'high_risk_merchant': 0,
 'hour': 3,
 'max_single_amount_in_last_hour': 66.98,
 'merchant': 'Home Depot',
 'merchant_category': 'Retail',
 'merchant_type': 'physical',
 'num_transactions_in_last_hour': 4,
 'total_amount_in_last_hour': 160.14,
 'transaction_hour': 9,
 'unique_countries_in_last_hour': 1,
 'unique_merchants_in_last_hour': 2,
 'weekend_transaction': 0,
 'year': 2021,
 'month': 6,
 'day': 13}

In [5]:
import pandas as pd
import xgboost as xgb

# Assuming `model` and `dv` are already loaded/defined in your environment

def predict_transaction_fraud(test_record, fraud_threshold=0.5):
    """
    Predicts whether a given transaction is fraudulent or not using the pre-trained model and vectorizer.
    
    Parameters:
    - test_record (dict): The transaction data in dictionary format.
    - fraud_threshold (float): The threshold for classifying a transaction as fraud (default is 0.5).
    
    Returns:
    - str: 'Fraud' or 'Not Fraud' based on the model's prediction.
    """
    # Get the feature names used during training
    train_feature_names = model.feature_names

    # Transform the test record into the format used for prediction
    X_test = dv.transform([test_record])

    # Ensure the transformed test data has the same feature names as the training data
    X_test = pd.DataFrame(X_test, columns=train_feature_names)

    # Convert the DataFrame to a DMatrix
    X_test_dmatrix = xgb.DMatrix(X_test)

    # Get the prediction (probability of being fraudulent)
    y_preds = model.predict(X_test_dmatrix)

    # Classify the transaction based on the fraud threshold
    transaction_type = "Not Fraud" if y_preds[0] >= fraud_threshold else "Fraud"

    return transaction_type


In [6]:

# Call the function with just the test record
transaction_status = predict_transaction_fraud(test_record1)

print(f"The current transaction is: {transaction_status}")

The current transaction is: Fraud


In [7]:

# Call the function with just the test record
transaction_status = predict_transaction_fraud(test_record2)

print(f"The current transaction is: {transaction_status}")

The current transaction is: Not Fraud


**Web service check of predict.py using Flask**

In [15]:
import requests


url = 'http://localhost:9696/predict'

response = requests.post(url, json=test_record1).json()
print(response)
if response['transaction_status'] == 'Fraud':
    print('Send the notification to Customer')

{'transaction_status': 'Fraud'}
Send the notification to Customer


In [16]:
response = requests.post(url, json=test_record2).json()
print(response)
if response['transaction_status'] == 'Fraud':
    print('Send a notification to Customer')

{'transaction_status': 'Not Fraud'}
