# Load the pickled model

In [17]:
import joblib

# Load the persisted model from a path relative to the notebook's working directory.
# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load artifacts that come from a trusted source (see the
# scikit-learn model-persistence link below).
model = joblib.load("fraud_detector.pkl")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [18]:
# Sanity check: how many input columns the loaded estimator expects
# (presumably the scikit-learn `n_features_in_` attribute set at fit time).
model.n_features_in_

15

# sample input testing

In [19]:
# Hand-built feature row, keyed exactly as the model names its columns.
# Insertion order is preserved because it becomes the DataFrame column order.
sample = {}

# One-hot product category: a single flag is set (Electronics here).
sample.update({
    'cat__category_Beauty': 0,
    'cat__category_Electronics': 1,
    'cat__category_Fashion': 0,
    'cat__category_Groceries': 0,
    'cat__category_Home Appliances': 0,
})

# Pass-through ("remainder") account, session and transaction signals.
sample.update({
    'remainder__acc_days': 3,
    'remainder__failed_logins': 1,
    'remainder__is_vpn_or_proxy': 1,
    'remainder__transaction_amount': 2500,
    'remainder__is_card_blacklisted': 0,
    'remainder__is_multiple_cards_used': 1,
    'remainder__items_quantity': 7,
    'remainder__pages_viewed': 12,
    'remainder__device_change_during_session': 0,
    'remainder__purchase_frequency_user': 0.1,
})


In [20]:
import pandas as pd

# Wrap the single feature dict in a list so pandas builds a one-row frame
# whose columns are the dict keys (i.e. the model's feature names).
df = pd.DataFrame([sample])

# predict() returns one label per row; take the label of the only row.
prediction = model.predict(df)[0]



In [21]:
# Show the predicted label as a boolean (a truthy label is reported as fraud).
print("Fraudulent: ", bool(prediction))

Fraudulent:  True


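# Helper function for model input & the principal prediction function<br>
The saved model is not a full pipeline, just the bare estimator (described here as a linear regression model; since it exposes `predict_proba`, it is presumably logistic regression), so we need to preprocess the inputs ourselves before calling it.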

helper function and test

In [22]:
# Column names the model expects, in the order used to build its input frame.
feature_names = [
    # one-hot encoded product category
    'cat__category_Beauty',
    'cat__category_Electronics',
    'cat__category_Fashion',
    'cat__category_Groceries',
    'cat__category_Home Appliances',
    # pass-through ("remainder") features
    'remainder__acc_days',
    'remainder__failed_logins',
    'remainder__is_vpn_or_proxy',
    'remainder__transaction_amount',
    'remainder__is_card_blacklisted',
    'remainder__is_multiple_cards_used',
    'remainder__items_quantity',
    'remainder__pages_viewed',
    'remainder__device_change_during_session',
    'remainder__purchase_frequency_user',
]

def prepare_features(raw_input, feature_names):
    """Translate a human-friendly record into the model's feature row.

    Parameters
    ----------
    raw_input : mapping
        Must contain ``"category"`` (a category label as a string). Any other
        key is matched against the ``remainder__*`` feature names with that
        prefix removed; features absent from ``raw_input`` default to 0.
    feature_names : sequence of str
        Column names the model expects. Names starting with
        ``cat__category_`` are treated as one-hot category flags, names
        starting with ``remainder__`` as pass-through values; any other name
        is ignored.

    Returns
    -------
    dict
        One entry per recognised feature, inserted in the same order as
        ``feature_names`` so that a DataFrame built from it has its columns
        in the order the caller supplied.

    Raises
    ------
    KeyError
        If ``raw_input`` has no ``"category"`` key.
    ValueError
        If the category has no matching ``cat__category_*`` feature.
    """
    cat_prefix = "cat__category_"
    rest_prefix = "remainder__"

    # Validate the category up front so a bad request fails before any work.
    category_col = cat_prefix + raw_input["category"]
    if category_col not in feature_names:
        raise ValueError(f"Unknown category '{raw_input['category']}'")

    # Single pass in feature_names order: the result's key order (and hence
    # the DataFrame column order) follows the caller's list instead of being
    # forced to "all categories first, then the rest".
    sample = {}
    for name in feature_names:
        if name.startswith(cat_prefix):
            sample[name] = 1 if name == category_col else 0
        elif name.startswith(rest_prefix):
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence of "remainder__" inside the name.
            sample[name] = raw_input.get(name[len(rest_prefix):], 0)

    return sample


# Example request in the caller-facing schema: a plain category label plus the
# model's pass-through feature names without their "remainder__" prefix.
raw_input = dict(
    category="Electronics",
    acc_days=3,
    failed_logins=1,
    is_vpn_or_proxy=1,
    transaction_amount=2500,
    is_card_blacklisted=0,
    is_multiple_cards_used=1,
    items_quantity=7,
    pages_viewed=12,
    device_change_during_session=0,
    purchase_frequency_user=0.1,
)

# Expand the friendly record into the model's feature row
# (one-hot category flags plus the prefixed pass-through features).
sample = prepare_features(raw_input, feature_names)

# Build a one-row DataFrame from the feature dict and take the label
# predicted for that single row.
df = pd.DataFrame([sample])
prediction = model.predict(df)[0]

# Report the label as a boolean (a truthy label is reported as fraud).
print("Fraudulent:", bool(prediction))


Fraudulent: True




Principal prediction function

In [25]:
def predict_fraud_with_confidence(raw_input):
    """Score one transaction and return the label with its fraud probability.

    ``raw_input`` is a human-friendly record as accepted by
    ``prepare_features`` (a ``"category"`` label plus un-prefixed feature
    values). It is expanded against the notebook-level ``feature_names`` and
    scored with the notebook-level ``model``.

    Returns a dict with two keys:
      * ``"fraudulent"`` - the predicted label cast to ``bool``
      * ``"confidence"`` - the second ``predict_proba`` column for the row,
        rounded to 5 decimals (assumed to be the fraud class, i.e. that the
        model's classes are ordered [not fraud, fraud] - TODO confirm against
        ``model.classes_``)
    """
    features = prepare_features(raw_input, feature_names)
    frame = pd.DataFrame([features])

    # Only one row was submitted, so index 0 selects its label / probabilities.
    label = model.predict(frame)[0]
    fraud_probability = model.predict_proba(frame)[0][1]

    return dict(
        fraudulent=bool(label),
        confidence=float(round(fraud_probability, 5)),
    )


In [26]:
# Smoke-test the prediction function with the example record defined above.

result = predict_fraud_with_confidence(raw_input)
print(result)

{'fraudulent': True, 'confidence': 0.97583}




## Wrapping the predictor into an api using "fastapi"

In [27]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# ASGI application object; the route below is registered on it.
app = FastAPI()

# Request body schema for POST /predict. Field names mirror the un-prefixed
# keys that prepare_features looks up, plus the plain "category" label.
class FraudInput(BaseModel):
    # Category label; prepare_features rejects labels with no matching
    # "cat__category_<label>" feature.
    category: str
    acc_days: int
    failed_logins: int
    # The fields marked "could be bool" are declared as int; the example
    # requests in this notebook use 0 and 1 for them.
    is_vpn_or_proxy: int  # could be bool
    transaction_amount: float
    is_card_blacklisted: int  # could be bool
    is_multiple_cards_used: int  # could be bool
    items_quantity: int
    pages_viewed: int
    device_change_during_session: int  # could be bool
    purchase_frequency_user: float

@app.post("/predict")
def predict(data: FraudInput):
    """Score one transaction posted as JSON.

    The validated request body is converted to a plain dict and passed to
    ``predict_fraud_with_confidence``; its result dict (``fraudulent``,
    ``confidence``) is returned as the JSON response.

    Raises
    ------
    HTTPException
        422 when the predictor raises ``ValueError`` - in this notebook that
        is how ``prepare_features`` reports an unknown ``category``. Without
        this mapping a bad category would surface as a 500 server error.
    """
    # Pydantic v2 renamed .dict() to .model_dump() and deprecated the old
    # name; use the new one when present, fall back for Pydantic v1.
    raw_input = data.model_dump() if hasattr(data, "model_dump") else data.dict()

    try:
        return predict_fraud_with_confidence(raw_input)
    except ValueError as exc:
        # NOTE(review): this also converts any ValueError raised by the model
        # itself into a 422; narrow it if server-side errors must stay 500.
        raise HTTPException(status_code=422, detail=str(exc)) from exc


# Run The SCRIPT !!!!!

The script can be run from the .ipynb file, but it is more convenient to run it from the terminal. To do so, <br> 
type the following command into a shell: `python start_pyserver.py`<br>
`start_pyserver.py` only starts the main script, `fraud_api.py`, using uvicorn.<br>
`fraud_api.py` is basically this notebook, but streamlined.