# Load TensorFlow model


In [1]:
import tensorflow as tf

# Location of the saved Keras fraud model file, given relative to the working directory.
model_path = "../notebooks/models/fraud_model.h5"
# Deserialize the saved model; `model` is reused by the summary and prediction cells below.
model = tf.keras.models.load_model(model_path)

print("✅ TensorFlow fraud model loaded")


2025-12-28 23:47:25.077330: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-12-28 23:47:25.077349: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-12-28 23:47:25.077354: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.92 GB
2025-12-28 23:47:25.077375: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-28 23:47:25.077387: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


✅ TensorFlow fraud model loaded


# Load dataset from PostgreSQL

In [2]:
import pandas as pd
import os
from sqlalchemy import create_engine

from urllib.parse import quote_plus

# Connection settings are read from the environment so that no credentials
# are stored in the notebook itself.
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Fail fast with a readable message: an unset variable would otherwise be
# formatted as the literal string "None" inside the connection URL.
_db_settings = {
    "DB_USER": DB_USER,
    "DB_PASSWORD": DB_PASSWORD,
    "DB_HOST": DB_HOST,
    "DB_PORT": DB_PORT,
    "DB_NAME": DB_NAME,
}
_missing = [name for name, value in _db_settings.items() if value is None]
if _missing:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing)}"
    )

# URL-encode the user and password: characters such as '@', ':' or '/' in a
# password would otherwise be parsed as part of the URL structure.
engine = create_engine(
    f"postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Query to get all columns from training_fraud_dataset
df = pd.read_sql("""SELECT * FROM feature.training_fraud_dataset""", engine)

# Apply the same preprocessing as in training (notebook 05):
# replace the raw event date with its day-of-month as a numeric 'Day' feature.
df['event_date'] = pd.to_datetime(df['event_date'])
df['Day'] = df['event_date'].dt.day
df = df.drop(columns=['event_date'])

print(f"✅ Data loaded. Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
df.head()

✅ Data loaded. Shape: (41188, 17)
Columns: ['user_id', 'tx_count', 'total_tx_amount', 'avg_tx_amount', 'max_tx_amount', 'std_tx_amount', 'avg_account_balance', 'total_clicks', 'total_impressions', 'total_conversion', 'ctr', 'weekday', 'month', 'year', 'fraud_score', 'fraud_label', 'Day']


Unnamed: 0,user_id,tx_count,total_tx_amount,avg_tx_amount,max_tx_amount,std_tx_amount,avg_account_balance,total_clicks,total_impressions,total_conversion,ctr,weekday,month,year,fraud_score,fraud_label,Day
0,AA13249,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,222,1,0,222.0,4.0,12.0,2025.0,231.800711,0,25
1,AA16259,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,237,1,0,237.0,4.0,12.0,2025.0,246.800711,0,25
2,AA35121,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,133,1,0,133.0,4.0,12.0,2025.0,142.800711,0,25
3,AA23441,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,167,7,0,23.857143,4.0,12.0,2025.0,33.657854,0,25
4,AA19754,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,243,1,0,243.0,4.0,12.0,2025.0,252.800711,0,25


In [3]:
# Print the layer-by-layer architecture and parameter counts of the loaded
# model to confirm its structure before running inference.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1024      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 3137 (12.25 KB)
Trainable params: 3137 (12.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Load scaler (required for preprocessing)

In [4]:
import joblib

# Location of the serialized scaler that is applied to the features before prediction.
scaler_path = "../notebooks/models/scaler_fraud_model.pkl"
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load scaler artifacts that come from a trusted source.
scaler = joblib.load(scaler_path)

print("✅ Scaler loaded")

✅ Scaler loaded


In [5]:
# Create features - EXACTLY as done in training (notebook 05)
# The model expects 15 features (all columns except 'user_id' and 'fraud_label')
# NOTE(review): 'fraud_score' is used as an input feature; if 'fraud_label' was
# derived from it this would leak the target — confirm against notebook 05.

FEATURES = [
    "tx_count",
    "total_tx_amount",
    "avg_tx_amount",
    "max_tx_amount",
    "std_tx_amount",
    "avg_account_balance",
    "total_clicks",
    "total_impressions",
    "total_conversion",
    "ctr",
    "weekday",
    "month",
    "year",
    "fraud_score",
    "Day"
]

print(f"✅ Total features: {len(FEATURES)}")
X = df[FEATURES].values

# Read the expected input shape from the loaded model instead of hard-coding it,
# so the printed value is an actual check rather than a fixed string.
expected_shape = model.input_shape

print(f"✅ Shape of X: {X.shape}")
print(f"   Expected by model: {expected_shape}")

# Stop here if the feature matrix does not match the model's input width;
# otherwise the mismatch only surfaces later inside scaler/model calls.
if X.shape[1] != expected_shape[-1]:
    raise ValueError(
        f"Feature count mismatch: X has {X.shape[1]} columns, "
        f"model expects {expected_shape[-1]}"
    )

✅ Total features: 15
✅ Shape of X: (41188, 15)
   Expected by model: (None, 15)


# Generate Fraud probability from model

In [6]:
# Decision threshold: probabilities at or above this value are labelled as fraud.
FRAUD_THRESHOLD = 0.5

# Scale the features (CRITICAL: model was trained on scaled data)
X_scaled = scaler.transform(X)

# Generate fraud probability; flatten the model output into a 1-D array
# so it can be assigned as a DataFrame column.
fraud_prob = model.predict(X_scaled).flatten()
df["fraud_probability"] = fraud_prob

# Fraud prediction with threshold at FRAUD_THRESHOLD
df["fraud_prediction"] = (df["fraud_probability"] >= FRAUD_THRESHOLD).astype(int)

print(f"✅ Predictions generated for {len(fraud_prob)} records")
print(f"   Fraud probability range: {fraud_prob.min():.4f} - {fraud_prob.max():.4f}")
print(f"   Mean fraud probability: {fraud_prob.mean():.4f}")
print(f"   Total predicted frauds: {df['fraud_prediction'].sum()}")

# Show sample predictions
df.head()

  65/1288 [>.............................] - ETA: 1s

2025-12-28 23:47:26.283277: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


✅ Predictions generated for 41188 records
   Fraud probability range: 0.0000 - 1.0000
   Mean fraud probability: 0.1998
   Total predicted frauds: 8246


Unnamed: 0,user_id,tx_count,total_tx_amount,avg_tx_amount,max_tx_amount,std_tx_amount,avg_account_balance,total_clicks,total_impressions,total_conversion,ctr,weekday,month,year,fraud_score,fraud_label,Day,fraud_probability,fraud_prediction
0,AA13249,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,222,1,0,222.0,4.0,12.0,2025.0,231.800711,0,25,4.98912e-08,0
1,AA16259,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,237,1,0,237.0,4.0,12.0,2025.0,246.800711,0,25,0.00321764,0
2,AA35121,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,133,1,0,133.0,4.0,12.0,2025.0,142.800711,0,25,1.4232349999999999e-36,0
3,AA23441,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,167,7,0,23.857143,4.0,12.0,2025.0,33.657854,0,25,0.0,0
4,AA19754,464,234972720.0,506406.724138,4963146.0,1040360.0,575492.148707,243,1,0,243.0,4.0,12.0,2025.0,252.800711,0,25,0.2133464,0


# Prompt Engineering

In [7]:
def build_prompt(row):
    """Build the fraud-analysis prompt text for a single scored record.

    Args:
        row: Any object exposing the attributes interpolated below
            (``fraud_probability``, ``tx_count``, ``total_tx_amount``,
            ``avg_tx_amount``, ``max_tx_amount``, ``std_tx_amount``,
            ``avg_account_balance``, ``total_clicks``, ``total_impressions``,
            ``total_conversion``, ``ctr``), e.g. one row of the scored DataFrame.
            ``fraud_probability`` and ``ctr`` must support ``:.4f`` formatting.

    Returns:
        str: The prompt, containing the model output, the behaviour features
        and the task instructions for the LLM.
    """
    return f"""
You are a senior fraud risk analyst.

Analyze the following user profile and explain the Fraud Risk based on the features provided.

MODEL OUTPUT:
- Fraud Probability: {row.fraud_probability:.4f}

USER BEHAVIOR FEATURES:
- Transaction Count: {row.tx_count}
- Total Transaction Amount: {row.total_tx_amount}
- Average Transaction Amount: {row.avg_tx_amount}
- Maximum Transaction Amount: {row.max_tx_amount}
- Standard Deviation of Transaction Amount: {row.std_tx_amount}
- Average Account Balance: {row.avg_account_balance}
- Total Clicks: {row.total_clicks}
- Total Impressions: {row.total_impressions}
- Total Conversions: {row.total_conversion}
- Click-Through Rate (CTR): {row.ctr:.4f}

TASK:
1. Explain why this probability is high or low based on the features above.
2. Highlight key risk drivers.
3. Recommend an action (ALLOW / REVIEW / BLOCK) based on the analysis.
4. Keep the explanation concise and to the point.
5. Explain if fraud prediction was related to the features or not.
6. Limit the response to 1000 words.

DO NOT make up data or features not provided in the profile.
Base reasoning strictly on inputs above.
"""

# Send prompt to Ollama

In [8]:
import requests

def call_ollama(prompt, model="llama3",
                url="http://localhost:11434/api/generate", timeout=60):
    """Send a prompt to an Ollama server and return the generated text.

    Args:
        prompt: Prompt text to send.
        model: Name of the Ollama model to use.
        url: Generate endpoint to POST to; defaults to the local server.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The value of the ``"response"`` field of the JSON reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If a successful reply carries no ``"response"`` field.
    """
    response = requests.post(
        url,
        json={
            "model": model,
            "prompt": prompt,
            # Ask for one complete JSON reply instead of a stream of chunks.
            "stream": False
        },
        timeout=timeout
    )
    # Surface HTTP failures directly; without this an error body would only
    # show up as an opaque KeyError on the "response" lookup below.
    response.raise_for_status()
    return response.json()["response"]

# Prompt engineering implementation

In [9]:
from html import escape
from IPython.display import HTML, display

# Pick one record reproducibly (fixed random_state) and build its prompt.
sample = df.sample(1, random_state=42).iloc[0]
prompt = build_prompt(sample)

explanation = call_ollama(prompt=prompt, model="llama3")

print("Fraud probability:", sample.fraud_probability)
print("LLM Explanation:")

# Escape the LLM text before embedding it: a raw '<', '>' or '&' in the
# response would otherwise be parsed as HTML markup and break the rendering.
# 'padding' now uses a valid length ('center' is not a valid padding value
# and was ignored by the browser).
display(HTML(f"""
<div style='
    font-size:16px; 
    line-height:1.8; 
    text-align:justify; 
    white-space:pre-wrap; 
    padding:12px; 
    border-radius:5px;
    max-width:100%;
'>
{escape(explanation)}
</div>
"""))

Fraud probability: 0.0
LLM Explanation:
