In [1]:
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from datetime import datetime
from geopy.distance import geodesic
from typing import List, Dict
import sys
from dateutil.relativedelta import relativedelta
import json
from dataclasses import dataclass
from typing import Any, Optional, Tuple
from collections import Counter
from llama_cpp import LlamaGrammar

sys.path.append("../")
from models.transaction import TransactionModel

# Update the grammar to be absolutely strict about the output
FRAUD_DETECTION_GRAMMAR_STRING = r"""
root   ::= json
json   ::= "{" ws risk_level ws "," ws key_factors ws "}"
risk_level ::= "\"risk_level\"" ws ":" ws ("\"LOW\"" | "\"MEDIUM\"" | "\"HIGH\"")
key_factors ::= "\"key_factors\"" ws ":" ws "[" factors "]"
factors ::= "" | factor_item | factor_item ("," ws factor_item)*
factor_item ::= "\"" [^""]+ "\""
ws     ::= [ \t\n]*
"""

In [None]:
@dataclass
class CardholderProfile:
    """Represents analyzed patterns of the cardholder"""

    home_location: tuple[float, float]  # (lat, long) of residential address
    common_merchants: List[str]  # Frequently visited merchants
    common_categories: List[str]  # Common spending categories
    typical_amounts: Dict[str, float]  # Typical amounts by category
    active_hours: List[int]  # Hours when cardholder typically transacts
    job: str  # Direct job title
    gender: str  # Gender (M/F)
    age: int  # Age derived from DOB
    usual_radius: float  # Typical transaction radius from home

    @classmethod
    def from_transaction(
        cls, transaction: TransactionModel, history: List[Dict]
    ) -> "CardholderProfile":
        """Create a cardholder profile from transaction and history"""
        # Calculate age
        dob = transaction.dob
        age = relativedelta(datetime.today(), dob).years
        # Home location from current transaction's address
        home_location = (float(transaction.lat), float(transaction.long))

        if not history:
            return cls(
                home_location=home_location,
                common_merchants=[transaction.merchant],
                common_categories=[transaction.category],
                typical_amounts={transaction.category: float(transaction.amt)},
                active_hours=[transaction.trans_date_trans_time.hour],
                job=transaction.job,
                gender=transaction.gender,
                age=age,
                usual_radius=0.0,
            )

        # Analyze transaction history
        merchants = Counter([tx["merchant"] for tx in history])
        categories = Counter([tx["category"] for tx in history])

        # Calculate typical amounts by category
        amounts_by_category = {}
        for tx in history:
            cat = tx["category"]
            if cat not in amounts_by_category:
                amounts_by_category[cat] = []
            amounts_by_category[cat].append(tx["amount"])

        typical_amounts = {
            cat: sum(amounts) / len(amounts)
            for cat, amounts in amounts_by_category.items()
        }
        timestamp = datetime.strptime(
            tx["timestamp"], "%Y-%m-%d %H:%M:%S"
        )  # Analyze transaction hours
        hours = [timestamp.hour for tx in history]

        # Calculate usual radius
        distances = [
            geodesic(
                home_location, (float(tx["merch_lat"]), float(tx["merch_long"]))
            ).miles
            for tx in history
        ]
        usual_radius = sum(distances) / len(distances)

        return cls(
            home_location=home_location,
            common_merchants=[m for m, _ in merchants.most_common(5)],
            common_categories=[c for c, _ in categories.most_common(5)],
            typical_amounts=typical_amounts,
            active_hours=list(set(hours)),
            job=transaction.job,
            gender=transaction.gender,
            age=age,
            usual_radius=usual_radius,
        )

In [34]:
from datetime import datetime, date, timedelta
from decimal import Decimal
from typing import List, Dict

# Base Cardholder Profile
test_profile = CardholderProfile(
    common_merchants=["Whole Foods", "Starbucks", "Target", "Shell", "Trader Joe's"],
    home_location=(47.6062, -122.3321),  # Seattle coordinates

    common_categories=["grocery", "restaurant", "retail", "gas", "entertainment"],
    typical_amounts={
        "grocery": 150.00,
        "restaurant": 35.00,
        "retail": 100.00,
        "gas": 50.00,
        "entertainment": 30.00
    },
    active_hours=[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
    job="Software Engineer",
    gender="F",
    age=1021,
    usual_radius=10.0  # 10 mile radius from home
)

# Transaction History (last 24 hours)
transaction_history = [
    {
        "timestamp": "2024-03-15 09:30:00",
        "merchant": "Starbucks",
        "category": "restaurant",
        "amount": 6.75,
        "merch_lat": 47.6082,
        "merch_long": -122.3351
    },
    {
        "timestamp": "2024-03-15 12:45:00",
        "merchant": "Whole Foods",
        "category": "grocery",
        "amount": 156.42,
        "merch_lat": 47.6159,
        "merch_long": -122.3359
    }
]

# Test Scenarios

# Scenario 1: Normal Transaction
normal_transaction = TransactionModel(
    trans_date_trans_time=datetime(2024, 3, 15, 14, 30),
    cc_num="4532123456789012",
    merchant="Target",
    category="retail",
    amt=Decimal("95.50"),
    first="Sarah",
    last="Johnson",
    gender="F",
    street="742 Maple Avenue",
    city="Seattle",
    state="WA",
    zip="98101",
    lat=47.6062,
    long=-122.3321,
    city_pop=737015,
    job="Software Engineer",
    dob=date(1988, 5, 12),
    trans_num="T000001",
    unix_time=1710512400,
    merch_lat=47.6142,
    merch_long=-122.3331,
    is_fraud=False
)

# Scenario 2: Unusual Amount Transaction
unusual_amount_transaction = TransactionModel(
    trans_date_trans_time=datetime(2024, 3, 15, 15, 30),
    cc_num="4532123456789012",
    merchant="Target",
    category="retail",
    amt=Decimal("1599.99"),  # Much higher than typical
    first="Sarah",
    last="Johnson",
    gender="F",
    street="742 Maple Avenue",
    city="Seattle",
    state="WA",
    zip="98101",
    lat=47.6062,
    long=-122.3321,
    city_pop=737015,
    job="Software Engineer",
    dob=date(1988, 5, 12),
    trans_num="T000002",
    unix_time=1710516000,
    merch_lat=47.6142,
    merch_long=-122.3331,
    is_fraud=True
)

# Scenario 3: Unusual Location + Time Transaction
unusual_location_time_transaction = TransactionModel(
    trans_date_trans_time=datetime(2024, 3, 15, 3, 45),  # Unusual hour
    cc_num="4532123456789012",
    merchant="Unknown Store",  # Unusual merchant
    category="retail",
    amt=Decimal("299.99"),
    first="Sarah",
    last="Johnson",
    gender="F",
    street="742 Maple Avenue",
    city="Seattle",
    state="WA",
    zip="98101",
    lat=47.6062,
    long=-122.3321,
    city_pop=737015,
    job="Software Engineer",
    dob=date(1988, 5, 12),
    trans_num="T000003",
    unix_time=1710482700,
    merch_lat=47.9062,  # Much further from home
    merch_long=-122.5321,
    is_fraud=True
)

# Scenario 4: Impossible Travel Transaction
impossible_travel_transaction = TransactionModel(
    trans_date_trans_time=datetime(2024, 3, 15, 13, 0),  # Just 15 mins after last transaction
    cc_num="4532123456789012",
    merchant="Best Buy",
    category="retail",
    amt=Decimal("899.99"),
    first="Sarah",
    last="Johnson",
    gender="F",
    street="742 Maple Avenue",
    city="Seattle",
    state="WA",
    zip="98101",
    lat=47.6062,
    long=-122.3321,
    city_pop=737015,
    job="Software Engineer",
    dob=date(1888, 5, 12),
    trans_num="T000004",
    unix_time=1710505200,
    merch_lat=34.0522,  # Los Angeles coordinates
    merch_long=-118.2437,
    is_fraud=True
)


In [35]:
def create_fraud_analysis_prompt(
    transaction: TransactionModel,
    profile: CardholderProfile,
    history: List[Dict]
) -> Tuple[str, Dict[str, Any]]:
    """
    Analyzes transaction context and creates a natural language LLM prompt for fraud detection.
    
    Returns:
        Tuple containing (prompt_template, input_values)
    """
    # Analyze transaction context
    current_location = (float(transaction.merch_lat), float(transaction.merch_long))
    distance_from_home = geodesic(profile.home_location, current_location).miles
    
    # Category analysis
    category_typical_amount = profile.typical_amounts.get(transaction.category, 0)
    amount_deviation = (
        abs(float(transaction.amt) - category_typical_amount) / category_typical_amount
        if category_typical_amount > 0
        else 1.0
    )

    # Time and pattern analysis
    hour = transaction.trans_date_trans_time.hour
    unusual_hour = hour not in profile.active_hours

    # Travel and history analysis
    travel_info = ""
    history_context = "No previous transaction history is available."
    last_time = None
    hours_diff = 0
    last_location = None
    
    if history:
        last_tx = sorted(history, key=lambda x: x["timestamp"])[-1]
        last_time = datetime.strptime(last_tx["timestamp"], "%Y-%m-%d %H:%M:%S")
        last_location = (float(last_tx["merch_lat"]), float(last_tx["merch_long"]))
        
        if last_time < transaction.trans_date_trans_time:
            distance = geodesic(last_location, current_location).miles
            hours_diff = (transaction.trans_date_trans_time - last_time).total_seconds() / 3600
            
            if hours_diff > 0:
                speed = distance / hours_diff
                if speed > 500:
                    travel_info = f" The transaction shows an unusually rapid change in location, indicating a travel speed of {speed:.1f} mph between transactions, which exceeds normal travel speeds."
            
            history_context = (
                f"Their last transaction was {last_time.strftime('%B %d at %I:%M %p')}, "
                f"approximately {hours_diff:.1f} hours ago, "
                f"at coordinates {last_location}."
            )

    # Create context strings for the narrative
    merchant_context = "new to this customer" if transaction.merchant not in profile.common_merchants else "frequently visited by this customer"
    category_context = "unusual" if transaction.category not in profile.common_categories else "common"
    time_context = "outside their normal active hours" if unusual_hour else "within their typical active hours"
    amount_context = (
        f"{'significantly higher' if amount_deviation > 2 else 'somewhat higher' if amount_deviation > 1.2 else 'typical'} "
        f"for this category of purchase"
    )

    # History context is now handled in the travel analysis section above

    # Create the prompt template with natural language
    prompt_template = """You are an expert fraud detection analyst within a financial institution's security system. Your role is to evaluate transactions for potential fraud, keeping in mind that your assessments will be reviewed by human analysts. Since failing to detect fraud is more costly than false alarms, you should flag any genuinely suspicious patterns while providing clear reasoning.

Transaction Context:
A {age}-year-old {gender} who works as a {job} has made a purchase of ${amount:.2f} at {merchant} ({category}) on {transaction_time}. This merchant is {merchant_context}, and this category of purchase is {category_context} for them. The transaction occurred {time_context}. The purchase amount is {amount_context}.

Location Analysis:
The transaction occurred {distance_from_home:.1f} miles from the customer's home location, while their usual activity radius is {usual_radius:.1f} miles.{travel_info}

Customer Profile:
This customer typically shops at: {common_merchants}
Their usual purchase categories include: {common_categories}

Transaction History:
{history_context}

Please analyze this transaction for potential fraud indicators. Consider:
1. The location and travel patterns
2. Transaction amount and category
3. Timing and frequency
4. Alignment with customer profile
5. Any unusual patterns or deviations
6. If the profile is typical for the age, job and gender

Provide your assessment and specifically highlight any suspicious patterns that warrant attention. When you consider the transcation to be fraudulent YOU MUST INCLUDE '!FLAGGED!, such that the transaction can be forwarded to a human.'"""

    # Create the input values dictionary with formatted values
    input_values = {
        "amount": float(transaction.amt),
        "merchant": transaction.merchant,
        "category": transaction.category,
        "transaction_time": transaction.trans_date_trans_time.strftime("%B %d at %I:%M %p"),
        "merchant_context": merchant_context,
        "category_context": category_context,
        "time_context": time_context,
        "amount_context": amount_context,
        "distance_from_home": distance_from_home,
        "travel_info": travel_info,
        "common_merchants": ", ".join(profile.common_merchants) or "No established shopping patterns yet",
        "common_categories": ", ".join(profile.common_categories) or "No established category patterns yet",
        "history_context": history_context,
        "age": profile.age,
        "gender": profile.gender,
        "job": profile.job,
        "usual_radius": profile.usual_radius
    }

    return prompt_template, input_values

In [36]:
prompt, input_values = create_fraud_analysis_prompt(transaction=normal_transaction, profile=test_profile, history=transaction_history)
formatted_prompt = prompt.format(**input_values)
print(formatted_prompt)


You are an expert fraud detection analyst within a financial institution's security system. Your role is to evaluate transactions for potential fraud, keeping in mind that your assessments will be reviewed by human analysts. Since failing to detect fraud is more costly than false alarms, you should flag any genuinely suspicious patterns while providing clear reasoning.

Transaction Context:
A 1021-year-old F who works as a Software Engineer has made a purchase of $95.50 at Target (retail) on March 15 at 02:30 PM. This merchant is frequently visited by this customer, and this category of purchase is common for them. The transaction occurred within their typical active hours. The purchase amount is typical for this category of purchase.

Location Analysis:
The transaction occurred 0.6 miles from the customer's home location, while their usual activity radius is 10.0 miles.

Customer Profile:
This customer typically shops at: Whole Foods, Starbucks, Target, Shell, Trader Joe's
Their usual