In [1]:
# **Demo: Fraud Detection in Call Transcripts Using GPT-4 & RAG**

## **Step 1: Install Required Libraries**
## First, install the necessary Python libraries.

!pip install openai langchain faiss-cpu sentence-transformers spacy flask
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.7.1
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
!pip install openai==0.28



In [3]:
import openai
import torch
import spacy
import faiss
import numpy as np
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from flask import Flask, request, jsonify


In [23]:
# Load spaCy's small English pipeline (downloaded in the install cell).
# preprocess_text uses it for tokenisation and for the per-token
# stop-word / punctuation flags.
nlp = spacy.load("en_core_web_sm")

# Function to clean and preprocess text
def preprocess_text(text):
    """Strip stop words, punctuation and whitespace tokens from ``text``.

    Args:
        text: Raw transcript string to clean.

    Returns:
        The surviving tokens joined by single spaces, in their original order.
    """
    doc = nlp(text)
    tokens = [
        token.text
        for token in doc
        # spaCy emits newlines / repeated spaces as their own tokens; they are
        # neither stop words nor punctuation, so drop them explicitly or they
        # leak into the joined output as stray whitespace.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(tokens)

# Example call transcript dataset: eight short, hand-written insurance-claim
# statements used as demo input.
sample_texts = ["I was in a car accident and I need to claim for the damages. The other driver disappeared without a trace.",
    "My car was stolen last night. Can you process my claim right away? I have no proof but trust me, it was stolen!",
    "I was involved in a crash, but I don't have the police report yet. Can you still approve my claim?",
    "The accident was definitely not my fault, but I'm claiming for more than the actual damages to get extra compensation.",
    "I crashed my car while driving but I don’t have the details on the other car involved. I just want to make sure the claim is processed as soon as possible.",
    "My car was in a parking lot and got hit by another vehicle, but I don't have the license plate number or any witnesses. Can you help me with the claim?",
    "I was driving in heavy rain, lost control, and hit a tree. I need the maximum payout, but there’s no damage to the tree or surrounding area to prove the accident was severe.",
    "I was rear-ended last week and my car got some damage. But I want to make a claim for a much bigger amount than what it’s worth, based on my injuries."
]

# Preprocess the sample transcripts (stop words and punctuation removed).
# NOTE(review): preprocessed_texts is not referenced by any later cell shown in
# this notebook; the GPT-4 and retrieval demos are fed raw strings instead.
preprocessed_texts = [preprocess_text(text) for text in sample_texts]


In [24]:
import openai
import os
from dotenv import load_dotenv

# Load the environment variables from the .env file (if one exists)
load_dotenv()

# Get the API key from the environment. os.getenv expects the *name* of the
# variable, not the secret itself: put `OPENAI_API_KEY=<your key>` in the .env
# file or export it in the shell before starting the kernel.
api_key = os.getenv("OPENAI_API_KEY")

# Configure the openai client only when a key was found; otherwise warn so the
# reader knows why later API calls will be rejected.
if api_key:
    openai.api_key = api_key
else:
    print("API Key not found. Please ensure the .env file contains the key.")

def analyze_fraud_gpt4(transcript):
    """Ask GPT-4 whether a call transcript shows fraudulent behavior.

    Args:
        transcript: Text of the customer-service call to assess.

    Returns:
        The content string of the first choice in the chat completion. The
        prompt requests a "Fraud Detected" / "Reason" layout, but the text is
        returned exactly as the model produced it.
    """
    user_prompt = f"""
    You are an AI fraud detection system analyzing customer service call transcripts.
    Identify whether the following transcript contains fraudulent behavior and explain why.

    Transcript: "{transcript}"

    Output format:
    - **Fraud Detected:** Yes/No
    - **Reason:** Explanation of why it is or isn't fraudulent.
    """

    # System message sets the role; the user message carries the instructions
    # together with the transcript.
    chat_messages = [
        {"role": "system", "content": "You are a fraud detection AI."},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.ChatCompletion.create(model="gpt-4", messages=chat_messages)

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

# Test GPT-4 on a sample transcript.
# NOTE: this sends a live request through openai.ChatCompletion, so it needs a
# configured API key and network access.
print(analyze_fraud_gpt4("I was involved in a crash, but I don't have the police report yet. Can you still approve my claim?"))


API Key not found. Please ensure the .env file contains the key.
- **Fraud Detected:** No
- **Reason:** The statement does not indicate any fraudulent activity. The caller just mentioned they were involved in a crash and they do not yet have the police report. It is quite normal to not have a police report immediately after a crash. There's no falsehood or attempt to deceive evident in the transcript, therefore there's no reason to suspect fraud based on the given conversation. It's advisable to wait for more evidence before making a judgment.


In [25]:
# Load sentence transformer for embeddings; the same encoder is used for the
# stored cases and for queries in retrieve_similar_cases.
encoder = SentenceTransformer("all-MiniLM-L6-v2")

# Example fraud cases for retrieval: five hand-written insurance-fraud
# descriptions. Positions in this list are the ids FAISS returns on search.
fraud_cases = [
    "A customer claimed their car was stolen but later found out it was involved in a staged accident.",
    "A driver reported a crash that occurred weeks ago, trying to claim damages after a delay in filing the report.",
    "Customer falsely exaggerated the damages from a minor collision to claim higher compensation.",
    "A person filed a claim for a hit-and-run accident but could not provide any evidence or witnesses to support the claim.",
    "A driver attempted to claim insurance for an accident that occurred after their policy expired, hoping to trick the insurer."
]

# Encode and store fraud cases in FAISS.
# The index dimensionality is taken from the embedding matrix's second axis,
# and vectors are added in list order so index ids line up with fraud_cases.
fraud_embeddings = encoder.encode(fraud_cases)
index = faiss.IndexFlatL2(fraud_embeddings.shape[1])
index.add(fraud_embeddings)

def retrieve_similar_cases(query_text, top_k=2):
    """Retrieve the stored fraud cases closest to ``query_text``.

    Args:
        query_text: Text to embed and look up in the FAISS index.
        top_k: Maximum number of cases to return. Values larger than the
            number of indexed cases are clamped to the index size.

    Returns:
        A list of up to ``top_k`` strings from ``fraud_cases``, nearest first.
        Empty when ``top_k`` is not positive or the index holds no vectors.
    """
    # Never request more neighbours than the index holds: FAISS pads missing
    # results with id -1, and fraud_cases[-1] would silently return the last
    # case instead of "no match".
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_vector = encoder.encode([query_text])
    _distances, indices = index.search(query_vector, k)
    # Defensive filter in case the index still reports a missing neighbour.
    return [fraud_cases[i] for i in indices[0] if i >= 0]

# Test retrieval on a suspicious transcript.
# NOTE(review): this OTP query is unrelated to the insurance cases in fraud_cases;
# retrieve_similar_cases applies no distance cut-off, so it still returns the
# nearest stored cases regardless of how relevant they are.
query = "Someone asked me to share my OTP for security verification."
print("Similar Cases:", retrieve_similar_cases(query))


Similar Cases: ['A customer claimed their car was stolen but later found out it was involved in a staged accident.', 'A driver attempted to claim insurance for an accident that occurred after their policy expired, hoping to trick the insurer.']


In [26]:
def analyze_fraud_with_rag(transcript, top_k=2):
    """Classify a transcript with GPT-4, using similar past fraud cases as context.

    NOTE(review): analyze_with_confidence calls this function, but no definition
    exists in the notebook cells visible here, so a fresh "Restart & Run All"
    failed with NameError. This is a reconstruction that mirrors
    analyze_fraud_gpt4's prompt and output format; confirm it against the
    original implementation if that is still available.

    Args:
        transcript: Text of the customer-service call to assess.
        top_k: Number of similar past cases to retrieve for the prompt.

    Returns:
        The content string of the first choice in the chat completion.
    """
    similar_cases = retrieve_similar_cases(transcript, top_k=top_k)
    case_lines = "\n".join(f"- {case}" for case in similar_cases) or "- (none found)"

    prompt = (
        "You are an AI fraud detection system analyzing customer service call transcripts.\n"
        "Use the past fraud cases below as reference only if they are relevant.\n\n"
        f"Past fraud cases:\n{case_lines}\n\n"
        f'Transcript: "{transcript}"\n\n'
        "Output format:\n"
        "- **Fraud Detected:** Yes/No\n"
        "- **Reason:** Explanation of why it is or isn't fraudulent.\n"
    )

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "You are a fraud detection AI."},
                  {"role": "user", "content": prompt}]
    )
    return response["choices"][0]["message"]["content"]


def analyze_with_confidence(transcript, threshold=0.8):
    """Run RAG-backed fraud detection and defer low-confidence results to a human.

    Args:
        transcript: Text of the customer-service call to assess.
        threshold: Minimum confidence required to return the model's answer.
            Defaults to 0.8.

    Returns:
        The GPT-4 analysis text, or the literal string
        "Uncertain - Needs human review" when confidence is below ``threshold``.
    """
    result = analyze_fraud_with_rag(transcript)

    # Placeholder for a real confidence score: the value is drawn at random and
    # does not depend on `result`, so with the default threshold roughly one
    # call in five is routed to human review purely by chance.
    confidence_score = np.random.uniform(0.75, 0.98)

    if confidence_score < threshold:
        return "Uncertain - Needs human review"
    return result

# Test with an ambiguous case.
# NOTE: the confidence score inside analyze_with_confidence is a random
# placeholder, so repeated runs may print either the model's analysis or
# "Uncertain - Needs human review".
print(analyze_with_confidence("Can you confirm your full account number for verification?"))


- **Fraud Detected:** No
- **Reason:** The provided transcript merely suggests a standard procedure for customer identification or verification. It doesn't contain any information about misrepresentation, false claims or suspicious behavior related to insurance or financial transactions. Past fraud cases discussed are irrelevant to the content in this specific transcript. Without further context, there is no clear sign of fraudulent activity.


In [17]:
app = Flask(__name__)

@app.route("/detect_fraud", methods=["POST"])
def detect_fraud():
    """API endpoint for fraud detection in call transcripts.

    Expects a JSON object body with a non-empty string field ``"text"``.

    Returns:
        200 with ``{"fraud_analysis": <analysis text>}`` on success, or
        400 with ``{"error": <message>}`` when the body is missing, is not a
        JSON object, or lacks a usable ``"text"`` field.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every kind of bad input is handled by the one check below.
    data = request.get_json(silent=True)
    transcript = data.get("text") if isinstance(data, dict) else None

    # Reject bad client input with a 400 rather than letting a KeyError or
    # TypeError surface as an HTTP 500.
    if not isinstance(transcript, str) or not transcript.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty string field 'text'."}), 400

    result = analyze_with_confidence(transcript)
    return jsonify({"fraud_analysis": result})

# Start Flask's built-in server when this cell/script is the entry point.
# NOTE(review): host="0.0.0.0" listens on all network interfaces, and the
# /detect_fraud endpoint performs no authentication. app.run() also blocks
# until interrupted, so no later notebook cell runs while the server is up.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
