In [19]:
import os

import openai
from dotenv import load_dotenv

# Copy the variables defined in a local .env file into the process environment
# so the API key never has to be written into the notebook itself.
load_dotenv()

# `openai` is imported in this cell because the assignment below needs it;
# without that import the cell raises NameError on a fresh kernel.
# os.getenv returns None when OPENAI_API_KEY is not set, in which case the
# client key is left as None here.
openai.api_key = os.getenv("OPENAI_API_KEY")


In [20]:
import faiss
import numpy as np
import openai  # Use any LLM service (GPT-4, Claude, Llama, etc.)

# Nearest-neighbour index previously written to disk with FAISS.
index = faiss.read_index("transaction_faiss.index")

# Embedding matrix saved as .npy; read it first, then cast to float32.
transaction_embeddings = np.load("transaction_embeddings.npy")
transaction_embeddings = transaction_embeddings.astype("float32")

# The LLM client reads its key from the OPENAI_API_KEY environment variable.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [21]:

# Function to find similar transactions
def find_similar_transactions(query_embedding, top_k=5):
    """Search the module-level FAISS ``index`` for the nearest neighbours.

    Args:
        query_embedding: Either a single embedding (1-D sequence or array of
            length d) or an already-batched 2-D array of shape (n, d). A 1-D
            input is treated as a batch of one, so existing callers that pass
            one vector get the same result as before.
        top_k: Number of neighbours requested for each query row.

    Returns:
        The ``(distances, indices)`` pair exactly as returned by
        ``index.search``, with one row per query vector.
        NOTE(review): FAISS documents that result slots it cannot fill are
        reported with index -1; callers that use the indices for positional
        lookups should filter those out.
    """
    # The search call needs a 2-D float32 matrix. atleast_2d promotes a single
    # vector to shape (1, d) and leaves a batch as-is (wrapping in a list would
    # turn a batch into a 3-D array); ascontiguousarray fixes dtype and layout.
    queries = np.ascontiguousarray(np.atleast_2d(query_embedding), dtype="float32")
    return index.search(queries, top_k)

# Earlier, non-RAG draft of the LLM explanation function. It is left commented out; generate_audit_explanation_rag is the active version.
# def generate_explanation(query_id, df_original):
#     # Retrieve query transaction embedding
#     query_embedding = transaction_embeddings[query_id]

#     # Find similar transactions (but don't emphasize similarity scores in explanation)
#     distances, indices = find_similar_transactions(query_embedding, top_k=5)
    
#     # Create a structured prompt for the LLM
#     prompt = f"""
#     You are an expert financial auditor. A transaction (ID: {query_id}) has been flagged as an anomaly.
#     Your task is to analyze why this transaction might be suspicious from an **audit perspective**.

#     Consider the following possible audit risks:
#     - **Fraud Indicators**: Unusual transaction amounts, duplicate payments, suspicious vendor relationships.
#     - **Regulatory Compliance Issues**: Missing approvals, incorrect account postings, violations of internal controls.
#     - **Operational Risks**: Unusual frequency of transactions, misclassified expenses, erroneous journal entries.

#     Provide a **clear audit-style explanation** for why Transaction {query_id} is anomalous. Do **not** mention similarity scores. Focus on audit risks and financial red flags.

#     """
    
#     # Generate explanation from LLM
#     response = openai.ChatCompletion.create(
#         model="gpt-4",
#         messages=[{"role": "system", "content": "You are an expert financial auditor."},
#                   {"role": "user", "content": prompt}]
#     )

#     explanation = response["choices"][0]["message"]["content"]
#     return explanation






In [18]:
def _format_transaction_bullet(t):
    """Render one transaction record (a dict) as a single markdown bullet line."""
    return (
        f"- **Transaction ID**: {t['BELNR']}, **Amount**: {t['DMBTR']}, "
        f"**Company Code**: {t['BUKRS']}, **Posting Key**: {t['BSCHL']}"
    )


def generate_audit_explanation_rag(query_id, df_original, top_k=5, model="gpt-4"):
    """
    Generate an audit-focused explanation for an anomalous transaction using RAG.

    The embedding at position ``query_id`` is looked up in
    ``transaction_embeddings``, its nearest neighbours are retrieved through
    ``find_similar_transactions``, and the matching rows of ``df_original`` are
    placed in the prompt together with the flagged row itself.

    Args:
        query_id: Zero-based position of the flagged transaction. It is used
            both as an index into ``transaction_embeddings`` and as a row
            position in ``df_original``.
            NOTE(review): this assumes the DataFrame rows and the embedding
            rows are in the same order - confirm against how the index was built.
        df_original: DataFrame with at least the columns ``BELNR``, ``DMBTR``,
            ``BUKRS`` and ``BSCHL``.
        top_k: Maximum number of neighbour transactions put into the prompt.
        model: Chat model name passed to ``openai.ChatCompletion.create``.

    Returns:
        The text content of the first choice in the chat completion response.

    Raises:
        IndexError: If ``query_id`` is negative or beyond the last embedding
            row. A negative value would otherwise silently wrap around to a
            row counted from the end.
    """
    n_embeddings = len(transaction_embeddings)
    if not 0 <= query_id < n_embeddings:
        raise IndexError(
            f"query_id {query_id} is out of range for {n_embeddings} embeddings"
        )

    # Retrieve query transaction embedding
    query_embedding = transaction_embeddings[query_id]

    # Request one extra neighbour so that dropping the query's own row (if the
    # search returns it) can still leave top_k results.
    _, indices = find_similar_transactions(query_embedding, top_k=top_k + 1)

    # Drop negative indices (they are not valid hits and iloc would map -1 to
    # the LAST row) and drop the query row itself, then cap at top_k.
    neighbour_rows = [
        int(i) for i in indices[0] if i >= 0 and i != query_id
    ][:top_k]

    # Extract transaction metadata for the flagged row and for its neighbours
    flagged_transaction = df_original.iloc[[query_id]].to_dict(orient="records")[0]
    retrieved_transactions = df_original.iloc[neighbour_rows].to_dict(orient="records")

    # Format the records into structured knowledge for the prompt
    flagged_info = _format_transaction_bullet(flagged_transaction)
    retrieved_info = "\n".join(
        _format_transaction_bullet(t) for t in retrieved_transactions
    )

    # Create a structured prompt for the LLM. The flagged transaction's own
    # fields are included so the model is not asked to reason from an ID alone.
    prompt = f"""
    You are an expert financial auditor analyzing an anomalous transaction (ID: {query_id}).
    Details of the flagged transaction itself:

    {flagged_info}

    Below is the retrieved knowledge about similar past anomalies:

    {retrieved_info}

    Using this information, explain **why transaction {query_id} is anomalous** based on **audit risks**, including:
    - **Fraud Indicators** (e.g., duplicate payments, unusual amounts, suspicious vendor activity)
    - **Regulatory Violations** (e.g., missing approvals, misclassified expenses)
    - **Operational Risks** (e.g., frequent transactions to unknown accounts)

    Provide a **detailed financial audit explanation**, and suggest **next steps for the auditor**.
    """

    # Generate explanation from LLM
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "system", "content": "You are an expert financial auditor."},
                  {"role": "user", "content": prompt}]
    )

    return response["choices"][0]["message"]["content"]

import pandas as pd

# Read the transaction table from CSV; point this at your own file if needed.
df_original = pd.read_csv("datathon_data.csv")

# Row position of the anomaly to explain - edit to analyze a different one.
query_id = 506926

# Ask the RAG helper for an audit-style write-up of that transaction.
explanation = generate_audit_explanation_rag(
    query_id=query_id,
    df_original=df_original,
)

# Show a header line followed by the generated text.
print("\n🔍 Audit Explanation for Transaction", query_id)
print(explanation)



🔍 Audit Explanation for Transaction 506926
The transaction 506926 does indeed appear to be anomalous when compared to other similar transactions based on a few audit risk factors. 

**Fraud Indicators** 

Duplicate Payments: While there don't appear to be any duplicate payments in terms of exact payment amounts, the similarity in the amounts listed across all transactions is concerning and suggests there may be some 'rounding' or systematic formula driving this cloning of transactions. 

Unusual Amounts: The large transaction amounts do appear to be consistent across all listed transactions, but the amount is significant enough to raise some concerns. If this amount doesn't fall in line with the company's usual transaction range, it's definitely treated as an unusual and thus suspicious activity. 

**Regulatory Violations**

Missing approvals: While we don't have the data related to approvals for this transaction, it is something that should absolutely be checked. A transaction of suc