In [18]:
import importlib.metadata
import json
import os
import re

from azure.storage.blob import BlobServiceClient, ContentSettings
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
# Report the installed package versions from distribution metadata. Neither
# `langchain` nor `openai` is imported as a module in this notebook, so reading
# `<module>.__version__` raises NameError on a fresh kernel.
print(importlib.metadata.version("langchain"))
print(importlib.metadata.version("openai"))

1.2.3
2.15.0


In [None]:
# Load environment variables (python-dotenv) before the lookups below read them.
load_dotenv()

# JSON keys whose values are replaced wholesale by the redaction guardrail.
# Entries are lowercase because incoming keys are lowercased before comparison.
SENSITIVE_KEYS = {
    "account_id", "account_number", "card_number", "ssn",
    "email", "ip_address", "url", "domain",
}

# Chat model client shared by the narrative-generation cell.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version="2024-12-01-preview",
    azure_deployment="gpt-4.1-mini",
    temperature=0.3,
)

In [None]:
# Regexes for identifiers embedded in free-text values, applied in order. URLs go
# first so that an e-mail address or IP inside a URL is masked together with it.
# Deliberately conservative: there is no pattern for bare domains or generic
# account numbers, which are only caught through their JSON key.
_FREE_TEXT_PATTERNS = (
    ("URL", re.compile(r"\b(?:https?://|www\.)\S+", re.IGNORECASE)),
    ("EMAIL", re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")),
    ("IP_ADDRESS", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
    ("SSN", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")),
    # 13-19 digits, contiguous or in 4-4-4-n / 4-6-5 groups split by space or dash.
    ("CARD_NUMBER", re.compile(
        r"\b(?:\d{4}[ -]?\d{6}[ -]?\d{5}|(?:\d{4}[ -]?){3}\d{1,7})\b")),
)


def redact_value(text: str) -> str:
    """Mask identifiers that appear inside a free-text string.

    The JSON walker below calls this for every string value that is not under a
    sensitive key. It is defined here because no definition is present in the
    visible notebook cells, which made the walker fail with NameError on the
    first plain string it met.

    Args:
        text: A string value taken from the parsed JSON document.

    Returns:
        ``text`` with each pattern match replaced by ``[<LABEL>_REDACTED]``.
    """
    for label, pattern in _FREE_TEXT_PATTERNS:
        text = pattern.sub(f"[{label}_REDACTED]", text)
    return text


def local_guardrail_redact_json(raw_json: str, sensitive_keys=None) -> str:
    """Redact sensitive fields from a JSON document before it leaves the process.

    Values stored under a sensitive key are replaced by ``[<KEY>_REDACTED]``
    whatever their type. All other values are walked recursively: dicts and lists
    are descended into, strings are passed through ``redact_value``, and every
    other scalar is returned unchanged.

    Args:
        raw_json: The JSON document as text.
        sensitive_keys: Optional iterable of key names to redact. Matching is
            case-insensitive. Defaults to the module-level ``SENSITIVE_KEYS``.

    Returns:
        The sanitized document serialized with ``indent=2``.

    Raises:
        ValueError: If ``raw_json`` is not valid JSON or is not a str/bytes value.
    """
    try:
        data = json.loads(raw_json)
    except (TypeError, json.JSONDecodeError) as exc:
        # json.loads raises TypeError for None and other non-text inputs; report
        # both failure modes the same way and keep the cause attached.
        raise ValueError("Invalid JSON input") from exc

    keys = SENSITIVE_KEYS if sensitive_keys is None else sensitive_keys
    blocked = {str(key).lower() for key in keys}

    def redact(obj):
        if isinstance(obj, dict):
            return {
                k: f"[{k.upper()}_REDACTED]" if k.lower() in blocked else redact(v)
                for k, v in obj.items()
            }
        if isinstance(obj, list):
            return [redact(item) for item in obj]
        if isinstance(obj, str):
            return redact_value(obj)
        return obj

    return json.dumps(redact(data), indent=2)

In [None]:
def generate_sar_narrative(transaction_json):
    """Generate a SAR narrative from transaction JSON after local redaction.

    The input is sanitized with ``local_guardrail_redact_json`` and only the
    sanitized text is sent to the model through the ``prompt | llm`` chain.

    Args:
        transaction_json: The transaction data as a JSON string.

    Returns:
        A dict with two keys: ``"narrative"`` (the model's response content) and
        ``"redacted_input"`` (the sanitized JSON that was sent to the model).

    Raises:
        ValueError: Propagated from ``local_guardrail_redact_json`` when the
            input is not valid JSON.
        RuntimeError: If invoking the chain fails; the original exception is
            attached as ``__cause__``.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an AML compliance analyst writing lawful SAR summaries. 
        Output must be plain English with no markdown and no line breaks. Return a single, continuous paragraph.
        Use ONLY the information provided.Do NOT invent names, entities, or facts"""),
        ("user", """Write a concise SAR narrative (<=300 words) from this JSON. 
        Include who, what, when, where, how, why, detection source, and amounts. 
        End with one sentence stating the report date, amount (if any), and main entity. {data}""")
    ])

    chain = prompt | llm

    # Redaction runs outside the try block so that bad input surfaces as
    # ValueError instead of being reported as an LLM failure.
    safe_data = local_guardrail_redact_json(transaction_json)

    try:
        response = chain.invoke({"data": safe_data})
    except Exception as exc:
        # Chain explicitly so the traceback names the underlying failure as the cause.
        raise RuntimeError(f"LLM generation failed: {exc}") from exc

    return {
        "narrative": response.content,
        "redacted_input": safe_data,
    }

In [None]:
def save_to_azure_blob_json(
    filename: str,
    data: dict,
    container: str = "sar-reports",
    overwrite: bool = True,
):
    """Serialize ``data`` as JSON and upload it to Azure Blob Storage.

    Args:
        filename: Blob name to write inside the container.
        data: JSON-serializable payload.
        container: Target container name. Defaults to ``"sar-reports"``.
        overwrite: Whether an existing blob with the same name is replaced.
            Defaults to ``True``.

    Returns:
        The string ``"Upload Successful"`` once the upload call has returned.

    Raises:
        ValueError: If ``AZURE_STORAGE_CONNECTION_STRING`` is unset or empty.
        TypeError: If ``data`` cannot be serialized by ``json.dumps``.
    """
    connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    if not connect_str:
        # Fail with a clear message instead of handing None to the SDK parser.
        raise ValueError(
            "AZURE_STORAGE_CONNECTION_STRING is not set; cannot upload to Azure Blob Storage"
        )

    # Serialize first so an unserializable payload fails before any client is opened.
    payload = json.dumps(data, indent=2)

    # The context manager closes the client's transport when the upload finishes.
    with BlobServiceClient.from_connection_string(connect_str) as blob_service_client:
        blob_client = blob_service_client.get_blob_client(
            container=container,
            blob=filename,
        )

        print(f"Uploading JSON to Azure Storage as {filename}...")
        blob_client.upload_blob(
            payload,
            overwrite=overwrite,
            # Store the blob with a JSON content type.
            content_settings=ContentSettings(content_type="application/json"),
        )

    return "Upload Successful"

In [None]:
if __name__ == "__main__":
    # Demo run: build one sample transaction, generate its narrative, print the
    # result, then upload it.
    raw_input_json = json.dumps(
        dict(account_id="123456789012", amount=50000, type="Cash Deposit")
    )
    sar_output = generate_sar_narrative(raw_input_json)

    print(
        "\n--- SAR OUTPUT (JSON) ---",
        json.dumps(sar_output, indent=2),
        sep="\n",
    )

    save_to_azure_blob_json("sar_report_001.json", sar_output)