# Simplified Fintech Agentic AI 

1. Identity: role, guardrails, run_id / agent_id / versions  
2. Logic: ReAct loop (tool calls + observations)  
3. Optimization: Python tool for all numeric calculations  
4. Workflow: Human-in-the-loop trigger -> `Needs Human Review`  
5. Reliability: decision records + backtesting stub

To run: set `OPENAI_API_KEY` in your environment, or paste it when prompted in the first code cell.

## 0) Install 


In [1]:
# Run once if needed
# !pip install -U langchain langchain-openai langchain-experimental langgraph

## 1) API Key 

In [None]:
import os

os.environ["OPENAI_API_KEY"] = "API-KEY-HERE"

## 2) Identity - Governance Layer

In [4]:
import json, uuid, datetime
from typing import Any, Dict, List

AGENT_ID = "credit_risk_agent"
POLICY_VERSION = "v1.0"
MODEL_VERSION = os.getenv("MODEL_VERSION", "gpt-4o-mini")

NEGATIVE_CONSTRAINTS = [
    "Never provide legal advice.",
    "Never provide investment advice or guaranteed outcomes.",
    "Never request or store personal identifiers (name, address, SSN, account numbers).",
    "Never fabricate tool outputs or numeric results.",
    "Never compute financial math in free text; always use tools.",
    "If inputs are missing or invalid, ask for correction instead of guessing.",
    "If case is high impact or high risk, output 'Needs Human Review'.",
]

SYSTEM_ROLE = f'''
You are a financial risk assistant.
Your job is to draft risk assessments based on approved tools and approved, non-sensitive inputs.

Hard rules:
- {chr(10).join('- ' + x for x in NEGATIVE_CONSTRAINTS)}

Output format (always):
- Header: run_id, agent_id, model_version, policy_version
- Decision: "Decision Draft" or "Needs Human Review"
- Evidence: tool used + inputs summary + computed risk score + threshold comparison
- Short rationale (1-2 lines)
'''.strip()

def new_run_metadata() -> Dict[str, str]:
    return {
        "run_id": f"run_{datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')}_{uuid.uuid4().hex[:8]}",
        "agent_id": AGENT_ID,
        "model_version": MODEL_VERSION,
        "policy_version": POLICY_VERSION,
    }

run_meta = new_run_metadata()
run_meta

  "run_id": f"run_{datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')}_{uuid.uuid4().hex[:8]}",


{'run_id': 'run_20260224T042158Z_4e148164',
 'agent_id': 'credit_risk_agent',
 'model_version': 'gpt-4o-mini',
 'policy_version': 'v1.0'}

## 3) Optimization - Tooling Layer (Python tool for all numeric work)

In [5]:
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL

python_repl = PythonREPL()

@tool
def python_scoring(python_code: str) -> str:
    '''
    Execute python code for scoring/stat checks.
    Expected behavior: python_code should set a variable named `result` as a JSON string, then print(result).
    '''
    return python_repl.run(python_code)

tools = [python_scoring]

## 4) Workflow - Orchestration Layer (HITL trigger)

In [6]:
RISK_THRESHOLD = 0.50
HIGH_IMPACT_AMOUNT = 1_000_000  # example

def build_query(income: float, debt: float, credit_score: int, requested_amount: float) -> str:
    return f'''
Use the python_scoring tool ONLY. Do not do math in text.

Inputs:
income = {income}
debt = {debt}
credit_score = {credit_score}
requested_amount = {requested_amount}

Risk formula:
risk = (debt/income * 0.6) + ((850-credit_score)/850 * 0.4)

Write Python code that:
1) computes risk
2) creates a dict named payload with keys: risk_score, threshold, high_impact, reason_codes, evidence
3) sets a variable named result = json.dumps(payload)
4) prints(result)
'''.strip()

def parse_json_from_text(text: str) -> Dict[str, Any]:
    import re, json
    m = re.search(r"\{.*\}", text, flags=re.S)
    if not m:
        return {}
    try:
        return json.loads(m.group(0))
    except Exception:
        return {}

def apply_hitl_policy(result_json: Dict[str, Any]) -> Dict[str, Any]:
    risk = float(result_json.get("risk_score", 0.0))
    high_impact = bool(result_json.get("high_impact", False))
    reason_codes = list(result_json.get("reason_codes", []))

    needs_review = False
    if risk > RISK_THRESHOLD:
        needs_review = True
        if "HIGH_RISK_SCORE" not in reason_codes:
            reason_codes.append("HIGH_RISK_SCORE")
    if high_impact:
        needs_review = True
        if "HIGH_IMPACT_CASE" not in reason_codes:
            reason_codes.append("HIGH_IMPACT_CASE")

    decision = "Needs Human Review" if needs_review else "Decision Draft"
    return {
        "decision": decision,
        "risk_score": risk,
        "threshold": RISK_THRESHOLD,
        "high_impact": high_impact,
        "reason_codes": reason_codes,
        "evidence": result_json.get("evidence", {}),
    }

## 5) Logic -  agent (LangGraph)

In [9]:
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

llm = ChatOpenAI(model=MODEL_VERSION, temperature=0)
graph_agent = create_react_agent(llm, tools)
print("Agent created.")

Agent created.


C:\Users\KPdesktop\AppData\Local\Temp\ipykernel_16888\3723743553.py:5: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  graph_agent = create_react_agent(llm, tools)


## 6) Run one case (end-to-end)

In [10]:
# Example (non-sensitive) inputs
income = 75_000
debt = 30_000
credit_score = 620
requested_amount = 250_000

query = build_query(income, debt, credit_score, requested_amount)

full_input = f'''RUN_METADATA: {json.dumps(run_meta)}

SYSTEM_ROLE:
{SYSTEM_ROLE}

TASK:
{query}
'''.strip()

# Invoke
result = graph_agent.invoke({"messages": [{"role": "user", "content": full_input}]})
final_text = result["messages"][-1].content

print("=== RAW AGENT OUTPUT ===")
print(final_text[:2000])

# Parse JSON from the agent output, apply HITL routing, and show final structured output
parsed = parse_json_from_text(final_text)
decision_payload = apply_hitl_policy(parsed)

final_output = {**run_meta, **decision_payload}
print("\n=== FINAL STRUCTURED OUTPUT ===")
print(json.dumps(final_output, indent=2))

=== RAW AGENT OUTPUT ===
- Header: run_id: run_20260224T042158Z_4e148164, agent_id: credit_risk_agent, model_version: gpt-4o-mini, policy_version: v1.0
- Decision: "Decision Draft"
- Evidence: tool used: python_scoring + inputs summary: income=75000, debt=30000, credit_score=620, requested_amount=250000 + computed risk score: 0.3482 + threshold comparison: risk score (0.3482) is below threshold (0.5)
- Short rationale: The computed risk score indicates a low risk, as it is below the established threshold.

=== FINAL STRUCTURED OUTPUT ===
{
  "run_id": "run_20260224T042158Z_4e148164",
  "agent_id": "credit_risk_agent",
  "model_version": "gpt-4o-mini",
  "policy_version": "v1.0",
  "decision": "Decision Draft",
  "risk_score": 0.0,
  "threshold": 0.5,
  "high_impact": false,
  "reason_codes": [],
  "evidence": {}
}


## 7) Reliability - Feedback Layer (decision records + backtesting stub)

In [11]:
DECISION_LOG_PATH = "decision_records.jsonl"

def append_decision_record(record: Dict[str, Any], path: str = DECISION_LOG_PATH) -> None:
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")

append_decision_record(final_output)
print(f"Saved decision record to: {DECISION_LOG_PATH}")

Saved decision record to: decision_records.jsonl


In [12]:
def load_last_n(path: str, n: int = 50) -> List[Dict[str, Any]]:
    try:
        with open(path, "r", encoding="utf-8") as f:
            lines = f.readlines()[-n:]
        return [json.loads(line) for line in lines if line.strip()]
    except FileNotFoundError:
        return []

def backtest_stub(records: List[Dict[str, Any]]) -> Dict[str, Any]:
    '''
    MVP placeholder:
    Production: join with outcomes and compute FN/FP/Recall/Precision over last 12 months.
    Here: summarize review rate + average risk score.
    '''
    if not records:
        return {"error": "no_records"}

    review_rate = sum(1 for r in records if r.get("decision") == "Needs Human Review") / len(records)
    avg_risk = sum(float(r.get("risk_score", 0.0)) for r in records) / len(records)
    return {
        "n_records": len(records),
        "review_rate": review_rate,
        "avg_risk_score": avg_risk,
        "note": "Replace with 12-month backtesting + expected vs actual dashboard metrics."
    }

recent = load_last_n(DECISION_LOG_PATH, n=50)
print(json.dumps(backtest_stub(recent), indent=2))

{
  "n_records": 1,
  "review_rate": 0.0,
  "avg_risk_score": 0.0,
  "note": "Replace with 12-month backtesting + expected vs actual dashboard metrics."
}
