Parallel Agent Execution with LangGraph

In [16]:
import json
import os
import time

OUTPUT_DIR = "outputs"


def _read_agent_output(filename):
    """Parse one agent's saved JSON result located under OUTPUT_DIR."""
    output_path = os.path.join(OUTPUT_DIR, filename)
    with open(output_path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# One saved result per agent, loaded in the same order as before.
legal_output = _read_agent_output("legal_agent_output.json")
compliance_output = _read_agent_output("compliance_agent_output.json")
finance_output = _read_agent_output("finance_agent_output.json")
operations_output = _read_agent_output("operations_agent_output.json")

print("Agent outputs loaded successfully")


Agent outputs loaded successfully


In [17]:
from typing import TypedDict, Optional
from langgraph.graph import StateGraph, END, START

Imported LangGraph utilities and standard libraries required for parallel execution.

In [18]:
# define graph state
class GraphState(TypedDict):
    """Shared state dictionary passed to every node of the LangGraph workflow."""

    # Analysis request text; the agent nodes visible in this notebook do not read it.
    query: str
    # Per-agent input context: each agent node reads only its own field.
    combined_legal_text: str
    combined_compliance_text: str
    combined_finance_text: str
    combined_operations_text: str
    # Per-agent results: each node returns an update for exactly one of these
    # keys; they are initialised to None before the graph is invoked.
    legal: Optional[dict]
    compliance: Optional[dict]
    finance: Optional[dict]
    operations: Optional[dict]

Defined shared state structure to support parallel agent execution.

In [19]:
def validate_agent_output(output_str, clause_type=""):
    """Parse an agent's raw reply into a normalized result dictionary.

    Args:
        output_str: Raw text returned by the model. Markdown code fences are
            stripped before parsing.
        clause_type: Label stored under ``"clause_type"`` in the result.

    Returns:
        A dict with the keys ``clause_type``, ``extracted_clauses``,
        ``risk_level``, ``confidence`` and ``evidence``. When the reply cannot
        be interpreted as a JSON object, the same keys are returned with
        neutral defaults plus an ``"error"`` message; this function does not
        raise for malformed model output.
    """
    def _failure(reason):
        # Neutral result used whenever the reply is unusable.
        return {
            "clause_type": clause_type,
            "extracted_clauses": [],
            "risk_level": "unknown",
            "confidence": 0.0,
            "evidence": [],
            "error": f"Invalid JSON from model: {reason}"
        }

    if not isinstance(output_str, str):
        return _failure(f"expected a string, got {type(output_str).__name__}")

    # Remove Markdown code fences if present
    cleaned = output_str.strip()
    cleaned = cleaned.replace("```json", "").replace("```", "").strip()

    # Try parsing JSON
    try:
        output = json.loads(cleaned)
    except ValueError as first_error:
        # The model may wrap the JSON object in explanatory prose; retry on
        # the outermost {...} span before giving up.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            return _failure(str(first_error))
        try:
            output = json.loads(cleaned[start:end + 1])
        except ValueError:
            return _failure(str(first_error))

    # Valid JSON that is not an object (list, string, number) has no fields
    # to read, so treat it as a failed reply instead of crashing on .get().
    if not isinstance(output, dict):
        return _failure(f"expected a JSON object, got {type(output).__name__}")

    # Build validated result
    validated = {
        "clause_type": clause_type,
        "extracted_clauses": output.get("extracted_clauses", []),
        "risk_level": output.get("risk_level", "unknown"),
        "confidence": output.get("confidence", 0.0),
        "evidence": output.get("evidence", [])
    }

    return validated

In [20]:
import requests
import json

class BaseAgent:
    """Minimal client that sends a system prompt plus context to an Ollama model.

    Args:
        agent_name: Human-readable label for the agent.
        system_prompt: Instructions prepended to every request.
        model: Ollama model tag to generate with.
        base_url: Root URL of the Ollama server.
        timeout: Timeout in seconds passed to ``requests.post``.
    """

    def __init__(self, agent_name, system_prompt, model="gemma3:4b",
                 base_url="http://localhost:11434", timeout=600):
        self.agent_name = agent_name
        self.system_prompt = system_prompt
        self.model = model
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def run(self, context_text):
        """Generate a completion for ``context_text`` and return it as one string.

        Returns:
            The concatenated ``"response"`` fragments of the streamed reply,
            stripped of surrounding whitespace.

        Raises:
            requests.RequestException: On connection problems, timeouts, or a
                non-2xx HTTP status.
            RuntimeError: If the server streams an ``"error"`` message.
        """
        payload = {
            "model": self.model,
            "prompt": f"{self.system_prompt}\n\nUser Input:\n{context_text}",
            # Sampling parameters belong under "options" in the Ollama
            # generate API; a top-level "temperature" key is not honoured.
            "options": {"temperature": 0},
        }

        fragments = []

        # Stream=True -> handle incremental JSON; the context manager releases
        # the connection even if reading fails part-way.
        with requests.post(
            f"{self.base_url}/api/generate",
            json=payload,
            stream=True,
            timeout=self.timeout,
        ) as response:
            # Surface HTTP failures instead of returning an empty string that
            # later looks like "invalid JSON from model".
            response.raise_for_status()

            # Read streaming chunks
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line.decode("utf-8"))
                except ValueError:
                    # Skip lines that are not valid UTF-8 JSON.
                    continue
                if not isinstance(data, dict):
                    continue

                if data.get("error"):
                    raise RuntimeError(f"Ollama error: {data['error']}")

                # Ollama sends chunks as {"response": "..."}
                if "response" in data:
                    fragments.append(data["response"])

                # The final chunk is flagged with "done": true.
                if data.get("done"):
                    break

        return "".join(fragments).strip()


In [21]:
# System prompt for the legal agent. It limits the model to termination,
# governing-law and jurisdiction clauses and asks for a JSON-only reply with
# the keys extracted_clauses, risk_level, confidence and evidence, which are
# the keys validate_agent_output() reads.
LEGAL_AGENT_PROMPT = """
You are a Legal Contract Analysis Agent.

Your tasks:
1. Identify legal clauses (Termination, Governing Law, Jurisdiction).
2. Extract exact clause text from the provided contract section.
3. Assess legal risk: low, medium, or high.
4. Provide a confidence score between 0 and 1.
5. Include evidence (exact sentences that justify your conclusion).

Return ONLY valid JSON in this format:
{
  "extracted_clauses": [],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}
"""
# Initialize legal agent
legal_agent = BaseAgent(
    agent_name="Legal Agent",
    system_prompt=LEGAL_AGENT_PROMPT,
    model="gemma3:4b"
)

In [22]:
# System prompt for the compliance agent. The response schema includes an
# "evidence" list, so the task list asks for it explicitly (step 5), matching
# the legal and operations prompts.
COMPLIANCE_AGENT_PROMPT = """
You are a Compliance Risk Analysis Agent.

Your tasks:
1. Identify compliance-related clauses in the contract, including:
   - Data protection obligations
   - Regulatory requirements
   - Audit and reporting obligations
2. Extract exact compliance-related sentences.
3. Assess compliance risk as: low, medium, or high.
4. Provide a confidence score between 0 and 1.
5. Include evidence (exact sentences that justify your conclusion).

Return ONLY valid JSON in this format:
{
  "extracted_clauses": [],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}
"""

# initialize Compliance Agent
compliance_agent = BaseAgent(
    agent_name="Compliance Agent",
    system_prompt=COMPLIANCE_AGENT_PROMPT,
    model="gemma3:4b"
)

In [94]:
# System prompt for the finance agent. Unlike the other prompts in this
# notebook, it asks for one object per clause inside "extracted_clauses"
# (with the clause body under "clause_text") in addition to the overall
# risk_level / confidence / evidence fields.
# The top-level "clause_type" requested here is not read by
# validate_agent_output(), which sets clause_type from its own argument.
FINANCE_AGENT_PROMPT = """
You are a Finance Risk Analysis Agent.

Identify ALL finance-related clauses, including but not limited to:
- Payment terms
- Invoice timelines
- Fees, costs, charges, compensation, royalties
- Penalties, late fees, interest, delinquency charges
- Collection rights
- Reimbursement obligations
- Financial liability or indemnification linked to monetary loss
- Billing cycles and invoice frequency

For EACH clause:
1. Identify clause type (e.g., Payment Terms, Late Fees, Compensation)
2. Extract exact clause text
3. Assess financial risk (low/medium/high)
4. Provide confidence score
5. Provide evidence (exact sentence)

Return ONLY valid JSON:
{
  "clause_type": "Finance",
  "extracted_clauses": [
    {
      "clause_type": "",
      "clause_text": "",
      "risk_level": "",
      "confidence": 0.0,
      "evidence": [""]
    }
  ],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}
"""

In [95]:

# Initialize Finance Agent
# Bound to the module-level name that finance_node() looks up when it runs,
# so re-running this cell changes which agent the graph node calls.
finance_agent = BaseAgent(
    agent_name="Finance Agent",
    system_prompt=FINANCE_AGENT_PROMPT,
    model="gemma3:4b"
)

In [24]:
# System prompt for the operations agent. It targets deliverables, timelines,
# service obligations and SLAs, and asks for a JSON-only reply with the keys
# extracted_clauses, risk_level, confidence and evidence.
OPERATIONS_AGENT_PROMPT = """
You are an Operations Risk Analysis Agent.

Your tasks:
1. Identify operational clauses related to:
   - Deliverables
   - Timelines and milestones
   - Service obligations
   - Performance standards or SLAs
2. Extract the exact clause text.
3. Assess execution risk as: low, medium, or high.
4. Provide a confidence score between 0 and 1.
5. Include evidence sentences.

Return ONLY valid JSON:
{
  "extracted_clauses": [],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}
"""

# Initialize Operations Agent
operations_agent = BaseAgent(
    agent_name="Operations Agent",
    system_prompt=OPERATIONS_AGENT_PROMPT,
    model="gemma3:4b"
)

In [25]:
# Define agent nodes
def legal_node(state):
    """Run the legal agent on its context and publish the result under "legal"."""
    legal_context = state["combined_legal_text"]
    validated = validate_agent_output(
        legal_agent.run(legal_context),
        clause_type="Legal",
    )
    return {"legal": validated}

def compliance_node(state):
    """Run the compliance agent on its context and publish the result under "compliance"."""
    compliance_context = state["combined_compliance_text"]
    validated = validate_agent_output(
        compliance_agent.run(compliance_context),
        clause_type="Compliance",
    )
    return {"compliance": validated}

def finance_node(state):
    """Run the finance agent on its context and publish the result under "finance"."""
    finance_context = state["combined_finance_text"]
    validated = validate_agent_output(
        finance_agent.run(finance_context),
        clause_type="Finance",
    )
    return {"finance": validated}

def operations_node(state):
    """Run the operations agent on its context and publish the result under "operations"."""
    operations_context = state["combined_operations_text"]
    validated = validate_agent_output(
        operations_agent.run(operations_context),
        clause_type="Operations",
    )
    return {"operations": validated}

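Defined one node function per agent. Each node reads its own context field from the shared state, runs its agent, and returns the validated result under its own state key. (No execution-time logging is added in these nodes.)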

In [26]:
graph = StateGraph(GraphState)

# Node name -> node function, in registration order.
agent_nodes = {
    "legal_agent": legal_node,
    "compliance_agent": compliance_node,
    "finance_agent": finance_node,
    "operations_agent": operations_node,
}

for node_name, node_fn in agent_nodes.items():
    graph.add_node(node_name, node_fn)

# Fan out: every agent node is entered directly from START.
for node_name in agent_nodes:
    graph.add_edge(START, node_name)

# Fan in: every agent node leads straight to END.
for node_name in agent_nodes:
    graph.add_edge(node_name, END)

app = graph.compile()

Configured LangGraph to execute multiple agents concurrently from a shared entry point.

In [27]:
# Pinecone Setup

import os
import json
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

# Read a local .env file (if present) so the API key can come from there.
load_dotenv()

# Fail fast with a readable message when the Pinecone key is not set.
assert "PINECONE_API_KEY" in os.environ, "PINECONE_API_KEY not found"

print("Pinecone API key loaded successfully")

# Pinecone client used below to list, create and open indexes.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Sentence-Transformers encoder that embed_batch() calls through the
# module-level name `model`.
model = SentenceTransformer("all-MiniLM-L6-v2")

print("Sentence-Transformer model loaded successfully")

Pinecone API key loaded successfully
Sentence-Transformer model loaded successfully


In [28]:
INDEX_NAME = "cuad-index-minilm"
DIMENSION = 384

# Names of the indexes that already exist for this Pinecone client.
existing_indexes = [idx["name"] for idx in pc.list_indexes()]

# Create the serverless index only when it is not there yet.
index_is_missing = INDEX_NAME not in existing_indexes
if index_is_missing:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",
        spec=serverless_spec,
    )

index = pc.Index(INDEX_NAME)

print(f"Connected to Pinecone index: {INDEX_NAME}")

Connected to Pinecone index: cuad-index-minilm


In [29]:
# Building RAG Search Wrapper
import re
import json
import matplotlib.pyplot as plt
from typing import List, Dict

# Sentence-Transformers embedding function for Pinecone
def embed_batch(texts):
    """Encode ``texts`` with the module-level ``model`` and return its output.

    The progress bar is disabled and NumPy output is requested.
    """
    encode_options = {
        "show_progress_bar": False,
        "convert_to_numpy": True,
    }
    return model.encode(texts, **encode_options)

# Embed a Query
def embed_query(query: str):
    """Embed a single query string and return its vector as a plain list."""
    batch_vectors = embed_batch([query])
    first_vector = batch_vectors[0]
    return first_vector.tolist()

In [30]:
# Core RAG architecture
def rag_search(
    query: str,
    index,
    top_k: int = 5
) -> List[Dict]:
    """Retrieve the contract chunks most similar to ``query``.

    Args:
        query: Natural-language search text; embedded with ``embed_query``.
        index: Index object exposing ``query(vector=..., top_k=...,
            include_metadata=...)``.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        A list of dicts with ``score``, ``contract_id``, ``chunk_id`` and
        ``text``, in the order the index returned them. Matches whose
        metadata has no ``text`` are skipped, so fewer than ``top_k`` items
        may be returned.
    """
    query_vector = embed_query(query)

    results = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True
    )

    retrieved_chunks = []

    for match in results["matches"]:
        metadata = match.get("metadata") or {}

        # This notebook also upserts agent-memory vectors into the same
        # index; those carry no chunk text, so skip them instead of failing
        # with a KeyError.
        if "text" not in metadata:
            continue

        retrieved_chunks.append({
            "score": match["score"],
            "contract_id": metadata.get("contract_id"),
            "chunk_id": metadata.get("chunk_id"),
            "text": metadata["text"]
        })

    return retrieved_chunks

In [31]:
# Retrieve compliance-focused context
compliance_query = "data protection gdpr hipaa audit regulatory compliance security privacy"

compliance_rag_results = rag_search(compliance_query, index, top_k=5)

# Join the retrieved chunk texts into one context string for the agent.
combined_compliance_text = "\n\n".join(
    chunk["text"] for chunk in compliance_rag_results
)

print(combined_compliance_text[:400])

# Retrieve legal-focused context
legal_query = "termination clause termination rights governing law jurisdiction legal risk"

legal_rag_results = rag_search(legal_query, index, top_k=5)

combined_legal_text = "\n\n".join(
    chunk["text"] for chunk in legal_rag_results
)

print(combined_legal_text[:400])

15.2 [***].

16. Regulatory Matters.

16.1 Privacy and Security Matters. Concurrently with the execution of this Agreement, the Parties are executing a HIPAA Business Associate Agreement (the "BAA") in the form attached hereto as Exhibit E.

16.2 Technical Standards. The Company will provide Allscripts with Updates so that the Subscription Software Services can be implemented and configured to com
16. TERMINATION

16.1 Termination events: without prejudice to any other rights under this Agreement and/or at Law, either Party shall be entitled to terminate all or part of this Agreement by Notice of termination, as per Clauses 16.4 ("Termination procedure") and 16.6 ("Consequences of termination"), in the following events:

16. TERMINATION

16.1 Termination events: without prejudice to any oth


In [32]:
# Retrieve finance-focused context
finance_query = (
    "payment terms invoicing fees penalties late fee financial liability "
    "compensation reimbursement billing charges"
)

finance_rag_results = rag_search(
    finance_query,
    index,
    top_k=5
)

combined_finance_text = "\n\n".join(
    [c["text"] for c in finance_rag_results]
)

print("FINANCE CONTEXT PREVIEW:\n", combined_finance_text[:400])

# Retrieve operations-focused context.
# The second fragment previously ended with a stray double-quote character
# that became part of the embedded query text; it is removed here.
operations_query = (
    "deliverables obligations timelines milestones service level agreement SLA "
    "performance standards responsibilities duties implementation execution"
)

operations_rag_results = rag_search(
    operations_query,
    index,
    top_k=5
)

combined_operations_text = "\n\n".join(
    [c["text"] for c in operations_rag_results]
)

print("OPERATIONS CONTEXT PREVIEW:\n", combined_operations_text[:400])

FINANCE CONTEXT PREVIEW:
 Source: REYNOLDS CONSUMER PRODUCTS INC., S-1, 11/15/2019

litigation to collect the amount owed and Seller prevails in the litigation, Buyer will reimburse Seller for actual, reasonable, substantiated out-of-pocket expenses incurred by Seller in collecting the delinquent amount and accrued late payment fees on the delinquent amount. Under no circumstance will the late payment fee payable to Seller
OPERATIONS CONTEXT PREVIEW:
 H. Combinational impacts (i.e., how one Service Level affects another);

 I. System implications; and

 J. Issues relating to Applicable Law.

3. SLA TEAM REVIEW.

 A. A joint Metavante-Customer team (the "SLA Team") shall review, evaluate and potentially modify the Service Level Changes and associated Business Case Assessments.

 B. At a minimum, the SLA Team shall consist of personnel designated


In [33]:
# Run Parallel execution
initial_state = {
    "query": "Analyze legal, compliance, finance, and operational risks",

    # Context retrieved per agent in the cells above.
    "combined_legal_text": combined_legal_text,
    "combined_compliance_text": combined_compliance_text,
    "combined_finance_text": combined_finance_text,
    "combined_operations_text": combined_operations_text,

    # Result slots; each agent node fills in its own key.
    "legal": None,
    "compliance": None,
    "finance": None,
    "operations": None
}

# Time the whole graph invocation so the total runtime is recorded and can
# be compared with other runs.
run_started = time.perf_counter()
result = app.invoke(initial_state)
parallel_runtime_seconds = time.perf_counter() - run_started

print(f"Parallel execution finished in {parallel_runtime_seconds:.2f} seconds")

Executed agents concurrently and recorded total runtime.

In [34]:
# Verify Parallel outputs
# Print each key of the final graph state on its own line.
for state_key in result:
    print(state_key)

query
combined_legal_text
combined_compliance_text
combined_finance_text
combined_operations_text
legal
compliance
finance
operations


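Listed the keys of the final graph state. The four result keys are present even before the run (they are initialised to None), so inspect their values to confirm that each agent actually produced output.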

PERSISTING AGENT OUTPUTS INTO MEMORY

In [35]:
import json
import os

PIPELINE_DIR = "pipeline_outputs"   # FIXED path

def load_pipeline_output(filename):
    """Read and parse one UTF-8 JSON file stored under PIPELINE_DIR."""
    json_path = os.path.join(PIPELINE_DIR, filename)
    with open(json_path, "r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed

# Saved pipeline result files, one per agent, loaded in this order.
pipeline_files = (
    "legal_pipeline_output.json",
    "compliance_pipeline_output.json",
    "finance_pipeline_output.json",
    "operations_pipeline_output.json",
)

(
    legal_pipeline_output,
    compliance_pipeline_output,
    finance_pipeline_output,
    operations_pipeline_output,
) = map(load_pipeline_output, pipeline_files)

print("Pipeline outputs loaded successfully")

Pipeline outputs loaded successfully


In [36]:
# Prepare Agent Outputs for Storage
import json

# Keep only the "analysis" section of each saved pipeline output.
legal_output, compliance_output, finance_output, operations_output = (
    pipeline_output["analysis"]
    for pipeline_output in (
        legal_pipeline_output,
        compliance_pipeline_output,
        finance_pipeline_output,
        operations_pipeline_output,
    )
)

In [37]:
# Agent key -> analysis dict, in the order the agents are stored later.
agent_outputs = dict(
    legal=legal_output,
    compliance=compliance_output,
    finance=finance_output,
    operations=operations_output,
)

Collected agent outputs for persistent storage.

In [38]:
# Convert Outputs to Text
def agent_output_to_text(agent_name, output):
    """Render one agent's analysis dict as plain text for embedding.

    Args:
        agent_name: Label written on the first line.
        output: Analysis dict; ``extracted_clauses``, ``risk_level`` and
            ``confidence`` are read, all optional.

    Returns:
        A four-part string: agent, risk level, confidence, then one
        extracted clause per line. Clause entries that are neither strings
        nor dicts are ignored.
    """
    # A missing or null clause list is treated as empty.
    extracted = output.get("extracted_clauses") or []
    if isinstance(extracted, str):
        # A bare string would otherwise be iterated character by character.
        extracted = [extracted]

    clause_texts = []

    for clause in extracted:
        if isinstance(clause, str):
            clause_texts.append(clause)
        elif isinstance(clause, dict):
            # Prefer 'text'; the finance prompt stores the clause body under
            # 'clause_text'. Fall back to the dict's string representation.
            if "text" in clause:
                clause_body = clause["text"]
            elif "clause_text" in clause:
                clause_body = clause["clause_text"]
            else:
                clause_body = str(clause)
            if not isinstance(clause_body, str):
                clause_body = str(clause_body)
            clause_texts.append(clause_body)

    clauses_joined = "\n".join(clause_texts)

    risk = output.get("risk_level", "unknown")
    confidence = output.get("confidence", 0.0)

    return (
        f"Agent: {agent_name}\n"
        f"Risk Level: {risk}\n"
        f"Confidence: {confidence}\n"
        f"Extracted Clauses:\n{clauses_joined}"
    )

Converted structured agent outputs into text for embedding.

In [39]:
# Create vector records
# One record per agent: the text to embed plus the metadata stored with it.
records = [
    {
        "agent": agent,
        "text": agent_output_to_text(agent, output),
        "metadata": {
            "agent": agent,
            "risk_level": output.get("risk_level"),
            "confidence": output.get("confidence"),
        },
    }
    for agent, output in agent_outputs.items()
]

len(records)

4

The text is now converted into Pinecone-ready vector records.

In [40]:
# Pinecone Setup

import os
import json
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

# NOTE(review): this cell repeats the earlier "Pinecone Setup" cell; it
# rebinds `pc` and reloads `model` with the same arguments.
load_dotenv()

# Fail fast with a readable message when the Pinecone key is not set.
assert "PINECONE_API_KEY" in os.environ, "PINECONE_API_KEY not found"

print("Pinecone API key loaded successfully")

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

model = SentenceTransformer("all-MiniLM-L6-v2")

print("Sentence-Transformer model loaded successfully")

Pinecone API key loaded successfully


Sentence-Transformer model loaded successfully


In [41]:
# NOTE(review): repeats the earlier index-setup cell with the same name and
# dimension; `index` is rebound to the same Pinecone index.
INDEX_NAME = "cuad-index-minilm"   
DIMENSION = 384                  

# Names of the indexes that already exist for this Pinecone client.
existing_indexes = [idx["name"] for idx in pc.list_indexes()]

# Create the serverless index only when it does not exist yet.
if INDEX_NAME not in existing_indexes:
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

index = pc.Index(INDEX_NAME)

print(f"Connected to Pinecone index: {INDEX_NAME}")

Connected to Pinecone index: cuad-index-minilm


In [42]:
# Building RAG Search Wrapper
import re
import json
import matplotlib.pyplot as plt
from typing import List, Dict

# # Sentence-Transformers embedding function for Pinecone
# NOTE(review): second definition of embed_batch in this notebook; it
# replaces the earlier one with the same body.
def embed_batch(texts):
    """Encode ``texts`` with the module-level ``model``, progress bar off, NumPy output."""
    return model.encode(
        texts,
        show_progress_bar=False,
        convert_to_numpy=True
    )

# Embed a Query
# NOTE(review): second definition of embed_query in this notebook; it
# replaces the earlier one with the same body.
def embed_query(query: str):
    """Embed one query string via embed_batch and return the vector as a list."""
    return embed_batch([query])[0].tolist()

In [43]:
# Embed records
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed each record
for record in records:
    # Store the vector as a plain list on the record.
    text_vector = embedding_model.encode(record["text"])
    record["embedding"] = text_vector.tolist()

print(f"Embedded {len(records)} agent records")

Embedded 4 agent records


Generated vector embeddings for each agent output.

In [44]:
# Store in vector DB
# One vector per agent; the id depends only on the agent name.
pinecone_vectors = [
    {
        "id": f"agent_memory_{rec['agent']}",
        "values": rec["embedding"],
        "metadata": rec["metadata"],
    }
    for rec in records
]

len(pinecone_vectors)

4

In [45]:
# Write the agent-memory vectors to the index. The ids are built only from
# the agent name, so repeating this upsert targets the same four ids.
index.upsert(vectors=pinecone_vectors)

print(f"Upserted {len(pinecone_vectors)} agent memory vectors into Pinecone")

Upserted 4 agent memory vectors into Pinecone


Pinecone vectors are prepared and then upserted into the vector DB, where they are stored.

In [46]:
# Verify Storage
# NOTE(review): these stats report aggregate vector counts only; they do not
# show that the specific agent-memory ids were written. Consider fetching
# the ids directly to confirm — TODO verify.
index.describe_index_stats()

{'_response_info': {'raw_headers': {'connection': 'keep-alive',
                                    'content-length': '189',
                                    'content-type': 'application/json',
                                    'date': 'Sun, 18 Jan 2026 06:45:21 GMT',
                                    'grpc-status': '0',
                                    'server': 'envoy',
                                    'x-envoy-upstream-service-time': '41',
                                    'x-pinecone-request-id': '3003813227094363939',
                                    'x-pinecone-request-latency-ms': '40',
                                    'x-pinecone-response-duration-ms': '42'}},
 'dimension': 384,
 'index_fullness': 0.0,
 'memoryFullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'__default__': {'vector_count': 58335}},
 'storageFullness': 0.0,
 'total_vector_count': 58335,
 'vector_type': 'dense'}

Verified successful storage of agent outputs in the vector database.

In [47]:
from datetime import datetime, timezone

records = []

CONTRACT_ID = "contract_001"    # If you know contract ID dynamically, replace this.

for agent, output in agent_outputs.items():
    # Take the timestamp once per record so the record and its metadata
    # agree. datetime.utcnow() is deprecated; this produces the same naive
    # UTC ISO string (no offset suffix).
    recorded_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    record = {
        "agent": agent,
        "contract_id": CONTRACT_ID,
        "timestamp": recorded_at,

        # Text that gets embedded for this agent.
        "text": agent_output_to_text(agent, output),

        # Metadata stored alongside the vector.
        "metadata": {
            "agent": agent,
            "contract_id": CONTRACT_ID,
            "timestamp": recorded_at,
            "risk_level": output.get("risk_level"),
            "confidence": output.get("confidence")
        }
    }

    records.append(record)

len(records)

  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),


4

In [48]:
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Attach an embedding (as a plain list) to every record.
for record in records:
    record_vector = embedding_model.encode(record["text"])
    record["embedding"] = record_vector.tolist()

print("Embeddings created for all agent records.")

Embeddings created for all agent records.


In [49]:
# One vector per record; the id combines agent, contract id and position.
pinecone_vectors = [
    {
        "id": f"{rec['agent']}_{rec['contract_id']}_memory_{i}",
        "values": rec["embedding"],
        "metadata": rec["metadata"],
    }
    for i, rec in enumerate(records)
]

len(pinecone_vectors)

4

In [50]:
# Write the contract-scoped agent-memory vectors to the same index that
# holds the contract chunks.
index.upsert(vectors=pinecone_vectors)

print(f"Successfully stored {len(pinecone_vectors)} agent memory vectors in Pinecone.")

Successfully stored 4 agent memory vectors in Pinecone.


In [51]:
# Show aggregate index statistics after the upsert.
index.describe_index_stats()

{'_response_info': {'raw_headers': {'connection': 'keep-alive',
                                    'content-length': '189',
                                    'content-type': 'application/json',
                                    'date': 'Sun, 18 Jan 2026 06:45:27 GMT',
                                    'grpc-status': '0',
                                    'server': 'envoy',
                                    'x-envoy-upstream-service-time': '39',
                                    'x-pinecone-request-id': '8379900452772418719',
                                    'x-pinecone-request-latency-ms': '38',
                                    'x-pinecone-response-duration-ms': '40'}},
 'dimension': 384,
 'index_fullness': 0.0,
 'memoryFullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'__default__': {'vector_count': 58335}},
 'storageFullness': 0.0,
 'total_vector_count': 58335,
 'vector_type': 'dense'}

In [52]:
# Unfiltered similarity query: no metadata filter is applied here.
query_embedding = embedding_model.encode("Show compliance requirements")
query_vec = query_embedding.tolist()

result = index.query(vector=query_vec, top_k=3, include_metadata=True)

result

QueryResponse(matches=[{'id': 'KINGPHARMACEUTICALSINC_08_09_2006-EX-10.1-PROMOTION '
       'AGREEMENT_cleaned_chunk_113',
 'metadata': {'chunk_id': 113,
              'contract_id': 'KINGPHARMACEUTICALSINC_08_09_2006-EX-10.1-PROMOTION '
                             'AGREEMENT_cleaned',
              'text': 'Section 5.3 Compliance'},
 'score': 0.67091465,
 'values': []}, {'id': 'KINGPHARMACEUTICALSINC_08_09_2006-EX-10_1-PROMOTION_AGREEMENT_cleaned_chunk_113',
 'metadata': {'chunk_id': 113,
              'contract_id': 'KINGPHARMACEUTICALSINC_08_09_2006-EX-10.1-PROMOTION '
                             'AGREEMENT_cleaned',
              'text': 'Section 5.3 Compliance'},
 'score': 0.670913756,
 'values': []}, {'id': 'ExactSciencesCorp_20180822_8-K_EX-10.1_11331629_EX-10.1_Promotion '
       'Agreement_cleaned_chunk_208',
 'metadata': {'chunk_id': 208,
              'contract_id': 'ExactSciencesCorp_20180822_8-K_EX-10.1_11331629_EX-10.1_Promotion '
                             'Agreement

QUERYING STORED AGENT MEMORY (RECALL & REUSE)

In [53]:
# Define Memory Query Function
def query_agent_memory(query_text, top_k=5, agent_filter=None):
    """Search the index for stored agent-memory records similar to ``query_text``.

    Results are restricted to vectors whose metadata has a ``risk_level``
    field; a truthy ``agent_filter`` additionally restricts them to that
    agent. Returns the index's query response unchanged.
    """
    # Agent-memory records are written with a risk_level metadata field.
    metadata_filter = {"risk_level": {"$exists": True}}
    if agent_filter:
        metadata_filter = {**metadata_filter, "agent": agent_filter}

    return index.query(
        vector=embedding_model.encode(query_text).tolist(),
        top_k=top_k,
        include_metadata=True,
        filter=metadata_filter,
    )

Defined a helper to retrieve stored agent outputs from the vector database with optional agent filtering.

In [54]:
# Query Legal Memory
# Top three stored records written by the legal agent.
legal_results = query_agent_memory(
    "termination clauses or legal obligations",
    top_k=3,
    agent_filter="legal",
)

legal_results


QueryResponse(matches=[{'id': 'legal_contract_001_memory_0',
 'metadata': {'agent': 'legal',
              'confidence': 0.95,
              'contract_id': 'contract_001',
              'risk_level': 'Medium',
              'timestamp': '2026-01-18T06:45:13.352221'},
 'score': 0.564453125,
 'values': []}, {'id': 'agent_memory_legal',
 'metadata': {'agent': 'legal', 'confidence': 0.95, 'risk_level': 'Medium'},
 'score': 0.564453125,
 'values': []}], namespace='', usage={'read_units': 1}, _response_info={'raw_headers': {'date': 'Sun, 18 Jan 2026 06:45:29 GMT', 'content-type': 'application/json', 'content-length': '402', 'connection': 'keep-alive', 'x-pinecone-max-indexed-lsn': '999', 'x-pinecone-request-latency-ms': '6', 'x-pinecone-request-id': '3436597966783235708', 'x-envoy-upstream-service-time': '4', 'x-pinecone-response-duration-ms': '8', 'grpc-status': '0', 'server': 'envoy'}})

Queried stored legal analysis from vector memory.

In [55]:
# Inspect Retrieved Memory
# (label, metadata key, fallback) for each metadata line printed per match.
metadata_lines = (
    ("Agent:", "agent", "unknown"),
    ("Risk:", "risk_level", "N/A"),
    ("Confidence:", "confidence", "N/A"),
    ("Timestamp:", "timestamp", "N/A"),
)

for hit in legal_results["matches"]:
    hit_metadata = hit.get("metadata", {})

    print("\n--- Result ---")
    print("Score:", hit.get("score"))
    for label, field, fallback in metadata_lines:
        print(label, hit_metadata.get(field, fallback))


--- Result ---
Score: 0.564453125
Agent: legal
Risk: Medium
Confidence: 0.95
Timestamp: 2026-01-18T06:45:13.352221

--- Result ---
Score: 0.564453125
Agent: legal
Risk: Medium
Confidence: 0.95
Timestamp: N/A


Verified retrieved legal agent records and associated metadata.

In [56]:
# Retrieve Finance Memory
# Top three stored records written by the finance agent.
finance_results = query_agent_memory(
    "payment terms and financial penalties",
    top_k=3,
    agent_filter="finance",
)

finance_results

QueryResponse(matches=[{'id': 'agent_memory_finance',
 'metadata': {'agent': 'finance', 'confidence': 0.88, 'risk_level': 'high'},
 'score': 0.466965675,
 'values': []}, {'id': 'finance_contract_001_memory_2',
 'metadata': {'agent': 'finance',
              'confidence': 0.88,
              'contract_id': 'contract_001',
              'risk_level': 'high',
              'timestamp': '2026-01-18T06:45:13.353516'},
 'score': 0.466965675,
 'values': []}], namespace='', usage={'read_units': 1}, _response_info={'raw_headers': {'date': 'Sun, 18 Jan 2026 06:45:29 GMT', 'content-type': 'application/json', 'content-length': '406', 'connection': 'keep-alive', 'x-pinecone-max-indexed-lsn': '999', 'x-pinecone-request-latency-ms': '11', 'x-pinecone-request-id': '3906875467410972017', 'x-envoy-upstream-service-time': '6', 'x-pinecone-response-duration-ms': '12', 'grpc-status': '0', 'server': 'envoy'}})

Fetched stored finance-related agent outputs from memory.

In [57]:
# Combine Memory Responses
# Group the retrieved matches by the agent they came from.
combined_memory = dict(
    legal_memory=legal_results["matches"],
    finance_memory=finance_results["matches"],
)

combined_memory

{'legal_memory': [{'id': 'legal_contract_001_memory_0',
   'metadata': {'agent': 'legal',
                'confidence': 0.95,
                'contract_id': 'contract_001',
                'risk_level': 'Medium',
                'timestamp': '2026-01-18T06:45:13.352221'},
   'score': 0.564453125,
   'values': []},
  {'id': 'agent_memory_legal',
   'metadata': {'agent': 'legal', 'confidence': 0.95, 'risk_level': 'Medium'},
   'score': 0.564453125,
   'values': []}],
 'finance_memory': [{'id': 'agent_memory_finance',
   'metadata': {'agent': 'finance', 'confidence': 0.88, 'risk_level': 'high'},
   'score': 0.466965675,
   'values': []},
  {'id': 'finance_contract_001_memory_2',
   'metadata': {'agent': 'finance',
                'confidence': 0.88,
                'contract_id': 'contract_001',
                'risk_level': 'high',
                'timestamp': '2026-01-18T06:45:13.353516'},
   'score': 0.466965675,
   'values': []}]}

Merged retrieved memory from multiple agents for reuse.

In [58]:
# Use Memory Instead of Re-running Agents
def memory_to_text(matches):
    """Summarize memory matches as one line each, joined by newlines.

    Each line shows the upper-cased agent name, risk level, confidence and
    timestamp taken from the match metadata, with "unknown"/"N/A" used for
    missing fields.
    """
    def _summarize(match):
        meta = match.get("metadata", {})
        agent_label = meta.get("agent", "unknown").upper()
        risk = meta.get("risk_level", "N/A")
        conf = meta.get("confidence", "N/A")
        timestamp = meta.get("timestamp", "N/A")
        return f"[{agent_label}] Risk: {risk} | Confidence: {conf} | Timestamp: {timestamp}"

    return "\n".join(_summarize(match) for match in matches)

# Re-query both agents' memory with the default top_k, rebinding the result names.
legal_results = query_agent_memory(query_text="termination clause", agent_filter="legal")
finance_results = query_agent_memory(query_text="financial penalties", agent_filter="finance")

In [59]:
# Combine legal + finance results
legal_matches = legal_results["matches"]
finance_matches = finance_results["matches"]
combined_matches = legal_matches + finance_matches

# Convert memory into usable summary text
memory_summary = memory_to_text(combined_matches)

print(memory_summary)

[LEGAL] Risk: Medium | Confidence: 0.95 | Timestamp: 2026-01-18T06:45:13.352221
[LEGAL] Risk: Medium | Confidence: 0.95 | Timestamp: N/A
[FINANCE] Risk: high | Confidence: 0.88 | Timestamp: N/A
[FINANCE] Risk: high | Confidence: 0.88 | Timestamp: 2026-01-18T06:45:13.353516


Reused stored agent outputs as context instead of re-running agents.

In [60]:
# Query without agent filter
# agent_filter=None keeps only the default "has risk_level" restriction.
all_memory_results = query_agent_memory(
    "contract risks, compliance obligations, payment terms, operational risks",
    top_k=10,
    agent_filter=None,
)

all_memory_results

QueryResponse(matches=[{'id': 'compliance_refined_memory',
 'metadata': {'agent': 'compliance_refined',
              'confidence': 0.95,
              'risk_level': 'high'},
 'score': 0.588075221,
 'values': []}, {'id': 'agent_memory_operations',
 'metadata': {'agent': 'operations',
              'confidence': 0.85,
              'risk_level': 'medium'},
 'score': 0.494646072,
 'values': []}, {'id': 'operations_contract_001_memory_3',
 'metadata': {'agent': 'operations',
              'confidence': 0.85,
              'contract_id': 'contract_001',
              'risk_level': 'medium',
              'timestamp': '2026-01-18T06:45:13.353516'},
 'score': 0.494646072,
 'values': []}, {'id': 'legal_contract_001_memory_0',
 'metadata': {'agent': 'legal',
              'confidence': 0.95,
              'contract_id': 'contract_001',
              'risk_level': 'Medium',
              'timestamp': '2026-01-18T06:45:13.352221'},
 'score': 0.474999428,
 'values': []}, {'id': 'agent_memory_lega

In [61]:
# Inspect retrieved memory
# (label, metadata key, fallback) for each line printed per memory record.
record_lines = (
    ("Agent:", "agent", "unknown"),
    ("Risk Level:", "risk_level", "N/A"),
    ("Confidence:", "confidence", "N/A"),
    ("Contract ID:", "contract_id", "N/A"),
    ("Timestamp:", "timestamp", "N/A"),
)

for memory_match in all_memory_results["matches"]:
    record_meta = memory_match.get("metadata", {})

    print("\n--- Memory Record ---")
    for label, field, fallback in record_lines:
        print(label, record_meta.get(field, fallback))


--- Memory Record ---
Agent: compliance_refined
Risk Level: high
Confidence: 0.95
Contract ID: N/A
Timestamp: N/A

--- Memory Record ---
Agent: operations
Risk Level: medium
Confidence: 0.85
Contract ID: N/A
Timestamp: N/A

--- Memory Record ---
Agent: operations
Risk Level: medium
Confidence: 0.85
Contract ID: contract_001
Timestamp: 2026-01-18T06:45:13.353516

--- Memory Record ---
Agent: legal
Risk Level: Medium
Confidence: 0.95
Contract ID: contract_001
Timestamp: 2026-01-18T06:45:13.352221

--- Memory Record ---
Agent: legal
Risk Level: Medium
Confidence: 0.95
Contract ID: N/A
Timestamp: N/A

--- Memory Record ---
Agent: finance
Risk Level: high
Confidence: 0.88
Contract ID: N/A
Timestamp: N/A

--- Memory Record ---
Agent: finance
Risk Level: high
Confidence: 0.88
Contract ID: contract_001
Timestamp: 2026-01-18T06:45:13.353516

--- Memory Record ---
Agent: compliance
Risk Level: high
Confidence: 0.95
Contract ID: contract_001
Timestamp: 2026-01-18T06:45:13.353516

--- Memory Reco

In [62]:
# Compare risks across agents
def compare_agent_risks(matches):
    """Summarise risk level and confidence per agent from memory matches.

    Only the first match seen for each agent is kept; later matches for the
    same agent are ignored.

    Args:
        matches: iterable of match records exposing ``.get``; each record's
            ``"metadata"`` mapping may carry ``agent``, ``risk_level`` and
            ``confidence``.

    Returns:
        dict mapping agent name (``"unknown"`` when absent) to
        ``{"risk_level": ..., "confidence": ...}``, with ``"N/A"`` standing
        in for missing values.
    """
    per_agent = {}

    for record in matches:
        details = record.get("metadata", {})
        # setdefault leaves an existing entry untouched, so the first match wins
        per_agent.setdefault(
            details.get("agent", "unknown"),
            {
                "risk_level": details.get("risk_level", "N/A"),
                "confidence": details.get("confidence", "N/A"),
            },
        )

    return per_agent
# Build the per-agent comparison from the retrieved matches and display it
risk_comparison = compare_agent_risks(all_memory_results["matches"])
risk_comparison

{'compliance_refined': {'risk_level': 'high', 'confidence': 0.95},
 'operations': {'risk_level': 'medium', 'confidence': 0.85},
 'legal': {'risk_level': 'Medium', 'confidence': 0.95},
 'finance': {'risk_level': 'high', 'confidence': 0.88},
 'compliance': {'risk_level': 'high', 'confidence': 0.95},
 'legal_refined': {'risk_level': 'Medium', 'confidence': 0.8}}

Agent memory was queried without filtering by agent type, enabling retrieval of all stored analyses.  
The retrieved records were used to compare risk levels and confidence scores across Legal, Compliance, Finance, and Operations agents without re-running any models.

CROSS AGENT REFINEMENT

In [63]:
# Retrieve All Agent Memories
# Query with no agent filter and keep at most the top 10 matches.
# NOTE(review): query_agent_memory is defined outside this section — confirm that
# agent_filter=None really means "all agents".
all_agent_memory = query_agent_memory(
    query_text="contract risk analysis",
    agent_filter=None,
    top_k=10
)

Retrieved the stored memories of all agents; the shared risk context is built from them in the next cell.

In [64]:
# Build Shared Context
# One "<agent> risk: <level>" line per retrieved match, in retrieval order
shared_context = "\n".join(
    f"{meta.get('agent', 'unknown')} risk: {meta.get('risk_level', 'N/A')}"
    for meta in (m.get("metadata", {}) for m in all_agent_memory["matches"])
)

print(shared_context)

compliance_refined risk: high
operations risk: medium
operations risk: medium
operations_refined risk: Medium
legal_refined risk: Medium
finance_refined risk: high
finance risk: high
finance risk: high
legal risk: Medium
legal risk: Medium


Constructed a shared context summarizing risk signals from all agents.

Refined Legal Agent

In [82]:
# Pass Context to Legal Agent
# Send the shared risk context to the legal agent. The result is the raw model
# text (see the displayed output: a fenced JSON string), not a parsed dict.
refined_legal = legal_agent.run(
    f"""
Based on the following agent risk assessments:
{shared_context}

Re-evaluate and refine the legal risk.
Return ONLY valid JSON.
"""
)

refined_legal

'```json\n{\n  "extracted_clauses": [\n    {\n      "clause_type": "Termination",\n      "clause_text": "The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days.",\n      "risk_level": "Medium",\n      "confidence": 0.85,\n      "evidence": [\n        "The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days."\n      ]\n    },\n    {\n      "clause_type": "Governing Law",\n      "clause_text": "This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.",\n      "risk_level": "Low",\n      "confidence": 0.95,\n      "evidence": [\n        "This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware."\n 

In [91]:
# Parse the raw legal response into the validated output structure.
# NOTE(review): the same call is repeated at the top of the "Update legal memory" cell.
refined_legal_output = validate_agent_output(refined_legal, clause_type="Legal")
refined_legal_output

{'clause_type': 'Legal',
 'extracted_clauses': [{'clause_type': 'Termination',
   'clause_text': 'The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days.',
   'risk_level': 'Medium',
   'confidence': 0.85,
   'evidence': ['The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days.']},
  {'clause_type': 'Governing Law',
   'clause_text': 'This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.',
   'risk_level': 'Low',
   'confidence': 0.95,
   'evidence': ['This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.']},
  {'clause_type': 'Jurisdiction',
   'clause_text': 'Any legal action arising o

Provided the Legal Agent with risk signals from other agents to re-evaluate and refine the legal risk assessment.

In [83]:
# Update legal memory 
# Parse the raw legal response into the validated output structure
refined_legal_output = validate_agent_output(
    refined_legal,
    clause_type="Legal"
)

# Convert to text and embed
refined_text = agent_output_to_text("legal_refined", refined_legal_output)
refined_embedding = embedding_model.encode(refined_text).tolist()

# Stored under a fixed id, so each run targets the same record.
# NOTE(review): metadata has no contract_id/timestamp; records stored without them
# print "N/A" for both in the memory-inspection cell — confirm this is intended.
index.upsert([
    {
        "id": "legal_refined_memory",
        "values": refined_embedding,
        "metadata": {
            "agent": "legal_refined",
            "risk_level": refined_legal_output["risk_level"],
            "confidence": refined_legal_output["confidence"]
        }
    }
])

print("Refined legal memory stored.")

Refined legal memory stored.


Stored the refined legal assessment back into the vector database, enabling persistent cross-agent reasoning.

In [84]:
# save refined legal output
import json
import os

SAVE_DIR = "refined_outputs"
os.makedirs(SAVE_DIR, exist_ok=True)

# Build the destination once so the file written and the path reported cannot
# drift apart (the final-output save cells report their real path the same way).
legal_refined_path = os.path.join(SAVE_DIR, "legal_refined_output.json")

with open(legal_refined_path, "w", encoding="utf-8") as f:
    json.dump(refined_legal_output, f, indent=2)

print("Saved →", legal_refined_path)

Saved → refined_outputs/legal_refined_output.json


Refined Finance Agent

In [98]:
# System prompt for the refined Finance Agent.
# This is an f-string: combined_finance_text and shared_context are captured when
# this cell runs, so re-run it after either of them changes.
# Doubled braces ({{ }}) are literal braces in the JSON format example.
FINANCE_REFINEMENT_PROMPT = f"""
You are a Finance Risk Analysis Agent.

Below is the contract text:
----------------
{combined_finance_text}
----------------

Below are risk signals from other agents (for context only):
----------------
{shared_context}
----------------

Your task:
1. Extract finance-related clauses ONLY from the contract text (not from agent risks).
2. Clause text MUST be copied verbatim from the contract.
3. Use other agents' risks ONLY to adjust the overall finance risk level if needed.
4. Do NOT treat agent risk statements as clauses.

Return ONLY valid JSON in this format:
{{
  "clause_type": "Finance",
  "extracted_clauses": [
    {{
      "clause_type": "",
      "clause_text": "",
      "risk_level": "",
      "confidence": 0.0,
      "evidence": [""]
    }}
  ],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}}
"""

In [None]:
# Initialize Refined Finance Agent
# NOTE(review): this binds the plain name `finance_agent`; if an earlier cell
# defined a base finance agent under that name, it is replaced from here on — confirm.
# The other refined agents in this notebook use a `_refined` suffix instead.
finance_agent = BaseAgent(
    agent_name="Finance Agent (Refined)",
    system_prompt=FINANCE_REFINEMENT_PROMPT,
    model="gemma3:4b"
)

In [101]:
# Run the refined Finance Agent (the shared context is already embedded in its system prompt)
refined_finance_raw = finance_agent.run("Refine finance clauses")

# Parse the raw model response into the validated output structure
refined_finance_output = validate_agent_output(
    refined_finance_raw,
    clause_type="Finance"
)

refined_finance_output

{'clause_type': 'Finance',
 'extracted_clauses': [{'clause_type': 'Late Fees and Collection Costs',
   'clause_text': 'If Buyer fails to pay Seller an amount owed under this Agreement by the invoice due date, then Buyer will owe Seller: (i) the delinquent amount; and (ii) a late payment fee equal to two percent (2%) of the delinquent amount for each full or partial calendar month past the invoice due date that the delinquent amount remains unpaid.',
   'risk_level': 'High',
   'confidence': 0.95,
   'evidence': ['Section 3.2 TERMS OF PAYMENT']},
  {'clause_type': 'Late Fees and Collection Costs',
   'clause_text': 'Delinquent payments are subject to a late payment charge of one and one half percent (1.5%) per month commencing sixty (60) days from the invoice date.',
   'risk_level': 'High',
   'confidence': 0.95,
   'evidence': ['Section 3.2 TERMS OF PAYMENT']},
  {'clause_type': 'Interest on Late Payments',
   'clause_text': 'Any amount that is not paid when due will bear simple inter

In [103]:
# Convert to text and embed
# NOTE(review): refined_text / refined_embedding reuse the names from the legal
# cell above, so the legal values are overwritten here.
refined_text = agent_output_to_text("finance_refined", refined_finance_output)
refined_embedding = embedding_model.encode(refined_text).tolist()

# Stored under a fixed id, so each run targets the same record.
# NOTE(review): metadata has no contract_id/timestamp — confirm this is intended.
index.upsert([
    {
        "id": "finance_refined_memory",
        "values": refined_embedding,
        "metadata": {
            "agent": "finance_refined",
            "risk_level": refined_finance_output["risk_level"],
            "confidence": refined_finance_output["confidence"]
        }
    }
])

print("Refined finance memory stored.")

Refined finance memory stored.


In [104]:
import json
import os

SAVE_DIR = "refined_outputs"
os.makedirs(SAVE_DIR, exist_ok=True)

# Build the destination once so the file written and the path reported cannot
# drift apart (the final-output save cells report their real path the same way).
finance_refined_path = os.path.join(SAVE_DIR, "finance_refined_output.json")

with open(finance_refined_path, "w", encoding="utf-8") as f:
    json.dump(refined_finance_output, f, indent=2)

print("Saved →", finance_refined_path)

Saved → refined_outputs/finance_refined_output.json


Refined Compliance Agent through Finance output

In [105]:
# System prompt for the refined Compliance Agent.
# This is an f-string: combined_compliance_text and refined_finance_output are
# captured when this cell runs, so re-run it after either of them changes.
# The finance output is embedded as JSON text rather than the Python dict repr:
# the repr uses single quotes and is not valid JSON, while the prompt asks the
# model to answer in JSON. ensure_ascii=False keeps typographic characters from
# the contract readable instead of \uXXXX escapes.
# Doubled braces ({{ }}) are literal braces in the JSON schema example.
COMPLIANCE_REFINEMENT_PROMPT = f"""
You are a Compliance Risk Analysis Agent.

Below is the contract text (retrieved for compliance context):
----------------
{combined_compliance_text}
----------------

Below is the refined Finance Agent output (use only for risk influence, NOT as clause text):
----------------
{json.dumps(refined_finance_output, ensure_ascii=False)}
----------------

Your task:
1. Extract compliance-related clauses ONLY from the contract text.
   These may include:
   - Data protection
   - Regulatory obligations
   - Audits and reporting
   - Privacy / security
   - Compliance with laws
   - Certifications (GDPR, HIPAA, SOC2, ISO)

2. DO NOT treat Finance Agent text as contract clauses.
3. Use financial risks ONLY to adjust overall compliance risk (financial penalties often raise compliance exposure).
4. All clause_text MUST be copied verbatim from the contract.
5. Produce the final output in this JSON schema:

{{
  "clause_type": "Compliance",
  "extracted_clauses": [
    {{
      "clause_type": "",
      "clause_text": "",
      "risk_level": "",
      "confidence": 0.0,
      "evidence": [""]
    }}
  ],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}}
"""

In [106]:
# initialize Refined Compliance Agent
# The refinement prompt (contract text plus finance output) is supplied as the system prompt
compliance_agent_refined = BaseAgent(
    agent_name="Compliance Agent (Refined)",
    system_prompt=COMPLIANCE_REFINEMENT_PROMPT,
    model="gemma3:4b"
)

In [107]:
# Run the refined Compliance Agent; all context lives in its system prompt
refined_compliance_raw = compliance_agent_refined.run(
    "Re-evaluate compliance clauses using finance risk."
)

# Parse the raw model response into the validated output structure
refined_compliance_output = validate_agent_output(
    refined_compliance_raw,
    clause_type="Compliance"
)

refined_compliance_output

{'clause_type': 'Compliance',
 'extracted_clauses': [{'clause_type': 'Regulatory Matters',
   'clause_text': '16.1 Privacy and Security Matters. Concurrently with the execution of this Agreement, the Parties are executing a HIPAA Business Associate Agreement (the "BAA") in the form attached hereto as Exhibit E.',
   'risk_level': 'High',
   'confidence': 0.98,
   'evidence': ['16.1']},
  {'clause_type': 'Technical Standards',
   'clause_text': '16.2 Technical Standards. The Company will provide Allscripts with Updates so that the Subscription Software Services can be implemented and configured to comply in all material respects with applicable privacy and security standards (e.g., HITECH, HIPAA, and Omnibus rule) within a reasonably practicable timeframe (based on the scope of required enhancements and other factors) after their final, formal adoption and publication by the Secretary of the U.S. Department of Health and Human Services.',
   'risk_level': 'High',
   'confidence': 0.95,


In [108]:
# Store refined compliance in vector DB

# Convert refined output to text
compliance_refined_text = agent_output_to_text(
    "compliance_refined",
    refined_compliance_output
)

# Embed for Pinecone
compliance_refined_embedding = embedding_model.encode(
    compliance_refined_text
).tolist()

# Upsert in vector DB
# Stored under a fixed id, so each run targets the same record.
# NOTE(review): metadata has no contract_id/timestamp; the memory-inspection output
# shows "N/A" for both on the compliance_refined record — confirm this is intended.
index.upsert([
    {
        "id": "compliance_refined_memory",
        "values": compliance_refined_embedding,
        "metadata": {
            "agent": "compliance_refined",
            "risk_level": refined_compliance_output["risk_level"],
            "confidence": refined_compliance_output["confidence"]
        }
    }
])

print("Refined compliance memory stored.")

Refined compliance memory stored.


In [109]:
# save refined compliance output
SAVE_DIR = "refined_outputs"
os.makedirs(SAVE_DIR, exist_ok=True)

# Build the destination once so the file written and the path reported cannot
# drift apart (the final-output save cells report their real path the same way).
compliance_refined_path = os.path.join(SAVE_DIR, "compliance_refined_output.json")

with open(compliance_refined_path, "w", encoding="utf-8") as f:
    json.dump(refined_compliance_output, f, indent=2)

print("Saved →", compliance_refined_path)

Saved → refined_outputs/compliance_refined_output.json


Compliance Agent re-evaluated its risk assessment after reading Finance Agent outputs, enabling risk escalation based on financial penalties.

Refined Operations Agent through Finance + Compliance Output

In [111]:
# System prompt for the refined Operations Agent.
# This is an f-string: combined_operations_text, refined_finance_output and
# refined_compliance_output are captured when this cell runs, so re-run it after
# any of them changes.
# Both agent outputs are embedded as JSON text rather than the Python dict repr:
# the repr uses single quotes and is not valid JSON, while the prompt asks the
# model to answer in JSON. ensure_ascii=False keeps typographic characters from
# the contract readable instead of \uXXXX escapes.
# Doubled braces ({{ }}) are literal braces in the JSON format example.
OPERATIONS_REFINEMENT_PROMPT = f"""
You are an Operations Risk Analysis Agent.

Below is the contract text (for operational context):
----------------
{combined_operations_text}
----------------

Below is the refined Finance Agent output (for cost & penalty impact):
----------------
{json.dumps(refined_finance_output, ensure_ascii=False)}
----------------

Below is the refined Compliance Agent output (for regulatory & audit impact):
----------------
{json.dumps(refined_compliance_output, ensure_ascii=False)}
----------------

Your task:
1. Extract operational clauses ONLY from the contract text.
   These may include:
   - Deliverables
   - Timelines and milestones
   - Service obligations
   - Performance standards / SLAs
   - Execution responsibilities

2. DO NOT treat Finance or Compliance outputs as contract clauses.
3. Use Finance and Compliance risks ONLY to adjust the overall operational risk.
4. Clause text MUST be copied verbatim from the contract.

Return ONLY valid JSON in this format:
{{
  "clause_type": "Operations",
  "extracted_clauses": [
    {{
      "clause_type": "",
      "clause_text": "",
      "risk_level": "",
      "confidence": 0.0,
      "evidence": [""]
    }}
  ],
  "risk_level": "",
  "confidence": 0.0,
  "evidence": []
}}
"""

In [None]:
# Initialize Refined Operations Agent
# The refinement prompt (contract text plus finance and compliance outputs) is supplied as the system prompt
operations_agent_refined = BaseAgent(
    agent_name="Operations Agent (Refined)",
    system_prompt=OPERATIONS_REFINEMENT_PROMPT,
    model="gemma3:4b"
)

In [113]:
# Run the refined Operations Agent; all context lives in its system prompt
refined_operations_raw = operations_agent_refined.run(
    "Re-evaluate operational execution risk using finance and compliance context."
)

# Parse the raw model response into the validated output structure
refined_operations_output = validate_agent_output(
    refined_operations_raw,
    clause_type="Operations"
)

refined_operations_output

{'clause_type': 'Operations',
 'extracted_clauses': [{'clause_type': 'SLA Review',
   'clause_text': 'A joint Metavante-Customer team (the "SLA Team") shall review, evaluate and potentially modify the Service Level Changes and associated Business Case Assessments.',
   'risk_level': 'Medium',
   'confidence': 0.97,
   'evidence': ['3. SLA TEAM REVIEW']},
  {'clause_type': 'Resource Assignment',
   'clause_text': 'Both Parties agree to assign sufficient resources and personnel to discharge their respective responsibilities under this Agreement in a timely manner and at all times operating using a professional standard of work as consistent with industry standards.',
   'risk_level': 'Medium',
   'confidence': 0.95,
   'evidence': ['3.6. Both Parties agree to assign sufficient resources and personnel to discharge their respective responsibilities under this Agreement in a timely manner and at all times operating using a professional standard of work as consistent with industry standards'

In [114]:
# Store operations memory in Vector DB

# Convert refined output to text
operations_refined_text = agent_output_to_text(
    "operations_refined",
    refined_operations_output
)

# Embed
operations_refined_embedding = embedding_model.encode(
    operations_refined_text
).tolist()

# Upsert
# Stored under a fixed id, so each run targets the same record.
# NOTE(review): metadata has no contract_id/timestamp — confirm this is intended.
index.upsert([
    {
        "id": "operations_refined_memory",
        "values": operations_refined_embedding,
        "metadata": {
            "agent": "operations_refined",
            "risk_level": refined_operations_output["risk_level"],
            "confidence": refined_operations_output["confidence"]
        }
    }
])

print("Refined operations memory stored.")

Refined operations memory stored.


In [115]:
SAVE_DIR = "refined_outputs"
os.makedirs(SAVE_DIR, exist_ok=True)

# Build the destination once so the file written and the path reported cannot
# drift apart (the final-output save cells report their real path the same way).
operations_refined_path = os.path.join(SAVE_DIR, "operations_refined_output.json")

with open(operations_refined_path, "w", encoding="utf-8") as f:
    json.dump(refined_operations_output, f, indent=2)

print("Saved →", operations_refined_path)

Saved → refined_outputs/operations_refined_output.json


Saved Operations agent refined output using Compliance + Finance output.

FINAL CONTRACT LEVEL JSON OUTPUT

In [116]:
# Define final output schema
# Template listing the keys of the final contract-level output.
# NOTE(review): no visible cell reads this template — `final_output` is built
# from its own literal with the same keys, so the two must be kept in sync by hand.
FINAL_CONTRACT_OUTPUT_SCHEMA = {
    "contract_id": "",
    "generated_at": "",
    "overall_risk_level": "",
    "legal_analysis": {},
    "finance_analysis": {},
    "compliance_analysis": {},
    "operations_analysis": {}
}

FINAL_CONTRACT_OUTPUT_SCHEMA

{'contract_id': '',
 'generated_at': '',
 'overall_risk_level': '',
 'legal_analysis': {},
 'finance_analysis': {},
 'compliance_analysis': {},
 'operations_analysis': {}}

Defined a unified JSON schema containing the contract ID, the generation timestamp, the overall risk level, and one analysis section per agent (legal, finance, compliance, operations).

In [118]:
# Retrieve latest refined memories

import json
import os

REFINED_DIR = "refined_outputs"

def load_refined(name):
    """Read one refined agent output from REFINED_DIR and return the parsed JSON.

    Args:
        name: file name (not a full path) inside REFINED_DIR.

    Returns:
        The decoded JSON content of the file.
    """
    with open(os.path.join(REFINED_DIR, name), "r", encoding="utf-8") as handle:
        return json.loads(handle.read())

# Reload every refined output from disk, rebinding the in-memory variables of the same names
refined_legal_output = load_refined("legal_refined_output.json")
refined_finance_output = load_refined("finance_refined_output.json")
refined_compliance_output = load_refined("compliance_refined_output.json")
refined_operations_output = load_refined("operations_refined_output.json")

print("Loaded all refined agent outputs.")

Loaded all refined agent outputs.


The most recent refined outputs from all agents were loaded from disk to ensure the final contract analysis uses validated and updated results.

In [119]:
# Merge All Agent Outputs Into One Structure

# generated_at and overall_risk_level start empty; later cells fill them in
final_output = {
    "contract_id": "contract_001",   # or dynamically insert your contract ID
    "generated_at": "",
    "overall_risk_level": "",
    "legal_analysis": refined_legal_output,
    "finance_analysis": refined_finance_output,
    "compliance_analysis": refined_compliance_output,
    "operations_analysis": refined_operations_output
}

Refined outputs from Legal, Finance, Compliance, and Operations agents were combined into a single structured object representing the complete contract analysis.

In [120]:
# Compute Overall Risk Level
def compute_overall_risk(final_output):
    """Aggregate the four agent risk levels into one contract-level rating.

    The highest recognised level wins (high > medium > low). Levels are
    compared case-insensitively and ignoring surrounding whitespace.

    Args:
        final_output: dict with "legal_analysis", "finance_analysis",
            "compliance_analysis" and "operations_analysis" entries, each a
            dict that may carry a "risk_level".

    Returns:
        "High", "Medium" or "Low"; "Unknown" when no agent reported a
        recognised level (for example every agent fell back to "unknown"),
        so that missing assessments are never reported as low risk.

    Raises:
        KeyError: if one of the four analysis sections is missing.
    """
    analysis_keys = (
        "legal_analysis",
        "finance_analysis",
        "compliance_analysis",
        "operations_analysis",
    )
    # A null/missing risk_level is coerced to "" instead of crashing on .lower()
    reported = {
        str(final_output[key].get("risk_level") or "").strip().lower()
        for key in analysis_keys
    }

    # Priority system: high > medium > low
    for level in ("high", "medium", "low"):
        if level in reported:
            return level.capitalize()

    return "Unknown"

# Store the aggregated level on the final output and display it
overall_risk = compute_overall_risk(final_output)
final_output["overall_risk_level"] = overall_risk

overall_risk

'High'

An overall contract risk level was computed by aggregating individual agent risks using a priority rule where High risk overrides Medium and Low.

In [121]:
from datetime import datetime, timezone

# datetime.utcnow() is deprecated and returns a naive value. Take a
# timezone-aware UTC "now" and drop tzinfo only when rendering, so the
# established "<ISO timestamp>Z" format is unchanged.
final_output["generated_at"] = (
    datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
)

  final_output["generated_at"] = datetime.utcnow().isoformat() + "Z"


A UTC timestamp was added to the final output to record when the contract analysis was generated, supporting traceability and versioning.

In [122]:
# Persist the merged contract-level analysis as pretty-printed JSON
FINAL_DIR = "final_contract_outputs"
os.makedirs(FINAL_DIR, exist_ok=True)

output_path = os.path.join(FINAL_DIR, "final_contract_analysis.json")

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(final_output, f, indent=2)

print("Saved →", output_path)

Saved → final_contract_outputs\final_contract_analysis.json


Report Template Design (Human-Readable Output)

In [125]:
# Define Report Sections

# Base dictionary that build_report_sections copies and extends.
# NOTE(review): the "sections" list is not read by build_report_sections or
# build_full_report in the visible cells; build_full_report hard-codes its own headings.
REPORT_SECTIONS = {
    "title": "Contract Risk Analysis Report",
    "sections": [
        "Executive Summary",
        "Legal Analysis",
        "Finance Analysis",
        "Compliance Analysis",
        "Operations Analysis",
        "Overall Risk Assessment"
    ]
}

We initialized a standardized report structure containing all the major sections required for a human-readable summary. This serves as the outline for converting agent outputs into a narrative report.

In [126]:
# Convert the final JSON output into structured text blocks

def extract_clauses_for_report(agent_output):
    """Render an agent's extracted clauses as a bullet-point text block.

    Args:
        agent_output: dict that may carry an "extracted_clauses" list of
            clause dicts ("clause_type", "clause_text", "risk_level",
            "confidence").

    Returns:
        One four-line entry per clause, each followed by a blank line, or
        "No specific clauses identified.\n" when there are no clauses.
    """
    # `or []` also covers an explicit null for "extracted_clauses"
    clauses = agent_output.get("extracted_clauses") or []

    if not clauses:
        return "No specific clauses identified.\n"

    entries = []
    for c in clauses:
        # Model output can contain a null clause_text; treat it as empty
        # instead of failing on None.strip()
        clause_text = str(c.get("clause_text") or "").strip()
        entries.append(
            f"• **{c.get('clause_type', 'Clause')}**:\n"
            f"  - Clause Text: {clause_text}\n"
            f"  - Risk Level: {c.get('risk_level', '')}\n"
            f"  - Confidence: {c.get('confidence', '')}\n\n"
        )
    return "".join(entries)


def build_report_sections(final_output):
    """Assemble the text blocks of the report from the final contract output.

    Starts from a shallow copy of REPORT_SECTIONS and adds an "overall_risk"
    line plus one "<agent>_findings" block per agent analysis.

    Args:
        final_output: dict with "overall_risk_level" and the four
            "<agent>_analysis" entries.

    Returns:
        dict containing the REPORT_SECTIONS entries followed by the
        generated text blocks.
    """
    assembled = REPORT_SECTIONS.copy()

    # Overall Risk
    assembled["overall_risk"] = (
        f"Overall Contract Risk: **{final_output['overall_risk_level']}**\n"
    )

    # Individual sections: report key -> key of the agent analysis it renders
    for findings_key, analysis_key in (
        ("legal_findings", "legal_analysis"),
        ("finance_findings", "finance_analysis"),
        ("compliance_findings", "compliance_analysis"),
        ("operations_findings", "operations_analysis"),
    ):
        assembled[findings_key] = extract_clauses_for_report(
            final_output[analysis_key]
        )

    return assembled


# Build the text blocks for the current contract and display them
report_sections = build_report_sections(final_output)
report_sections

{'title': 'Contract Risk Analysis Report',
 'sections': ['Executive Summary',
  'Legal Analysis',
  'Finance Analysis',
  'Compliance Analysis',
  'Operations Analysis',
  'Overall Risk Assessment'],
 'overall_risk': 'Overall Contract Risk: **High**\n',
 'legal_findings': '• **Termination**:\n  - Clause Text: The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days.\n  - Risk Level: Medium\n  - Confidence: 0.85\n\n• **Governing Law**:\n  - Clause Text: This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.\n  - Risk Level: Low\n  - Confidence: 0.95\n\n• **Jurisdiction**:\n  - Clause Text: Any legal action arising out of or relating to this Agreement shall be subject to the exclusive jurisdiction of the courts located in the State of Delaware.\n  - Risk Level: Medium\n  - Confidence: 0.8\n\

The refined agent outputs were parsed into readable bullet-point summaries. Each clause was formatted with its type, text, risk level, and confidence, enabling clear human interpretation of model findings.

In [128]:
# Generate Full Report Content

def build_full_report(sections, contract_output=None):
    """Render the complete plain-text contract analysis report.

    Args:
        sections: dict with the text blocks "legal_findings",
            "finance_findings", "compliance_findings", "operations_findings"
            and "overall_risk".
        contract_output: dict supplying "generated_at", "contract_id" and
            "overall_risk_level". Defaults to the notebook-level
            ``final_output`` so existing single-argument calls keep working;
            pass it explicitly to render a report without relying on
            global state.

    Returns:
        The report as a single string.
    """
    if contract_output is None:
        # Backward-compatible fallback to the notebook global
        contract_output = final_output

    report = f"""
CONTRACT ANALYSIS REPORT

 Generated At:
{contract_output['generated_at']}

 Contract ID:
{contract_output['contract_id']}


Executive Summary

This report summarizes legal, financial, compliance, and operational risks extracted from the contract.  
The overall risk classification is: **{contract_output['overall_risk_level']}**.


Legal Findings

{sections['legal_findings']}


Finance Findings

{sections['finance_findings']}


Compliance Findings

{sections['compliance_findings']}


Operations Findings

{sections['operations_findings']}


Overall Risk Assessment

{sections['overall_risk']}

"""
    return report


# Render the report and preview only its first 1500 characters
full_report = build_full_report(report_sections)
print(full_report[:1500]) 


CONTRACT ANALYSIS REPORT

 Generated At:
2026-01-18T10:41:27.693684Z

 Contract ID:
contract_001


Executive Summary

This report summarizes legal, financial, compliance, and operational risks extracted from the contract.  
The overall risk classification is: **High**.


Legal Findings

• **Termination**:
  - Clause Text: The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days.
  - Risk Level: Medium
  - Confidence: 0.85

• **Governing Law**:
  - Clause Text: This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.
  - Risk Level: Low
  - Confidence: 0.95

• **Jurisdiction**:
  - Clause Text: Any legal action arising out of or relating to this Agreement shall be subject to the exclusive jurisdiction of the courts located in the State of Delaware.
  - Risk Level: Medium
  - Confidence: 0.8


In [130]:
# Save contract analysis report
import os

FINAL_DIR = "final_contract_outputs"
os.makedirs(FINAL_DIR, exist_ok=True)

# Destination of the plain-text report inside FINAL_DIR
full_report_path = os.path.join(
    FINAL_DIR,
    "contract_analysis_report.txt"
)

with open(full_report_path, "w", encoding="utf-8") as f:
    f.write(full_report)

print(f"Saved → {full_report_path}")

Saved → final_contract_outputs\contract_analysis_report.txt


All sections were merged into a clear human-readable narrative. The report includes an executive summary, detailed findings for each agent, and an overall risk assessment.

In [129]:
# Preview Executive Summary Only

# f-string: every value is read from final_output when this cell runs
executive_summary_preview = f"""
EXECUTIVE SUMMARY

Contract ID: {final_output['contract_id']}
Overall Risk Level: {final_output['overall_risk_level']}

Key Highlights:
- Legal risk: {final_output['legal_analysis']['risk_level']}
- Financial risk: {final_output['finance_analysis']['risk_level']}
- Compliance risk: {final_output['compliance_analysis']['risk_level']}
- Operations risk: {final_output['operations_analysis']['risk_level']}
"""

print(executive_summary_preview)


EXECUTIVE SUMMARY

Contract ID: contract_001
Overall Risk Level: High

Key Highlights:
- Legal risk: Medium
- Financial risk: High
- Compliance risk: High
- Operations risk: Medium



A concise executive summary was generated to provide stakeholders a quick overview of contract-level risks without reading the entire report.

In [131]:
# Save Executive Summary

# Relies on FINAL_DIR having been defined by an earlier save cell
executive_summary_path = os.path.join(
    FINAL_DIR,
    "executive_summary.txt"
)

with open(executive_summary_path, "w", encoding="utf-8") as f:
    f.write(executive_summary_preview)

print(f"Saved → {executive_summary_path}")

Saved → final_contract_outputs\executive_summary.txt


Task - Rewrite Executive Summary (Simple + Bullet Points)

In [133]:
# NEW CONTRACT ANALYSIS REPORT (STRING VARIABLE)

# NOTE(review): hand-written literal (plain string, not an f-string). The
# timestamp, contract ID and every risk level below are fixed text and are not
# read from `final_output`, so re-running the analysis does not update this report.
CONTRACT_ANALYSIS_REPORT_NEW = """
CONTRACT ANALYSIS REPORT

Generated At:
2026-01-18T10:41:27.693684Z

Contract ID:
contract_001

----------------------------------
EXECUTIVE SUMMARY
----------------------------------
This report provides a simplified analysis of the contract risks across multiple dimensions.
The overall contract risk is classified as HIGH.

----------------------------------
LEGAL FINDINGS
----------------------------------
- Termination:
  • The contract allows termination with 30 days’ notice if obligations are not met.
  • Risk Level: Medium

- Governing Law:
  • The agreement is governed by the laws of the State of Delaware.
  • Risk Level: Low

- Jurisdiction:
  • Legal disputes must be resolved exclusively in Delaware courts.
  • Risk Level: Medium

----------------------------------
FINANCE FINDINGS
----------------------------------
- Late Fees:
  • Late payments attract penalties of up to 2% per month.
  • Additional late charges of 1.5% per month may apply.
  • Risk Level: High

- Interest on Delayed Payments:
  • Unpaid amounts accrue interest at 10% per annum.
  • Risk Level: High

----------------------------------
COMPLIANCE FINDINGS
----------------------------------
- Regulatory & Privacy Obligations:
  • The contract includes HIPAA, HITECH, and healthcare data protection requirements.
  • Business Associate Agreements (BAAs) may be required.
  • Risk Level: High

- Data Security:
  • Sensitive data must be encrypted at rest and in transit.
  • Risk Level: High

- Legal Compliance:
  • All activities must comply with applicable laws and regulations.
  • Risk Level: Medium

----------------------------------
OPERATIONS FINDINGS
----------------------------------
- Service Levels:
  • SLAs are subject to periodic review and modification.
  • Risk Level: Medium

- Resource Commitments:
  • Both parties must allocate sufficient personnel and resources.
  • Risk Level: Medium

- Availability Measurement:
  • Service availability is formally defined and measured.
  • Risk Level: Low

----------------------------------
OVERALL RISK ASSESSMENT
----------------------------------
Overall Contract Risk: HIGH

The high overall risk is mainly driven by financial penalties and strict compliance requirements.
"""


In [132]:
# NEW EXECUTIVE SUMMARY (STRING VARIABLE)

# NOTE(review): hand-written literal (plain string, not an f-string). The contract
# ID and risk levels are fixed text and are not read from `final_output`.
EXECUTIVE_SUMMARY_NEW = """
EXECUTIVE SUMMARY

Contract ID: contract_001
Overall Risk Level: High

This contract has been assessed across legal, financial, compliance, and operational areas.

Key observations:
- Legal risk is moderate due to termination conditions and jurisdiction requirements.
- Financial risk is high because of strict payment deadlines, late fees, and interest charges.
- Compliance risk is high due to extensive regulatory, privacy, and data protection obligations, including HIPAA requirements.
- Operational risk is moderate, mainly related to service level management and resource commitments.

Overall, this contract presents a high level of risk, primarily driven by financial penalties and regulatory compliance obligations. Careful monitoring and strong internal controls are recommended before execution.
"""

In [134]:
import os

FINAL_DIR = "final_contract_outputs"
os.makedirs(FINAL_DIR, exist_ok=True)

# Save the rewritten executive summary, then the rewritten contract analysis
# report. One loop replaces two copies of the same write logic, and the path
# reported is the one actually written rather than a hard-coded string.
for file_name, content in (
    ("executive_summary_new.txt", EXECUTIVE_SUMMARY_NEW),
    ("contract_analysis_report_new.txt", CONTRACT_ANALYSIS_REPORT_NEW),
):
    rewritten_path = os.path.join(FINAL_DIR, file_name)
    with open(rewritten_path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"Saved → {rewritten_path}")

Saved → final_contract_outputs/executive_summary_new.txt
Saved → final_contract_outputs/contract_analysis_report_new.txt


The rewritten executive summary and simplified contract analysis report were stored as Python string variables and saved into the `final_contract_outputs` directory. These human-friendly versions improve readability and prepare the output for reporting or client presentation.

REPORT FORMATTING & TONE CUSTOMIZATION

In [135]:
# Define Tone Templates

# Keyed by tone name. format_section reads "bullet_prefix" and
# format_full_report reads "intro"; an unknown tone raises KeyError in both.
TONE_TEMPLATES = {
    "formal": {
        "intro": "This report provides an analytical summary of the contract, including key findings and risk evaluations.",
        "bullet_prefix": "• "
    },
    "simple": {
        "intro": "Here’s an easy-to-understand summary of the contract findings.",
        "bullet_prefix": "- "
    },
    "executive": {
        "intro": "Executive-level summary of contract insights and risks.",
        "bullet_prefix": "► "
    }
}

We defined multiple tone profiles (formal, simple, executive). Each tone controls the introductory text and the bullet style used throughout the report.

In [139]:
# Formatting function

def format_section(title, data, tone="formal"):
    bullet = TONE_TEMPLATES[tone]["bullet_prefix"]
    lines = []   # collect lines properly

    lines.append(f"\n\n## {title}")

    # Highlight high risk
    risk = data.get("risk_level", "").lower()
    if risk == "high":
        lines.append("⚠ **HIGH RISK DETECTED**")

    clauses = data.get("extracted_clauses", [])

    if not clauses:
        lines.append(f"{bullet}No relevant clauses found.")
    else:
        for c in clauses:
            clause_text = c.get("clause_text", "").strip()
            if clause_text:
                # Ensure each bullet is on its own line
                lines.append(f"{bullet}{clause_text}")

    # Add risk and confidence as separate lines
    lines.append(f"Risk Level: {data.get('risk_level', '')}")
    lines.append(f"Confidence: {data.get('confidence', '')}")

    # Join all lines with proper line breaks
    return "\n".join(lines)

This function formats each section into:
- A heading
- Bullet points
- Automatic high-risk highlighting
- Risk level and confidence values

This ensures all sections are consistently human-readable.

In [144]:
# Format entire report

def format_full_report(final_output, tone="formal"):
    """Assemble the complete contract analysis report as a single string.

    Args:
        final_output: Dict holding the four ``*_analysis`` section dicts plus
            the ``overall_risk_level`` and ``generated_at`` strings.
        tone: Key into ``TONE_TEMPLATES``; selects the intro sentence and is
            forwarded to ``format_section`` for the bullet style.

    Returns:
        Title, intro, the four formatted sections (legal, finance, compliance,
        operations, in that order) and the overall-risk / timestamp footer,
        joined with newlines.
    """
    # (heading shown in the report, key of the section inside final_output)
    section_specs = (
        ("Legal Analysis", "legal_analysis"),
        ("Finance Analysis", "finance_analysis"),
        ("Compliance Analysis", "compliance_analysis"),
        ("Operations Analysis", "operations_analysis"),
    )

    # Title, intro sentence, then an empty entry that yields a blank line.
    header = ["# CONTRACT ANALYSIS REPORT\n", TONE_TEMPLATES[tone]["intro"], ""]

    # Each section arrives fully formatted from format_section.
    body = [
        format_section(heading, final_output[key], tone)
        for heading, key in section_specs
    ]

    footer = [
        "\n### Overall Risk: " + final_output["overall_risk_level"],
        "Generated At: " + final_output["generated_at"],
    ]

    return "\n".join(header + body + footer)

In [145]:
# Build the full report text; `tone` may be "formal", "simple" or "executive".
formatted_contract_analysis_report = format_full_report(final_output, tone="formal")

# Show only the first 1200 characters as a quick visual check.
print(formatted_contract_analysis_report[:1200])

# CONTRACT ANALYSIS REPORT

This report provides an analytical summary of the contract, including key findings and risk evaluations.



## Legal Analysis
• The Company shall have the right to terminate this Agreement upon thirty (30) days’ written notice to the other party if such party fails to perform or cure any breach or default within said thirty (30) days.
• This Agreement shall be governed by and construed in accordance with the laws of the State of Delaware.
• Any legal action arising out of or relating to this Agreement shall be subject to the exclusive jurisdiction of the courts located in the State of Delaware.
Risk Level: Medium
Confidence: 0.75


## Finance Analysis
⚠ **HIGH RISK DETECTED**
• If Buyer fails to pay Seller an amount owed under this Agreement by the invoice due date, then Buyer will owe Seller: (i) the delinquent amount; and (ii) a late payment fee equal to two percent (2%) of the delinquent amount for each full or partial calendar month past the invoice due 

We now convert the entire contract output into a structured report, applying the selected tone and formatting rules. All sections use bullet points and show high-risk warnings. The formatted report is then saved in the `final_contract_outputs` directory.

In [147]:
import os

# Make sure the output folder exists before writing into it.
FINAL_DIR = "final_contract_outputs"
os.makedirs(FINAL_DIR, exist_ok=True)

formatted_report_path = os.path.join(FINAL_DIR, "formatted_contract_analysis_report.txt")

# Store the formatted report as UTF-8 text.
with open(formatted_report_path, mode="w", encoding="utf-8") as f:
    f.write(formatted_contract_analysis_report)

print("Saved →", formatted_report_path)

Saved → final_contract_outputs\formatted_contract_analysis_report.txt


In [141]:
def generate_executive_summary(final_output, tone="executive"):
    """Build a short per-section summary: risk, confidence and clause count.

    Args:
        final_output: Dict holding the ``legal_analysis``, ``finance_analysis``,
            ``compliance_analysis`` and ``operations_analysis`` section dicts
            plus ``overall_risk_level``. Inside a section, missing keys and
            explicit ``None`` values for ``risk_level`` / ``extracted_clauses``
            are tolerated.
        tone: Key into ``TONE_TEMPLATES`` selecting intro text and bullet prefix.

    Returns:
        The summary as one newline-joined string.

    Raises:
        KeyError: If ``tone`` is unknown, or a section / ``overall_risk_level``
            is missing from ``final_output``.
    """
    template = TONE_TEMPLATES[tone]
    intro = template["intro"]
    bullet = template["bullet_prefix"]

    lines = ["# EXECUTIVE SUMMARY\n", intro, ""]

    for section_key in ("legal_analysis", "finance_analysis", "compliance_analysis", "operations_analysis"):
        sec = final_output[section_key]
        # "legal_analysis" -> "Legal Analysis"
        title = section_key.replace("_", " ").title()

        # An agent may return null for these fields; fall back to empty values
        # instead of failing on None.lower() / len(None).
        risk = str(sec.get("risk_level") or "").lower()
        risk_label = "⚠ HIGH RISK" if risk == "high" else risk.title()
        clause_count = len(sec.get("extracted_clauses") or [])

        lines.append(f"## {title}")
        lines.append(f"{bullet}Risk: {risk_label}")
        lines.append(f"{bullet}Confidence: {sec.get('confidence', '')}")
        lines.append(f"{bullet}Clauses Found: {clause_count}")
        lines.append("")  # blank line between sections

    lines.append(f"### Overall Contract Risk: {final_output['overall_risk_level']}\n")

    return "\n".join(lines)

In [146]:
# Produce the short per-section summary using the "executive" tone profile.
executive_summary_new = generate_executive_summary(final_output, tone="executive")

# Display the complete summary for a visual check.
print(executive_summary_new)

# EXECUTIVE SUMMARY

Executive-level summary of contract insights and risks.

## Legal Analysis
► Risk: Medium
► Confidence: 0.75
► Clauses Found: 3

## Finance Analysis
► Risk: ⚠ HIGH RISK
► Confidence: 0.95
► Clauses Found: 3

## Compliance Analysis
► Risk: ⚠ HIGH RISK
► Confidence: 0.96
► Clauses Found: 7

## Operations Analysis
► Risk: Medium
► Confidence: 0.96
► Clauses Found: 3

### Overall Contract Risk: High



In [148]:
# Target file for the executive summary (FINAL_DIR is set in an earlier cell).
formatted_exec_summary_path = os.path.join(FINAL_DIR, "formatted_executive_summary.txt")

# Store the summary as UTF-8 text.
with open(formatted_exec_summary_path, mode="w", encoding="utf-8") as f:
    f.write(executive_summary_new)

print("Saved →", formatted_exec_summary_path)

Saved → final_contract_outputs\formatted_executive_summary.txt


We created a concise executive-level summary using bullet points and then saved it in the `final_contract_outputs` directory. Each section shows:
- Risk level (with high-risk highlighting)
- Confidence
- Number of clauses extracted

FASTAPI BACKEND FOR CONTRACT ANALYSIS

In [None]:
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel
from pipeline.final_pipeline import run_full_analysis
from reporting.formatter import format_full_report, generate_executive_summary

# FastAPI application instance; the /analyze route below is registered on it.
app = FastAPI(title="ClauseAI Contract Analysis API")

# Request options for the /analyze endpoint.
# `tone` selects which report style is generated; the endpoint lower-cases it
# and accepts only "executive", "simple" or "formal".
class AnalysisRequest(BaseModel):
    tone: str = "executive"    # user can switch: executive/simple/formal


# Helper to read & validate uploaded file
async def read_uploaded_file(file: UploadFile):
    """Read an uploaded ``.txt`` contract and return its text.

    Args:
        file: The uploaded file taken from the request.

    Returns:
        The decoded file content with surrounding whitespace stripped.

    Raises:
        HTTPException: Status 400 if the file name does not end in ``.txt``,
            the content is not valid UTF-8, or the stripped text is shorter
            than 20 characters.
    """
    # The filename can be absent on an upload; report that as an unsupported
    # file (400) instead of failing with AttributeError (500).
    if not (file.filename or "").lower().endswith(".txt"):
        raise HTTPException(status_code=400, detail="Only .txt files are supported.")

    raw_bytes = await file.read()
    try:
        # "utf-8-sig" decodes plain UTF-8 and additionally drops a leading
        # byte-order mark, which str.strip() would not remove.
        text = raw_bytes.decode("utf-8-sig").strip()
    except UnicodeDecodeError as exc:
        # A wrongly encoded upload is a client error, not a server fault.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file must be UTF-8 encoded text."
        ) from exc

    # error handling for empty/small files
    if len(text) < 20:
        raise HTTPException(status_code=400, detail="Uploaded file is empty or too small.")

    return text


# API Endpoint
# NOTE(review): `request` is a JSON-style body model declared next to a
# multipart file upload. FastAPI documents File/Form parameters and JSON body
# fields as not combinable in one request - verify that clients can actually
# supply `tone` this way; a `Form` field may be the intended shape.
@app.post("/analyze")
async def analyze_contract(request: AnalysisRequest, file: UploadFile = File(...)):
    """Analyze an uploaded .txt contract and return a report in the requested tone.

    Responds with the contract id, the tone used, the overall risk and the
    rendered report. Returns HTTP 400 for an unsupported, undecodable or
    too-small file, and for a tone other than executive, simple or formal.
    """
    # Load and validate file
    contract_text = await read_uploaded_file(file)

    # Tone selection - validated BEFORE the analysis so that a bad tone is
    # rejected immediately instead of after the full pipeline has run.
    tone = request.tone.lower()
    if tone not in ("executive", "simple", "formal"):
        raise HTTPException(status_code=400, detail="Tone must be: executive, simple or formal.")

    # NOTE(review): this looks like a synchronous call inside an async
    # handler; if it is long-running it will block the event loop - confirm.
    final_output = run_full_analysis(contract_text)

    # Generate report according to tone
    if tone == "executive":
        report = generate_executive_summary(final_output)
    else:
        report = format_full_report(final_output, tone=tone)

    # The report formatters in this notebook read "overall_risk_level", so a
    # bare "overall_risk" lookup is likely to raise KeyError. Keep the
    # original key when it exists and fall back to the key the formatters use.
    if "overall_risk" in final_output:
        overall_risk = final_output["overall_risk"]
    else:
        overall_risk = final_output["overall_risk_level"]

    return {
        "contract_id": final_output["contract_id"],
        "tone": tone,
        "overall_risk": overall_risk,
        "report": report
    }

Step 1 - We initialized a FastAPI application that will serve as the backend for contract analysis. This file (`app.py`) becomes the entry point of the backend API.

Step 2 - We created helper functions to read uploaded contract files and validate them. Error handling was added for empty files and unsupported formats.

Step 3 - We created a `/analyze` API endpoint that:
1. Accepts contract text files
2. Runs the complete analysis pipeline
3. Generates a report in user-selected tone (executive/simple/formal)
4. Returns structured output as JSON

In [None]:
# To run the API, execute this from a terminal: uvicorn app:app --reload

We run the FastAPI server using Uvicorn with auto-reload enabled. FastAPI’s Swagger UI makes it easy to test the endpoint directly from the browser. We tested 10 cases using Postman.