In [1]:
import time
import uuid
from datetime import datetime
from typing import TypedDict, Dict, List

# LangGraph
from langgraph.graph import StateGraph, START, END

# Vector DB
import chromadb
from chromadb.config import Settings


In [2]:
class ContractState(TypedDict):
    contract_id: str
    contract_text: str
    legal: Dict
    compliance: Dict
    finance: Dict
    operations: Dict
    start_time: float
    end_time: float



In [3]:
def legal_agent(state: ContractState):
    time.sleep(1)
    return {
        "risk_level": "medium",
        "issues": ["Termination clause ambiguity"],
        "confidence": 0.85
    }

def compliance_agent(state: ContractState):
    time.sleep(1)
    return {
        "risk_level": "medium",
        "issues": ["Missing data privacy clause"],
        "confidence": 0.80
    }

def finance_agent(state: ContractState):
    time.sleep(1)
    return {
        "risk_level": "medium",
        "issues": ["Penalty escalation risk"],
        "confidence": 0.82
    }

def operations_agent(state: ContractState):
    time.sleep(1)
    return {
        "risk_level": "low",
        "issues": [],
        "confidence": 0.90
    }


In [4]:
graph = StateGraph(ContractState)

graph.add_node("legal", legal_agent)
graph.add_node("compliance", compliance_agent)
graph.add_node("finance", finance_agent)
graph.add_node("operations", operations_agent)

graph.add_edge(START, "legal")
graph.add_edge(START, "compliance")
graph.add_edge(START, "finance")
graph.add_edge(START, "operations")

graph.add_edge("legal", END)
graph.add_edge("compliance", END)
graph.add_edge("finance", END)
graph.add_edge("operations", END)


<langgraph.graph.state.StateGraph at 0x19c9f83f4d0>

In [5]:
contract_graph = graph.compile()


In [6]:
state = {
    "contract_id": "C-001",
    "contract_text": "Sample contract text",
    "start_time": time.time()
}

parallel_result = contract_graph.invoke(state)
state["end_time"] = time.time()

print("Parallel Runtime:", state["end_time"] - state["start_time"])


Parallel Runtime: 1.0062987804412842


In [11]:
from chromadb import Client
from chromadb.config import Settings
import os

persist_dir = "./memory"
os.makedirs(persist_dir, exist_ok=True)

# New-style initialization without the deprecated parameter
client = Client(Settings(
    persist_directory=persist_dir,
    is_persistent=True   # tell Chroma you want persistence
))

collection = client.get_or_create_collection("agent_memory")
print("Initialized ChromaDB client!")



Initialized ChromaDB client!


In [22]:
def store_agent_output(agent, output, contract_id):
    collection.add(
        documents=[str(output)],
        metadatas=[{
            "agent": agent,
            "contract_id": contract_id,
            "risk_level": output.get("risk_level"),  # use get to avoid KeyError
            "confidence": output.get("confidence", None),  # default to None if missing
            "timestamp": datetime.utcnow().isoformat()
        }],
        ids=[str(uuid.uuid4())]
    )



In [23]:
# Iterate over the keys you want to store
expected_keys = ["legal", "compliance", "finance", "operations"]

for key in expected_keys:
    data = parallel_result.get(key)
    if data:  # Only call if data exists and is not empty
        store_agent_output(key, data, "C-001")
    else:
        print(f"Warning: '{key}' not found or empty in parallel_result")







In [24]:
def query_memory(agent=None):
    where = {"agent": agent} if agent else {}
    return collection.get(where=where)


In [25]:
legal_memory = query_memory("legal")
finance_memory = query_memory("finance")


In [26]:
# Query all memory safely
all_mem = collection.get(
    where=None,  # no filter
    include=["metadatas", "documents"]
)

# Extract metadata safely
metadatas = all_mem.get("metadatas", [])

# Build shared context string
shared_context = "\n".join([
    f"{m.get('agent', 'Unknown')} risk: {m.get('risk_level', 'N/A')}"
    for m in metadatas
])

print(shared_context)







In [34]:
from datetime import datetime
import uuid
import time

def store_agent_output(agent, output, contract_id, collection, retries=3, chunk_size=1000):
    """
    Safely stores agent output into a vector collection with retry logic.

    Parameters:
    - agent: Name of the agent (string)
    - output: Dictionary with keys like 'risk_level', 'confidence', 'reason', etc.
    - contract_id: ID of the contract
    - collection: Vector collection object with .add() method
    - retries: Number of times to retry on timeout
    - chunk_size: Max size of each document chunk
    """
    
    # Convert output to string and split into chunks if large
    output_str = str(output)
    chunks = [output_str[i:i+chunk_size] for i in range(0, len(output_str), chunk_size)]
    
    for chunk in chunks:
        for attempt in range(retries):
            try:
                collection.add(
                    documents=[chunk],
                    metadatas=[{
                        "agent": agent,
                        "contract_id": contract_id,
                        "risk_level": output.get("risk_level"),
                        "confidence": output.get("confidence"),  # will store None if missing
                        "timestamp": datetime.utcnow().isoformat()
                    }],
                    ids=[str(uuid.uuid4())]
                )
                break  # success, move to next chunk
            except Exception as e:
                if "timeout" in str(e).lower() or "ReadTimeout" in str(e):
                    print(f"[WARNING] Timeout occurred, retrying {attempt+1}/{retries}...")
                    time.sleep(2)  # wait before retry
                else:
                    raise  # re-raise other exceptions



In [37]:
from typing import TypedDict, Optional

class GraphState(TypedDict):
    contract_text: str
    legal: Optional[dict]
    finance: Optional[dict]
    compliance: Optional[dict]
    operations: Optional[dict]
    shared_context: Optional[str]


In [38]:
def build_shared_context(all_agent_memory):
    return "\n".join([
        f"{m.metadata['agent']} risk: {m.metadata['risk_level']}"
        for m in all_agent_memory.matches
    ])


In [39]:
def refined_legal_agent(state: GraphState):
    shared_context = state["shared_context"]

    refined_output = {
        "risk_level": "high",
        "reason": "Termination clause combined with financial penalties",
        "confidence": 0.88
    }

    return {"legal": refined_output}


In [40]:
graph.add_node("refined_legal", refined_legal_agent)
graph.add_edge("memory_retrieval", "refined_legal")
graph.add_edge("refined_legal", END)


Adding a node to a graph that has already been compiled. This will not be reflected in the compiled graph.
Adding an edge to a graph that has already been compiled. This will not be reflected in the compiled graph.
Adding an edge to a graph that has already been compiled. This will not be reflected in the compiled graph.


<langgraph.graph.state.StateGraph at 0x19c9f83f4d0>

In [43]:
from datetime import datetime

refined_legal = {
    "risk_level": "high",
    "reason": "Termination clause combined with financial penalties",
    "confidence": 0.90,
    "refined": True,
    "stage": "refinement",
    "updated_at": datetime.utcnow().isoformat()
}

refined_compliance = {
    "risk_level": "high",
    "reason": "Finance penalty risk escalates compliance exposure",
    "confidence": 0.88,
    "refined": True,
    "stage": "refinement",
    "updated_at": datetime.utcnow().isoformat()
}

store_agent_output("legal", refined_legal, "C-001", collection)
store_agent_output("compliance", refined_compliance, "C-001", collection)



  "updated_at": datetime.utcnow().isoformat()
  "updated_at": datetime.utcnow().isoformat()
  "timestamp": datetime.utcnow().isoformat()
C:\Users\KIIT\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz:  64%|██████▍   | 50.8M/79.3M [30:55<17:23, 28.7kiB/s]  


ReadError: [WinError 10054] An existing connection was forcibly closed by the remote host in add.

In [44]:
FINAL_CONTRACT_SCHEMA = {
    "contract_id": "C-001",
    "legal": refined_legal,
    "compliance": refined_compliance,
    "finance": parallel_result["finance"],
    "operations": parallel_result["operations"],
    "overall_risk": "high",
    "confidence_score": 0.84,
    "high_risk_clauses": [
        "Termination penalties",
        "Missing privacy clause"
    ],
    "generated_at": datetime.utcnow().isoformat()
}


KeyError: 'finance'

In [45]:
def generate_report(final_json, tone="executive"):
    return f"""
EXECUTIVE SUMMARY
• Overall Risk: {final_json['overall_risk']}
• Confidence: {final_json['confidence_score']}

LEGAL
• {final_json['legal']}

COMPLIANCE
• {final_json['compliance']}

FINANCE
• {final_json['finance']}

OPERATIONS
• {final_json['operations']}

RECOMMENDATIONS
• Review high-risk clauses
• Apply mitigation controls
"""


In [1]:
from fastapi import FastAPI, UploadFile, File, HTTPException

app = FastAPI(title="Contract Analysis API")


In [2]:
@app.post("/analyze")
async def analyze_contract(file: UploadFile = File(...), tone: str="executive"):
    if not file.filename:
        raise HTTPException(status_code=400, detail="Empty file")

    text = (await file.read()).decode("utf-8")

    # Run pipeline (reuse logic above)
    final_json = FINAL_CONTRACT_SCHEMA
    report = generate_report(final_json, tone)

    return {
        "contract_id": final_json["contract_id"],
        "generated_at": final_json["generated_at"],
        "analysis": final_json,
        "report": report
    }


In [4]:
if contracts:
    for c in contracts:
        st.success(f"Uploaded: {c['name']}")
        st.info(f"Size: {c['size']} bytes | Characters: {c['chars']}")

        with st.expander(f"Preview {c['name']}"):
            st.text(c["text"][:1500])
else:
    st.warning("No valid contract files uploaded.")


NameError: name 'contracts' is not defined