In [None]:
# Mount Google Drive in the Colab runtime; the agent output files read below
# live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

**************Co -Ordinator*********************

Why do we use a coordinator?

Agents are domain experts: each agent reads the contract context and extracts only what is needed, as structured JSON.

The coordinator, in contrast, decides what to show to the user: only the information the user asked for is displayed, and the coordinator is responsible for that selection.


In [None]:
import json

def load_agent_output(path):
    """Return the ``"output"`` section of a saved agent result file.

    Args:
        path: Path to a JSON file whose top-level object has an ``"output"`` key.

    Returns:
        The value stored under ``"output"``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If the JSON document has no ``"output"`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # consistent with the later load_output helpers in this notebook.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data["output"]  # only the "output" payload is used downstream


In [None]:
# Pre-load the saved "output" payload of each agent once; coordinator_execute
# returns these objects instead of re-running the agents.
legal_output = load_agent_output("/content/drive/MyDrive/info_google/Data/legal_agent_outputs/legal_agent_output.json")
compliance_output = load_agent_output("/content/drive/MyDrive/info_google/Data/compliance_agent_outputs/compliance_agent_output.json")
finance_output = load_agent_output("/content/drive/MyDrive/info_google/Data/finance_agent_outputs/finance_agent_output.json")
operations_output = load_agent_output("/content/drive/MyDrive/info_google/Data/operations_agent_outputs/operations_agent_output.json")


In [None]:
# Agent name -> lowercase keywords. route_query lowercases the query and tests
# each keyword with substring containment, so keywords must stay lowercase.
ROUTING_RULES = {
    "legal": ["termination", "governing law", "jurisdiction", "indemnity", "breach"],
    "compliance": ["gdpr", "audit", "regulatory", "data protection"],
    "finance": ["payment", "fee", "penalty", "invoice"],
    "operations": ["deliverable", "timeline", "sla", "milestone"]
}


Here, the keywords are very important: they are how the coordinator recognizes which domain (legal, compliance, etc.) a query relates to.

These keywords map a query to the agent(s) that should come into play.

In [None]:
def route_query(query: str):
    """Pick the agents whose keywords occur in *query*.

    The query is lowercased and every keyword in ``ROUTING_RULES`` is tested
    with plain substring containment, so a keyword also matches inside a
    longer word. Agents come back in ``ROUTING_RULES`` order; the list is
    empty when nothing matches.
    """
    lowered = query.lower()
    return [
        name
        for name, terms in ROUTING_RULES.items()
        if any(term in lowered for term in terms)
    ]


This is the core logic: it matches the query against each agent's keywords and returns the list of agents to use for that query.

In [None]:
def coordinator_execute(query: str):
    """Answer *query* from the pre-loaded agent outputs.

    Routes the query with ``route_query`` and collects the already-loaded
    output of each selected agent; no agent is re-run.

    Returns:
        A dict with the original ``"query"``, the ``"agents_used"`` list and a
        ``"results"`` mapping of agent name to that agent's output.
    """
    chosen = route_query(query)
    collected = {}

    for name in chosen:
        # Only the selected agent's module-level output is looked up.
        if name == "legal":
            collected[name] = legal_output
        elif name == "compliance":
            collected[name] = compliance_output
        elif name == "finance":
            collected[name] = finance_output
        elif name == "operations":
            collected[name] = operations_output

    return {"query": query, "agents_used": chosen, "results": collected}


this is the main part of the system : the process is -

1.User query comes in

2.Routing decides which agents matter

3.Coordinator fetches existing outputs

4.Aggregates them

5.Returns a clean response


Here, the coordinator does not re-run the agents and does not hallucinate: it only returns outputs that were already saved.

In [None]:
# Demo: route one query through the coordinator and pretty-print the result.
query = "Explain termination and indemnity risks in the contract"
final_output = coordinator_execute(query)
print(json.dumps(final_output, indent=2))

ClauseAI first uses a Retrieval-Augmented Generation (RAG) pipeline to retrieve pertinent contract sections. Only these retrieved sections are then examined by specialized agents, which extract domain-specific clauses—like indemnity or termination—in a structured JSON format. To guarantee accuracy, every extracted clause is copied exactly from the contract. User queries are routed to the relevant agents by a coordinator layer, which then compiles their verified outputs into a single response. All extracted insights are guaranteed to be modular, explicable, and traceable back to the original contract text thanks to this layered approach.

Are agents and the coordinator the same?

* Agents are domain-specific: each one analyzes the contract from its own domain's point of view.
* The coordinator is cross-domain and rule-based: it only decides which agents are used for which query and combines all the agents' outputs into one response that a human can understand.

why we need coordinator in this project?

Now

- Clean output

- Correct relevance

- Demo-ready

- Explainable

Future

- Scale to 20+ agents

- Plug into chat UI

- Enterprise workflows

- Replace routing logic without touching agents

 ********************Lang Graph***************

In [None]:
pip install langgraph


What is LangGraph? Basically, it is a framework that lets us:
* Represent agents as nodes
* Represent execution order as edges
* Maintain a shared state that flows through the agents



Why we use LangGraph in our project:
  * Structured execution
  * Explicit graph
  * Easy to add/remove agents
  * Clear execution path


In [None]:
from langgraph.graph import StateGraph, END

In [None]:
from typing import TypedDict, Dict, Any
class ClauseAIState(TypedDict):
    """Shared graph state: the user query plus one output slot per agent."""

    query: str  # user question
    legal: Dict[str, Any]  # written by legal_node
    compliance: Dict[str, Any]  # written by compliance_node
    finance: Dict[str, Any]  # written by finance_node
    operations: Dict[str, Any]  # written by operations_node


Here, we define a shared graph state that carries the query and the accumulated outputs from all agents as the graph executes.

In [None]:
import json

def load_output(path: str):
    """Return the ``"output"`` section of a saved agent result file.

    Args:
        path: Path to a JSON file whose top-level object has an ``"output"`` key.

    Returns:
        The value stored under ``"output"``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If the JSON document has no ``"output"`` key.
    """
    # Explicit UTF-8 instead of the platform default encoding, consistent with
    # the later load_output definitions in this notebook.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)["output"]


Each agent node receives the shared graph state, performs one agent's work, writes the result into the shared state, and finally returns the shared state.

In [None]:
# Per-agent keyword lists (same terms as ROUTING_RULES defined earlier).
# NOTE(review): no other cell in the visible notebook references these names.
LEGAL_KEYWORDS = ["termination", "indemnity", "governing law", "jurisdiction", "breach"]
COMPLIANCE_KEYWORDS = ["gdpr", "audit", "regulatory", "data protection"]
FINANCE_KEYWORDS = ["payment", "fee", "penalty", "invoice"]
OPERATIONS_KEYWORDS = ["sla", "timeline", "milestone", "deliverable"]


In [None]:
##Legal Agent node

def legal_node(state: ClauseAIState) -> ClauseAIState:
    """Graph node: load the saved legal agent output into ``state["legal"]``.

    The incoming state dict is updated in place and returned.
    """
    print("‚ñ∂Ô∏è Executing Legal Agent")
    state["legal"] = load_output(
        "/content/drive/MyDrive/info_google/Data/legal_agent_outputs/legal_agent_output.json"
    )
    return state


In [None]:
## compliance Agent node
def compliance_node(state: ClauseAIState) -> ClauseAIState:
    """Graph node: load the saved compliance agent output into ``state["compliance"]``.

    The incoming state dict is updated in place and returned.
    """
    print("‚ñ∂Ô∏è Executing Compliance Agent")
    state["compliance"] = load_output(
        "/content/drive/MyDrive/info_google/Data/compliance_agent_outputs/compliance_agent_output.json"
    )
    return state


In [None]:
## finance Agent node
def finance_node(state: ClauseAIState) -> ClauseAIState:
    """Graph node: load the saved finance agent output into ``state["finance"]``.

    The incoming state dict is updated in place and returned.
    """
    print("‚ñ∂Ô∏è Executing Finance Agent")
    state["finance"] = load_output(
        "/content/drive/MyDrive/info_google/Data/finance_agent_outputs/finance_agent_output.json"
    )
    return state


In [None]:
## operation Agent node
def operations_node(state: ClauseAIState) -> ClauseAIState:
    """Graph node: load the saved operations agent output into ``state["operations"]``.

    The incoming state dict is updated in place and returned.
    """
    print("‚ñ∂Ô∏è Executing Operations Agent")
    state["operations"] = load_output(
        "/content/drive/MyDrive/info_google/Data/operations_agent_outputs/operations_agent_output.json"
    )
    return state



In [None]:
from langgraph.graph import StateGraph
graph = StateGraph(ClauseAIState)


In [None]:
# Register one node per agent; the string is the node name used when wiring edges.
graph.add_node("legal_agent", legal_node)
graph.add_node("compliance_agent", compliance_node)
graph.add_node("finance_agent", finance_node)
graph.add_node("operations_agent", operations_node)

In [None]:
# Fixed linear pipeline: legal -> compliance -> finance -> operations -> END.
graph.set_entry_point("legal_agent")
graph.add_edge("legal_agent", "compliance_agent")
graph.add_edge("compliance_agent", "finance_agent")
graph.add_edge("finance_agent", "operations_agent")
graph.add_edge("operations_agent", END)


Basic agent flow: "Legal → Compliance → Finance → Operations → END"

In [None]:
app = graph.compile()

In [None]:
input_state = {
    "query": "Explain termination and indemnity risks",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}
res = app.invoke(input_state)
res.keys()

In [None]:
res['compliance']

In [None]:
input_state = {
    "query": "Review termination, GDPR compliance, payment terms, and SLAs",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}
result = app.invoke(input_state)
result.keys()


In [None]:
result['legal']

In [None]:
result['compliance']

In [None]:
result['finance']

In [None]:
result['operations']

Changing the agents' order to "Compliance → Legal → Finance → Operations → END"

In [None]:
# Build a fresh graph (rebinding `graph`) with the same four nodes but a
# different order: compliance -> legal -> finance -> operations -> END.
graph = StateGraph(ClauseAIState)

graph.add_node("legal_agent", legal_node)
graph.add_node("compliance_agent", compliance_node)
graph.add_node("finance_agent", finance_node)
graph.add_node("operations_agent", operations_node)

# Execution now starts at the compliance agent.
graph.set_entry_point("compliance_agent")


graph.add_edge("compliance_agent", "legal_agent")
graph.add_edge("legal_agent", "finance_agent")
graph.add_edge("finance_agent", "operations_agent")
graph.add_edge("operations_agent", END)

app = graph.compile()


In [None]:
input_state = {
    "query": "Review termination, GDPR compliance, payment terms, and SLAs",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}
result = app.invoke(input_state)
result.keys()


When we change the agents' order, the graph works exactly as expected.
For example, the entry agent here is compliance, so the Compliance agent executes first.

********************************************

********************************************
Removing finance agent

In [None]:
# Build a fresh graph without the finance agent:
# compliance -> legal -> operations -> END.
graph = StateGraph(ClauseAIState)

graph.add_node("legal_agent", legal_node)
graph.add_node("compliance_agent", compliance_node)
graph.add_node("operations_agent", operations_node)

graph.set_entry_point("compliance_agent")


# legal now connects straight to operations; no finance node is registered.
graph.add_edge("compliance_agent", "legal_agent")
graph.add_edge("legal_agent", "operations_agent")
graph.add_edge("operations_agent", END)

app = graph.compile()


In [None]:
input_state = {
    "query": "Review termination, GDPR compliance, payment terms, and SLAs",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}
result = app.invoke(input_state)
result.keys()


In [None]:
result['finance']

* LangGraph doesn't require all agents
* Removing an agent does not break the system

* Shared state remains consistent

* Pipeline adapts structurally

In [None]:
import json
print(json.dumps(result, indent=2))


# ******************Conditional Routing in LangGraph***********

In [None]:
from typing import TypedDict, Dict, Any
class MultiAgentState(TypedDict):
    """State for the conditionally routed graph: query plus one slot per agent."""

    query: str  # user question; read by route_query to pick the agent
    legal: Dict[str, Any]  # legal agent output
    compliance: Dict[str, Any]  # compliance agent output
    finance: Dict[str, Any]  # finance agent output
    operations: Dict[str, Any]  # operations agent output


In [None]:
# Node name -> lowercase keywords. The keys are the graph node names that
# route_query returns and that the conditional-edge mapping expects.
ROUTING_KEYWORDS = {
    "legal_agent": [
        "termination", "indemnity", "governing law", "jurisdiction", "breach"
    ],
    "compliance_agent": [
        "gdpr", "audit", "regulatory", "data protection"
    ],
    # "late payment" is already covered by the substring match on "payment".
    "finance_agent": [
        "payment", "fee", "penalty", "invoice", "late payment"
    ],
    "operations_agent": [
        "sla", "timeline", "milestone", "deliverable"
    ]
}


In [None]:
def router_node(state: MultiAgentState) -> MultiAgentState:
    """Pass-through node: log that the router ran and return the state unchanged."""
    print("üîÄ Router node executed")
    return state


In [None]:
def route_query(state: MultiAgentState) -> str:
    """Return the name of the single node that should handle the query.

    The lowercased query is checked against ``ROUTING_KEYWORDS`` with
    substring containment; the first agent (in dict order) with a matching
    keyword wins. Falls back to ``"legal_agent"`` when nothing matches.
    """
    text = state["query"].lower()
    agent = next(
        (
            name
            for name, terms in ROUTING_KEYWORDS.items()
            if any(term in text for term in terms)
        ),
        None,
    )

    if agent is None:
        print("üîÄ No match ‚Üí defaulting to legal_agent")
        return "legal_agent"

    print(f"üîÄ Routing to {agent}")
    return agent


We are changing the path to:

START → Router

              ├── Legal → END

              ├── Compliance → END

              ├── Finance → END
              
              └── Operations → END


In [None]:
from langgraph.graph import StateGraph
graph = StateGraph(MultiAgentState)


In [None]:
# One node per agent; names match the keys of ROUTING_KEYWORDS.
graph.add_node("legal_agent", legal_node)
graph.add_node("compliance_agent", compliance_node)
graph.add_node("finance_agent", finance_node)
graph.add_node("operations_agent", operations_node)


# The router node itself only logs; the branching is done by route_query on
# the conditional edges.
graph.add_node("router", router_node)


Here, the router is the decision point: `route_query` returns the name of the next node, and LangGraph follows that decision.

In [None]:
graph.set_entry_point("router") #we are saying langgraph that execution should start at the router

In [None]:
from langgraph.graph import END
# Every agent node is terminal: the graph ends after the selected agent runs.
graph.add_edge("legal_agent", END)
graph.add_edge("compliance_agent", END)
graph.add_edge("finance_agent", END)
graph.add_edge("operations_agent", END)


Here, we tell LangGraph that once the selected agent finishes executing, the graph should stop. Without these edges the execution path is incomplete and LangGraph will throw errors.

In [None]:
# After "router" runs, route_query(state) is called and its return value is
# looked up in this mapping to choose the next node.
graph.add_conditional_edges(
    "router",
    route_query,
    {
        "legal_agent": "legal_agent",
        "compliance_agent": "compliance_agent",
        "finance_agent": "finance_agent",
        "operations_agent": "operations_agent",
    }
)


We now connect the router to the agents conditionally.

This tells LangGraph:
* "Based on what the router returns, go to the corresponding agent node."

In [None]:
app = graph.compile()


In [None]:
state = {
    "query": "Review termination clause",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}


In [None]:
result = app.invoke(state)
result.keys()

In [None]:
result['legal']

In [None]:
state = {
"query": "Check late payment penalties",
"legal": {},
"compliance": {},
"finance": {},
"operations": {}
}

result = app.invoke(state)
result.keys()

In [None]:
result['finance']

******************TEST CASE 3 — MULTIPLE INTENT

In [None]:
from typing import TypedDict, Dict, Any
from langgraph.graph import StateGraph, END
import json


In [None]:
class ClauseAIMultiAgentState(TypedDict):
    """State for the multi-intent graph: query plus one output slot per agent."""

    query: str  # user question; read by route_query_multi
    legal: Dict[str, Any]  # legal agent output
    compliance: Dict[str, Any]  # compliance agent output
    finance: Dict[str, Any]  # finance agent output
    operations: Dict[str, Any]  # operations agent output


In [None]:
# Rebinds ROUTING_KEYWORDS with bare agent names as keys (the earlier table
# used "*_agent" node names). These keys are what coordinator_node dispatches on.
ROUTING_KEYWORDS = {
    "legal": ["termination", "governing law", "jurisdiction", "indemnity"],
    "compliance": ["gdpr", "audit", "regulatory", "data protection"],
    "finance": ["payment", "fee", "penalty", "invoice"],
    "operations": ["deliverable", "timeline", "sla", "milestone"]
}


In [None]:
def route_query_multi(state: ClauseAIMultiAgentState):
    """Return every agent whose keywords occur in the query.

    Keywords from ``ROUTING_KEYWORDS`` are matched by substring against the
    lowercased query. The result keeps ``ROUTING_KEYWORDS`` order and falls
    back to ``["legal"]`` when no keyword matches.
    """
    lowered = state["query"].lower()
    selected_agents = [
        name
        for name, terms in ROUTING_KEYWORDS.items()
        if any(term in lowered for term in terms)
    ] or ["legal"]  # fallback when nothing matched

    print(f"üîÄ Selected agents: {selected_agents}")
    return selected_agents


In [None]:
def load_output(path: str) -> dict:
    """Read the UTF-8 JSON file at *path* and return its ``"output"`` entry."""
    with open(path, "r", encoding="utf-8") as handle:
        document = json.load(handle)
    return document["output"]




In [None]:
def legal_node(state: ClauseAIMultiAgentState):
    """Load the saved legal agent output into ``state["legal"]`` and return the state."""
    print("‚ñ∂Ô∏è Executing Legal Agent")
    source = "/content/drive/MyDrive/info_google/Data/legal_agent_outputs/legal_agent_output.json"
    loaded = load_output(source)
    state["legal"] = loaded
    return state


def compliance_node(state: ClauseAIMultiAgentState):
    """Load the saved compliance agent output into ``state["compliance"]`` and return the state."""
    print("‚ñ∂Ô∏è Executing Compliance Agent")
    source = "/content/drive/MyDrive/info_google/Data/compliance_agent_outputs/compliance_agent_output.json"
    loaded = load_output(source)
    state["compliance"] = loaded
    return state


def finance_node(state: ClauseAIMultiAgentState):
    """Load the saved finance agent output into ``state["finance"]`` and return the state."""
    print("‚ñ∂Ô∏è Executing Finance Agent")
    source = "/content/drive/MyDrive/info_google/Data/finance_agent_outputs/finance_agent_output.json"
    loaded = load_output(source)
    state["finance"] = loaded
    return state


def operations_node(state: ClauseAIMultiAgentState):
    """Load the saved operations agent output into ``state["operations"]`` and return the state."""
    print("‚ñ∂Ô∏è Executing Operations Agent")
    source = "/content/drive/MyDrive/info_google/Data/operations_agent_outputs/operations_agent_output.json"
    loaded = load_output(source)
    state["operations"] = loaded
    return state


In [None]:
def coordinator_node(state: ClauseAIMultiAgentState):
    """Run every agent selected by ``route_query_multi`` inside one graph node.

    Agents run in the order the router returns them; each agent function
    receives the current state and its return value becomes the new state.
    Names without a handler are skipped.
    """
    selected = route_query_multi(state)
    handlers = {
        "legal": legal_node,
        "compliance": compliance_node,
        "finance": finance_node,
        "operations": operations_node,
    }

    for name in selected:
        handler = handlers.get(name)
        if handler is not None:
            state = handler(state)

    return state


In [None]:
# Single-node graph: the coordinator node runs all selected agents itself,
# then the graph ends.
graph = StateGraph(ClauseAIMultiAgentState)
graph.add_node("coordinator", coordinator_node)
graph.set_entry_point("coordinator")
graph.add_edge("coordinator", END)

app = graph.compile()


In [None]:
state = {
    "query": "Check GDPR compliance and payment terms",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}


In [None]:
result = app.invoke(state)
result.keys()


In [None]:
result['compliance']

In [None]:
result['finance']

In [None]:
result['operations']

extra Task :


```
- Add new keyword mapping
- Test multiple queries
- Observe which agent is selected
```

In [None]:
# Extended keyword table. route_query_multi reads the global ROUTING_KEYWORDS
# at call time, so rebinding it here changes routing for the already-compiled app.
ROUTING_KEYWORDS = {
    "legal": ["termination","governing law","jurisdiction","indemnity","liability","dispute"],
    "compliance": ["gdpr","audit","regulatory","data protection","privacy"],
    "finance": ["payment","fee","penalty","invoice","late payment"],
    "operations": ["deliverable","timeline","sla","milestone","service level"]
}

In [None]:
state = {
    "query": "Explain indemnity and liability clauses",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}

result = app.invoke(state)


In [None]:
state = {
    "query": "Check GDPR and data protection compliance",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}

result = app.invoke(state)


In [None]:
state = {
    "query": "Review SLAs and milestone timelines",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}

result = app.invoke(state)


In [None]:
state = {
    "query": "Check GDPR compliance and payment terms and termination clause",
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}

result = app.invoke(state)


# **Conversation Memory & State Persistence**

Without memory:

- Agents act independently

- No awareness of previous findings

- No cross-agent reasoning

With memory:

- Agents can see what others found

- You can build:

    - Refinement

    - Validation

    - Summarization

    - Conflict detection

In [None]:
from typing import TypedDict, List, Dict, Any

class GraphState(TypedDict):
    """Graph state with a shared memory list in addition to the per-agent slots."""

    query: str  # user question
    memory: List[dict]  # one record appended by each agent node that runs
    legal: Dict[str, Any]  # latest legal agent output
    compliance: Dict[str, Any]  # latest compliance agent output
    finance: Dict[str, Any]  # latest finance agent output
    operations: Dict[str, Any]  # latest operations agent output


In [None]:
input_state = {
    "query": "Check GDPR compliance and payment terms",
    "memory": [],
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}


Here, memory should be empty at the start, because each agent will later append one record to it in the format below:

{
    "agent": "legal",
    "output": {...}
}


In [None]:
def legal_node(state: GraphState):
    """Load the legal agent output, store it in the state and log it to memory.

    Writes ``state["legal"]`` and appends ``{"agent": "legal", "output": ...}``
    to ``state["memory"]`` in place, then returns the same state.
    """
    print("‚ñ∂Ô∏è Executing Legal Agent")

    loaded = load_output(
        "/content/drive/MyDrive/info_google/Data/legal_agent_outputs/legal_agent_output.json"
    )
    record = {"agent": "legal", "output": loaded}

    state["legal"] = loaded  # agent-specific slot
    state["memory"].append(record)  # shared, chronological memory
    return state


def compliance_node(state: GraphState):
    """Load the compliance agent output, store it in the state and log it to memory.

    Writes ``state["compliance"]`` and appends
    ``{"agent": "compliance", "output": ...}`` to ``state["memory"]`` in place,
    then returns the same state.
    """
    print("‚ñ∂Ô∏è Executing compliance Agent")

    loaded = load_output(
        "/content/drive/MyDrive/info_google/Data/compliance_agent_outputs/compliance_agent_output.json"
    )
    record = {"agent": "compliance", "output": loaded}

    state["compliance"] = loaded  # agent-specific slot
    state["memory"].append(record)  # shared, chronological memory
    return state



def finance_node(state: GraphState):
    """Load the finance agent output, store it in the state and log it to memory.

    Writes ``state["finance"]`` and appends
    ``{"agent": "finance", "output": ...}`` to ``state["memory"]`` in place,
    then returns the same state.
    """
    print("‚ñ∂Ô∏è Executing finance Agent")

    loaded = load_output(
        "/content/drive/MyDrive/info_google/Data/finance_agent_outputs/finance_agent_output.json"
    )
    record = {"agent": "finance", "output": loaded}

    state["finance"] = loaded  # agent-specific slot
    state["memory"].append(record)  # shared, chronological memory
    return state


def operations_node(state: GraphState):
    """Load the operations agent output, store it in the state and log it to memory.

    Writes ``state["operations"]`` and appends
    ``{"agent": "operations", "output": ...}`` to ``state["memory"]`` in place,
    then returns the same state.
    """
    print("‚ñ∂Ô∏è Executing operations Agent")

    loaded = load_output(
        "/content/drive/MyDrive/info_google/Data/operations_agent_outputs/operations_agent_output.json"
    )
    record = {"agent": "operations", "output": loaded}

    state["operations"] = loaded  # agent-specific slot
    state["memory"].append(record)  # shared, chronological memory
    return state

In [None]:
def coordinator_node(state: GraphState):
    """Run the agents chosen by ``route_query_multi``, threading the state through.

    Each selected agent function receives the current state (including the
    shared memory list) and returns the state used for the next agent.
    Names without a handler are skipped.
    """
    selected = route_query_multi(state)
    handlers = {
        "legal": legal_node,
        "compliance": compliance_node,
        "finance": finance_node,
        "operations": operations_node,
    }

    for name in selected:
        handler = handlers.get(name)
        if handler is not None:
            state = handler(state)

    return state


The coordinator node is the entry point of the graph: it decides which agents run and passes the state (including memory) forward.

In [None]:
def route_query_multi(state: GraphState):
    """Return every agent whose keywords occur in the query.

    Matching is substring containment against the lowercased query, in
    ``ROUTING_KEYWORDS`` order; ``["legal"]`` is returned when nothing matches.
    """
    lowered = state["query"].lower()
    selected_agents = [
        name
        for name, terms in ROUTING_KEYWORDS.items()
        if any(term in lowered for term in terms)
    ] or ["legal"]

    print(f"üîÄ Selected agents: {selected_agents}")
    return selected_agents


In [None]:
import json

def load_output(path: str) -> dict:
    """Parse the UTF-8 JSON file at *path* and return the value under ``"output"``."""
    with open(path, "r", encoding="utf-8") as source:
        raw_text = source.read()
    return json.loads(raw_text)["output"]


In [None]:
# Single-node graph over GraphState: the coordinator runs the selected agents
# (which append to state["memory"]) and then the graph ends.
graph = StateGraph(GraphState)

graph.add_node("coordinator", coordinator_node)

graph.set_entry_point("coordinator")
graph.add_edge("coordinator", END)

app = graph.compile()


In [None]:
# Redundant: the same graph was already compiled into `app` in the previous cell.
app = graph.compile()


In [None]:
input_state = {
    "query": "Check GDPR compliance and payment terms",
    "memory": [],
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}
result = app.invoke(input_state)

In [None]:
result['compliance']

In [None]:
result['finance']

In [None]:
result['legal']

In [None]:
# Summarise each memory record: which agent wrote it and the top-level keys
# of the output it stored.
print("üß† Memory Contents:\n")
for i, entry in enumerate(result["memory"], start=1):
    print(f"Step {i}:")
    print(f"Agent: {entry['agent']}")
    print(f"Output keys: {list(entry['output'].keys())}")
    print("-" * 40)


In [None]:
import pprint
pprint.pprint(result["memory"])


Here we get the expected result: memory started as an empty list and now contains the outputs of the agents that executed.

*****************Memory revicing extra task

In [None]:
from typing import TypedDict, List, Dict, Any

class GraphState(TypedDict):
    """Graph state with a shared memory list in addition to the per-agent slots."""

    query: str  # user question
    memory: List[dict]  # one {"agent", "output"} record per executed agent
    legal: Dict[str, Any]  # latest legal agent output
    compliance: Dict[str, Any]  # latest compliance agent output
    finance: Dict[str, Any]  # latest finance agent output
    operations: Dict[str, Any]  # latest operations agent output


In [None]:
import json

def load_output(path: str) -> dict:
    """Open *path* as UTF-8 JSON and hand back its ``"output"`` member."""
    with open(path, "r", encoding="utf-8") as stream:
        parsed = json.load(stream)
        result = parsed["output"]
    return result


In [None]:
def legal_node(state: GraphState):
    """Store the legal agent output in the state and append it to shared memory."""
    payload = load_output("/content/drive/MyDrive/info_google/Data/legal_agent_outputs/legal_agent_output.json")
    memory_entry = {"agent": "legal", "output": payload}

    state["legal"] = payload
    state["memory"].append(memory_entry)  # in-place append to the shared list
    return state


def compliance_node(state: GraphState):
    """Store the compliance agent output in the state and append it to shared memory."""
    payload = load_output("/content/drive/MyDrive/info_google/Data/compliance_agent_outputs/compliance_agent_output.json")
    memory_entry = {"agent": "compliance", "output": payload}

    state["compliance"] = payload
    state["memory"].append(memory_entry)  # in-place append to the shared list
    return state


def finance_node(state: GraphState):
    """Store the finance agent output in the state and append it to shared memory."""
    payload = load_output("/content/drive/MyDrive/info_google/Data/finance_agent_outputs/finance_agent_output.json")
    memory_entry = {"agent": "finance", "output": payload}

    state["finance"] = payload
    state["memory"].append(memory_entry)  # in-place append to the shared list
    return state


def operations_node(state:GraphState):
    """Store the operations agent output in the state and append it to shared memory."""
    payload = load_output("/content/drive/MyDrive/info_google/Data/operations_agent_outputs/operations_agent_output.json")
    memory_entry = {"agent": "operations", "output": payload}

    state["operations"] = payload
    state["memory"].append(memory_entry)  # in-place append to the shared list
    return state

In [None]:
# Reduced routing table for this task. There is no "operations" entry, so the
# router below can never select the operations agent.
ROUTING_KEYWORDS = {
    "legal": ["termination", "indemnity", "liability"],
    "compliance": ["gdpr", "privacy", "data protection"],
    "finance": ["payment", "fee", "penalty"]
}

def route_query_multi(state: GraphState):
    """Return every agent whose keywords occur in the query.

    Matching is substring containment against the lowercased query, in
    ``ROUTING_KEYWORDS`` order; ``["legal"]`` is returned when nothing matches.
    """
    lowered = state["query"].lower()
    selected = [
        name
        for name, terms in ROUTING_KEYWORDS.items()
        if any(term in lowered for term in terms)
    ] or ["legal"]

    print(f"üîÄ Selected agents: {selected}")
    return selected


In [None]:
def coordinator_node(state: GraphState):
    """Run the routed agents and print the memory order after each one.

    Handles the legal, compliance and finance agents; any other routed name
    runs no agent, but the memory summary is still printed for that iteration.
    """
    routed = route_query_multi(state)
    steps = {
        "legal": ("‚ñ∂Ô∏è Executing Legal Agent", legal_node),
        "compliance": ("‚ñ∂Ô∏è Executing Compliance Agent", compliance_node),
        "finance": ("‚ñ∂Ô∏è Executing Finance Agent", finance_node),
    }

    for name in routed:
        step = steps.get(name)
        if step is not None:
            banner, run_agent = step
            print(banner)
            state = run_agent(state)

        # Task 3 output: show how memory has accumulated so far.
        print("\nüß† Memory Accumulation Order:")
        for i, entry in enumerate(state["memory"], start=1):
            print(f"  Step {i}: {entry['agent']}")
        print("-" * 50)

    return state

In [None]:
from langgraph.graph import StateGraph, END

# Single-node graph: the coordinator runs the routed agents, then the graph ends.
graph = StateGraph(GraphState)
graph.add_node("coordinator", coordinator_node)
graph.set_entry_point("coordinator")
graph.add_edge("coordinator", END)

app = graph.compile()


In [None]:
input_state = {
    "query": "Check GDPR compliance and payment terms",
    "memory": [],
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}


In [None]:
final_state = app.invoke(input_state)


In [None]:
[m["agent"] for m in final_state["memory"]]


In [None]:
# Summary view of the run: the query, the order in which agents wrote to
# memory, and every agent slot (slots of agents that did not run keep their
# initial value).
final_result = {
    "query": final_state["query"],
    "execution_order": [m["agent"] for m in final_state["memory"]],
    "results": {
        "compliance": final_state["compliance"],
        "finance": final_state["finance"],
        "legal": final_state["legal"],
        "operations": final_state["operations"]
    }
}

final_result


# **Agent-to-Agent Communication & Validation Logic**

In [None]:
from typing import TypedDict, List, Dict, Any

class GraphState(TypedDict):
    """Collaborative graph state: query, shared memory, validation notes, agent slots."""

    query: str  # user question

    # Shared memory of agent findings (chronological); each node appends a
    # {"agent": ..., "findings": ...} record.
    memory: List[dict]

    # Validation and cross-agent notes appended by nodes that found earlier
    # agents' records in memory.
    validation_notes: List[str]

    # Agent-specific latest outputs
    legal: Dict[str, Any]
    compliance: Dict[str, Any]
    finance: Dict[str, Any]
    operations: Dict[str, Any]


We are extending the graph state again so that agents can:

* Read what other agents discovered

* Add validation notes

* Collaborate instead of working in isolation

In [None]:
import json

def load_output(path: str) -> dict:
    """Return the ``"output"`` value from the UTF-8 JSON file at *path*."""
    with open(path, "r", encoding="utf-8") as fh:
        contents = json.loads(fh.read())
    return contents["output"]


In [None]:
input_state = {
    "query": "Check GDPR compliance and payment terms",

    # Shared knowledge
    "memory": [],
    "validation_notes": [],


    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}


In [None]:
def compliance_node(state: GraphState):
    """Load the compliance output and publish its clauses to shared memory.

    Stores the full output in ``state["compliance"]`` and appends a
    ``{"agent": "compliance", "findings": ...}`` record, where the findings
    are the output's ``"extracted_clauses"`` (``[]`` if that key is absent).
    """
    result = load_output(
        "/content/drive/MyDrive/info_google/Data/compliance_agent_outputs/compliance_agent_output.json"
    )
    state["compliance"] = result

    # Make the findings readable by agents that run later.
    clauses = result.get("extracted_clauses", [])
    state["memory"].append({"agent": "compliance", "findings": clauses})
    return state


We now modify the Compliance Agent so that it:

- Writes its findings into the shared memory

 - Makes those findings readable by other agents

 - Acts as the first contributor in the collaborative chain

The most important part of its output that we share is `extracted_clauses`.

These clauses may:

 - Trigger penalties

 - Affect payments

 - Cause regulatory risks

 - Interact with SLAs

 - So we explicitly store them in memory as findings.

In [None]:
def finance_node(state: GraphState):
    """Load the finance output, note prior compliance findings, publish clauses.

    Memory is inspected for compliance records before anything is written.
    If any exist, a fixed validation note is appended. The finance output is
    stored in ``state["finance"]`` and its ``"extracted_clauses"`` (``[]`` if
    absent) are appended to memory.
    """
    # Snapshot of what compliance has already written to memory.
    prior_compliance = [
        record for record in state["memory"] if record["agent"] == "compliance"
    ]

    result = load_output(
        "/content/drive/MyDrive/info_google/Data/finance_agent_outputs/finance_agent_output.json"
    )
    state["finance"] = result

    if prior_compliance:
        note = "Finance reviewed compliance findings for potential penalty or regulatory conflicts."
        state["validation_notes"].append(note)

    clauses = result.get("extracted_clauses", [])
    state["memory"].append({"agent": "finance", "findings": clauses})
    return state


The Finance Agent first reads the Compliance Agent's findings from `state['memory']`, performs its own analysis, adds a validation note if compliance findings are present, and finally writes its own findings to memory.

Why Finance Needs Compliance Memory

Compliance findings may contain:

 - Regulatory obligations

-  Audit requirements

-  Data protection penalties

Finance clauses may include:

 - Late fees

-  Penalties

-  Payment suspensions

⚠️ These can conflict or compound risk, so Finance must check compliance findings first.

In [None]:
def legal_node(state: GraphState):
    """Load the legal output, note prior findings, publish clauses to memory.

    Memory is inspected for compliance and finance records first. A fixed
    validation note is appended only when both kinds are present. The legal
    output is stored in ``state["legal"]`` and its ``"extracted_clauses"``
    (``[]`` if absent) are appended to memory.
    """

    def _records_from(agent_name):
        # Memory records written by the given agent.
        return [record for record in state["memory"] if record["agent"] == agent_name]

    prior_compliance = _records_from("compliance")
    prior_finance = _records_from("finance")

    result = load_output(
        "/content/drive/MyDrive/info_google/Data/legal_agent_outputs/legal_agent_output.json"
    )
    state["legal"] = result

    if prior_compliance and prior_finance:
        note = "Legal validated compliance and finance clauses for contractual enforceability and risk consistency."
        state["validation_notes"].append(note)

    clauses = result.get("extracted_clauses", [])
    state["memory"].append({"agent": "legal", "findings": clauses})
    return state


The Legal Agent acts as the final validator.

It will:

Read all prior agent findings (Compliance + Finance) from shared memory

 - Perform a final legal consistency and enforceability check

 - Add a legal validation note

 - Write its own findings back to memory

 - This step closes the collaboration loop.

🧠 Why the Legal Agent Comes Last

Legal is responsible for:

- Enforceability ,  Contractual consistency , Risk aggregation

In [None]:
from langgraph.graph import StateGraph, END

graph = StateGraph(GraphState)

# Add collaborative agent nodes
graph.add_node("compliance_agent", compliance_node)
graph.add_node("finance_agent", finance_node)
graph.add_node("legal_agent", legal_node)

# Define execution order: compliance -> finance -> legal -> END, so each node
# finds the earlier agents' records in state["memory"].
graph.set_entry_point("compliance_agent")
graph.add_edge("compliance_agent", "finance_agent")
graph.add_edge("finance_agent", "legal_agent")
graph.add_edge("legal_agent", END)


In [None]:
app=graph.compile()

In [None]:
input_state = {
    "query": "Check GDPR compliance, payment penalties, and enforceability of clauses",

    # Shared collaboration fields
    "memory": [],
    "validation_notes": [],

    # Agent outputs
    "legal": {},
    "compliance": {},
    "finance": {},
    "operations": {}
}
final_state = app.invoke(input_state)

In [None]:
# Print the memory records in the order the agents appended them.
print("üß† FINAL MEMORY (Agent Findings Order):\n")

for i, entry in enumerate(final_state["memory"], start=1):
    print(f"Step {i}")
    print(f"Agent   : {entry['agent']}")
    print(f"Findings: {entry['findings']}")
    print("-" * 60)


In [None]:
# Print the cross-agent validation notes, numbered in the order they were added.
print("‚öñÔ∏è VALIDATION NOTES (Cross-Agent Checks):\n")

for i, note in enumerate(final_state["validation_notes"], start=1):
    print(f"{i}. {note}")


In [None]:
# Collect the three agent outputs produced by the collaborative graph.
final_results = {
    "compliance": final_state["compliance"],
    "finance": final_state["finance"],
    "legal": final_state["legal"]
}

final_results


***Extra Task

In [None]:
def operations_node(state: GraphState):
    """Graph node for the operations agent.

    Loads the stored operations output, records it under
    ``state["operations"]``, adds a validation note when legal findings are
    already present in shared memory, and appends the operations findings to
    ``state["memory"]``. The (mutated) state is returned.
    """
    # Collect the legal entries that earlier nodes wrote to shared memory.
    prior_legal_entries = [
        entry for entry in state["memory"] if entry["agent"] == "legal"
    ]

    # Load the structured operations output and keep it as the latest result.
    ops_output = load_output(
        "/content/drive/MyDrive/info_google/Data/operations_agent_outputs/operations_agent_output.json"
    )
    state["operations"] = ops_output

    # Only note the enforceability review when legal findings exist.
    if prior_legal_entries:
        state["validation_notes"].append(
            "Operations reviewed SLAs and deliverables for legal enforceability."
        )

    # Publish the operations findings for any later reader of shared memory.
    ops_memory_entry = {
        "agent": "operations",
        "findings": ops_output.get("extracted_clauses", []),
    }
    state["memory"].append(ops_memory_entry)

    return state


In [None]:
# Rebuild the workflow with the operations agent appended after legal:
# compliance -> finance -> legal -> operations -> END.
graph = StateGraph(GraphState)

for _node_name, _node_fn in (
    ("compliance_agent", compliance_node),
    ("finance_agent", finance_node),
    ("legal_agent", legal_node),
    ("operations_agent", operations_node),
):
    graph.add_node(_node_name, _node_fn)

graph.set_entry_point("compliance_agent")
for _src, _dst in (
    ("compliance_agent", "finance_agent"),
    ("finance_agent", "legal_agent"),
    ("legal_agent", "operations_agent"),
    ("operations_agent", END),
):
    graph.add_edge(_src, _dst)

app = graph.compile()


In [None]:
# Fresh initial state for the four-agent graph; every agent slot starts empty.
input_state = {
    "query": "Check GDPR compliance, payment penalties, SLA enforceability, and termination clauses",
    "memory": [],
    "validation_notes": [],
    **{agent: {} for agent in ("legal", "compliance", "finance", "operations")},
}

final_state = app.invoke(input_state)
final_state["validation_notes"]


# Compliance Pipeline

In [None]:
pip install pinecone

In [None]:
import pinecone
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; the retrieval helpers call its encode() on query text.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")


In [None]:

import os

from pinecone import Pinecone

# Credentials must not be hard-coded in the notebook: read the Pinecone API
# key from the environment so it is never committed or shared with the file.
# NOTE(review): the key that was previously embedded here should be treated
# as leaked and rotated.
_pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not _pinecone_api_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it (or load it from your "
        "secret store) before running this cell."
    )

pc = Pinecone(api_key=_pinecone_api_key)

# Connect to existing index
index = pc.Index("cuad-index")


In [None]:
# Query text for the compliance pipeline; it is passed to
# retrieve_compliance_chunks(), which embeds it for semantic search.
COMPLIANCE_QUERY = """
Identify clauses related to:
- Regulatory compliance
- Data protection
- Audits and reporting
"""


The Compliance Query Template is the single source of truth that drives:

 - What the RAG layer retrieves

 - Which clauses are even visible to the Compliance Agent

 - What kind of risk signals can be detected downstream

In other words:

- If a clause is not retrievable via this query, it does not exist for the agent.

In [None]:
def retrieve_compliance_chunks(query: str, top_k: int = 5):
    """
    Run a semantic search for compliance-related contract chunks.

    The query is embedded with the module-level ``embedding_model`` and sent
    to the module-level vector ``index``. Returns a list of dicts with the
    keys ``chunk_id``, ``text`` and ``score`` (rounded to 3 decimals), one
    per match, in the order the index returned them.
    """
    query_vector = embedding_model.encode(query).tolist()

    search_result = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "chunk_id": hit["id"],
            "text": hit["metadata"]["text"],
            "score": round(hit["score"], 3),
        }
        for hit in search_result["matches"]
    ]


Imagine we have:

A very big contract (50+ pages)

You want only the parts related to compliance

You don’t want to read everything

so what we are going to do now?

* You search the contract using smart meaning-based search

* You collect only the important paragraphs

* You lock them and give them to the Compliance Agent

In [None]:
# Retrieve the top 5 chunks for the compliance query and print each one
# (id, similarity score, text) for manual inspection.
retrieved_chunks = retrieve_compliance_chunks(
    query=COMPLIANCE_QUERY,
    top_k=5
)

for chunk in retrieved_chunks:
    print(f"\nChunk ID: {chunk['chunk_id']}")
    print(f"Score: {chunk['score']}")
    print(chunk["text"])


In [None]:
def combine_retrieved_chunks(retrieved_chunks):
    """
    Merge the ``text`` of every retrieved chunk into one context string.

    Each chunk text is stripped and the pieces are joined with a ``---``
    separator line. An empty or missing chunk list yields ``""``.
    """
    separator = "\n\n---\n\n"
    return separator.join(
        item["text"].strip() for item in (retrieved_chunks or ())
    )


In [None]:
# Build the single context string handed to the compliance agent and show it.
compliance_context = combine_retrieved_chunks(retrieved_chunks)
print(compliance_context)


Here, we have extracted all the retrieved chunks and combined them into a single context string.

In [None]:
# Instruction block for the compliance agent; run_compliance_agent() places
# it ahead of the retrieved contract text when building the prompt.
COMPLIANCE_AGENT_PROMPT = """
You are a Compliance Analysis Agent.

Your task:
- Identify compliance-related clauses ONLY from the provided text.

Rules:
- Copy clauses verbatim (no paraphrasing)
- Do NOT invent clauses
- Do NOT use external knowledge
- Use ONLY the provided text

Return output in strict JSON format:

{
  "clause_type": "compliance",
  "extracted_clauses": [],
  "risk_level": "low | medium | high | unknown",
  "confidence": 0.0,
  "evidence": []
}
"""


In [None]:
def run_compliance_agent(compliance_context: str, llm):
    """
    Ask the LLM to extract compliance clauses from the retrieved context.

    When the context is empty or whitespace-only the LLM is not called and an
    empty result with ``risk_level`` "unknown" is returned. Otherwise the
    return value is whatever ``llm(prompt)`` returns.
    """
    has_text = bool(compliance_context.strip())
    if has_text:
        # Instructions first, then the contract text under a TEXT: header.
        prompt = "\n{}\n\nTEXT:\n{}\n".format(
            COMPLIANCE_AGENT_PROMPT, compliance_context
        )
        return llm(prompt)

    # Nothing was retrieved, so there is nothing to analyse.
    return dict(
        clause_type="compliance",
        extracted_clauses=[],
        risk_level="unknown",
        confidence=0.0,
        evidence=[],
    )


In [None]:
def llm(prompt: str):
    """Placeholder LLM: ignores the prompt and returns a fixed empty result."""
    return dict(
        clause_type="compliance",
        extracted_clauses=[],
        risk_level="unknown",
        confidence=0.0,
        evidence=[],
    )


In [None]:
# Run the compliance agent on the combined context using the llm stub above.
compliance_output = run_compliance_agent(
    compliance_context=compliance_context,
    llm=llm
)

print(compliance_output)

In [None]:
# Re-display the context the agent was given, for comparison with its output.
print(compliance_context)


In [None]:
def validate_compliance_output(compliance_output, compliance_context):
    """
    Keep only the clauses and evidence that literally occur in the context.

    Returns ``(validated_output, validation_notes)``. ``validated_output`` is
    a shallow copy of the agent output with ungrounded clauses/evidence
    removed; ``validation_notes`` has one message per removed item. When any
    clause was removed, the confidence is halved (rounded to 2 decimals).
    """
    source_text = compliance_context or ""
    notes = []

    def _keep_grounded(items, label):
        # Substring check against the retrieved text; log anything dropped.
        kept = []
        for item in items:
            if item in source_text:
                kept.append(item)
            else:
                notes.append(f"Removed ungrounded {label}: {item[:60]}...")
        return kept

    result = compliance_output.copy()

    claimed_clauses = compliance_output.get("extracted_clauses", [])
    kept_clauses = _keep_grounded(claimed_clauses, "clause")
    kept_evidence = _keep_grounded(
        compliance_output.get("evidence", []), "evidence"
    )

    result["extracted_clauses"] = kept_clauses
    result["evidence"] = kept_evidence

    # Losing a clause to grounding failure halves the reported confidence.
    if len(kept_clauses) < len(claimed_clauses):
        result["confidence"] = round(result.get("confidence", 0.0) * 0.5, 2)

    return result, notes


In [None]:
# Ground-check the agent output against the retrieved context and show both
# the validated result and the notes about anything that was removed.
validated_compliance_output, validation_notes = validate_compliance_output(
    compliance_output=compliance_output,
    compliance_context=compliance_context
)

print(validated_compliance_output)
print(validation_notes)


In [None]:
def generate_compliance_risk_summary(validated_output):
    """
    Generate a human-readable compliance risk summary
    from validated compliance analysis.

    Args:
        validated_output: dict with optional keys ``extracted_clauses``,
            ``risk_level`` and ``confidence``.

    Returns:
        dict with ``overall_compliance_risk`` ("high", "medium", "low" or
        "unknown"), ``summary`` and ``confidence`` (passed through).
    """

    clauses = validated_output.get("extracted_clauses", [])
    # Normalise so values such as "High" or " low " are still recognised.
    risk_level = str(validated_output.get("risk_level", "unknown")).strip().lower()
    confidence = validated_output.get("confidence", 0.0)

    if not clauses:
        summary = (
            "No explicit compliance or regulatory clauses were identified "
            "in the analyzed contract sections. This may indicate missing "
            "or unclear compliance obligations."
        )
        overall_risk = "high"

    elif risk_level == "high":
        summary = (
            "Compliance clauses were identified, but they indicate "
            "significant regulatory or audit-related risks."
        )
        overall_risk = "high"

    elif risk_level == "medium":
        summary = (
            "Some compliance obligations are present, but they may be "
            "generic or incomplete."
        )
        overall_risk = "medium"

    elif risk_level == "low":
        summary = (
            "Clear compliance and regulatory obligations were identified "
            "in the contract."
        )
        overall_risk = "low"

    else:
        # An unassessed ("unknown") or unrecognised level must not be
        # reported as low risk.
        summary = (
            "Compliance clauses were identified, but their risk level "
            "could not be determined."
        )
        overall_risk = "unknown"

    return {
        "overall_compliance_risk": overall_risk,
        "summary": summary,
        "confidence": confidence
    }



# Summarise the risk of the validated compliance output and display it.
compliance_risk_summary = generate_compliance_risk_summary(
    validated_compliance_output
)

print(compliance_risk_summary)


In [None]:
def package_compliance_pipeline_output(
    query,
    validated_output,
    risk_summary,
    validation_notes
):
    """
    Assemble the final, user-facing compliance pipeline result.

    Clauses and evidence come from ``validated_output``; risk level,
    confidence and summary text come from ``risk_summary``. The query is
    stripped of surrounding whitespace and the status is always "completed".
    """
    cleaned_query = query.strip()

    result_section = {
        "clause_type": validated_output.get("clause_type", "compliance"),
        "extracted_clauses": validated_output.get("extracted_clauses", []),
        "risk_level": risk_summary.get("overall_compliance_risk"),
        "confidence": risk_summary.get("confidence"),
        "evidence": validated_output.get("evidence", []),
    }

    packaged = {"pipeline": "compliance", "query": cleaned_query}
    packaged["result"] = result_section
    packaged["risk_summary"] = risk_summary.get("summary")
    packaged["validation_notes"] = validation_notes
    packaged["status"] = "completed"
    return packaged



# Package every intermediate compliance result into the final structure.
final_compliance_output = package_compliance_pipeline_output(
    query=COMPLIANCE_QUERY,
    validated_output=validated_compliance_output,
    risk_summary=compliance_risk_summary,
    validation_notes=validation_notes
)

print(final_compliance_output)


In [None]:
import json

# Pretty-print the final compliance output as indented JSON.
print(json.dumps(final_compliance_output, indent=4))


It is the final output that this compliance agent is returning to the user or UI

#### Extra Task

In [None]:
# Three query variants, from general (A) to naming specific regulations and
# standards (C); each is run through run_compliance_pipeline() below.
QUERY_A = """
Identify clauses related to:
- Regulatory compliance
- Data protection
- Audits and reporting
"""


QUERY_B = """
Identify clauses related to:
- GDPR
- Personal data processing
- Regulatory audits
- Data security obligations
"""


QUERY_C = """
Identify clauses related to:
- GDPR Article 32
- ISO 27001
- SOC 2 audits
"""


In [None]:
def run_compliance_pipeline(query):
    """
    Run the compliance pipeline end to end for one query.

    Steps: retrieve chunks, combine them into a context, run the agent with
    the module-level ``llm``, validate grounding, recalibrate confidence,
    summarise risk and package the result.

    Returns a dict with ``retrieved_chunks`` and ``final_output``.
    """
    chunks = retrieve_compliance_chunks(query)
    context = combine_retrieved_chunks(chunks)

    agent_result = run_compliance_agent(compliance_context=context, llm=llm)
    checked_result, notes = validate_compliance_output(agent_result, context)

    # Replace the agent's own confidence with the calibrated score before
    # the risk summary reads it.
    checked_result["confidence"] = calibrate_compliance_confidence(
        checked_result, context, notes
    )

    summary = generate_compliance_risk_summary(checked_result)
    packaged = package_compliance_pipeline_output(
        query=query,
        validated_output=checked_result,
        risk_summary=summary,
        validation_notes=notes,
    )

    return {"retrieved_chunks": chunks, "final_output": packaged}


In [None]:
def calibrate_compliance_confidence(
    validated_output,
    compliance_context,
    validation_notes
):
    """
    Compute a confidence score in [0.0, 1.0] from the validated output.

    The score combines a base value from the number of clauses, a boost for
    clauses mentioning specific compliance terms, a bonus for how much of the
    context the evidence covers, and a penalty for every "Removed" note. The
    result is rounded to 2 decimals.
    """
    found_clauses = validated_output.get("extracted_clauses", [])
    found_evidence = validated_output.get("evidence", [])
    source_text = compliance_context or ""

    # 1. Base score from the clause count.
    how_many = len(found_clauses)
    if how_many == 0:
        score = 0.0
    elif how_many == 1:
        score = 0.3
    elif how_many <= 3:
        score = 0.6
    else:
        score = 0.8

    # 2. Specificity boost: the strongest matching term wins for each clause.
    for clause in found_clauses:
        lowered = clause.lower()
        if any(term in lowered for term in ("gdpr", "audit", "regulator")):
            score += 0.3
        elif "data protection" in lowered:
            score += 0.2
        elif "applicable law" in lowered:
            score += 0.1

    # Cap before the evidence bonus is added.
    score = min(score, 1.0)

    # 3. Evidence coverage relative to the context length.
    if source_text and found_evidence:
        covered = sum(len(piece) for piece in found_evidence) / len(source_text)
        if covered > 0.3:
            score += 0.2
        elif covered > 0.1:
            score += 0.1

    # 4. Penalty for items dropped during validation.
    removed_count = sum(1 for note in validation_notes if "Removed" in note)
    if removed_count == 1:
        score -= 0.2
    elif removed_count >= 2:
        score -= 0.4

    return max(0.0, min(round(score, 2), 1.0))


In [None]:
# Run the full pipeline once per query variant, in A, B, C order.
result_A, result_B, result_C = (
    run_compliance_pipeline(variant)
    for variant in (QUERY_A, QUERY_B, QUERY_C)
)


In [None]:
def show_clauses(label, result):
    """Print a header, the clause count and the first 80 characters of each clause."""
    print(f"\n=== {label} ===")
    found = result["final_output"]["result"]["extracted_clauses"]
    print("Clauses found:", len(found))
    for clause in found:
        print(f"- {clause[:80]}")


# Print the extracted clauses for each query variant.
for _label, _result in (
    ("Query A", result_A),
    ("Query B", result_B),
    ("Query C", result_C),
):
    show_clauses(_label, _result)


In [None]:
def show_confidence(label, result):
    """Print the packaged confidence and risk level of one pipeline result."""
    confidence_value = result["final_output"]["result"]["confidence"]
    risk_value = result["final_output"]["result"]["risk_level"]
    line = "{}: confidence={}, risk={}".format(label, confidence_value, risk_value)
    print(line)

# Print confidence and risk for each query variant.
for _label, _result in (
    ("Query A", result_A),
    ("Query B", result_B),
    ("Query C", result_C),
):
    show_confidence(_label, _result)



# **Finance Pipeline**

In [None]:
# Query text for the finance pipeline; it is passed to
# retrieve_finance_chunks(), which embeds it for semantic search.
FINANCE_QUERY = """
Identify clauses related to:
- Payment terms
- Fees
- Penalties
- Invoicing
"""


In [None]:
def retrieve_finance_chunks(query: str, top_k: int = 5):
    """
    Run a semantic search for finance-related contract chunks.

    The query is embedded with the module-level ``embedding_model`` and sent
    to the module-level Pinecone ``index``. Returns a list of dicts with the
    keys ``chunk_id``, ``text`` and ``score`` (rounded to 3 decimals), one
    per match, in the order the index returned them.
    """
    query_vector = embedding_model.encode(query).tolist()

    search_result = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "chunk_id": hit["id"],
            "text": hit["metadata"]["text"],
            "score": round(hit["score"], 3),
        }
        for hit in search_result["matches"]
    ]


In [None]:
# Retrieve the top 5 chunks for the finance query.
retrieved_finance_chunks = retrieve_finance_chunks(
    query=FINANCE_QUERY,
    top_k=5
)

In [None]:
# Print each retrieved finance chunk (id, similarity score, text) for inspection.
for chunk in retrieved_finance_chunks:
    print("\nChunk ID:", chunk["chunk_id"])
    print("Score:", chunk["score"])
    print(chunk["text"])


In [None]:
def combine_finance_chunks(retrieved_chunks):
    """
    Merge the ``text`` of every retrieved finance chunk into one string.

    Each chunk text is stripped and the pieces are joined with a ``---``
    separator line. An empty or missing chunk list yields ``""``.
    """
    separator = "\n\n---\n\n"
    return separator.join(
        item["text"].strip() for item in (retrieved_chunks or ())
    )


# Build the single context string handed to the finance agent and show it.
finance_context = combine_finance_chunks(retrieved_finance_chunks)

print(finance_context)


In [None]:
# Instruction block for the finance agent; run_finance_agent() places it
# ahead of the retrieved contract text when building the prompt.
FINANCE_AGENT_PROMPT = """
You are a Finance Analysis Agent.

Your task:
- Identify finance-related clauses ONLY from the provided text.

Focus on:
- Payment terms
- Fees
- Penalties
- Interest on late payments

Rules:
- Copy clauses verbatim (no paraphrasing)
- Do NOT invent clauses
- Do NOT use external knowledge
- Use ONLY the provided text

Return output in strict JSON format:

{
  "clause_type": "finance",
  "extracted_clauses": [],
  "risk_level": "low | medium | high | unknown",
  "confidence": 0.0,
  "evidence": []
}
"""


In [None]:
def run_finance_agent(finance_context: str, llm):
    """
    Ask the LLM to extract finance clauses from the retrieved context.

    When the context is empty or whitespace-only the LLM is not called and an
    empty result with ``risk_level`` "unknown" is returned. Otherwise the
    return value is whatever ``llm(prompt)`` returns.
    """
    has_text = bool(finance_context.strip())
    if has_text:
        # Instructions first, then the contract text under a TEXT: header.
        prompt = "\n{}\n\nTEXT:\n{}\n".format(
            FINANCE_AGENT_PROMPT, finance_context
        )
        return llm(prompt)

    # Nothing was retrieved, so there is nothing to analyse.
    return dict(
        clause_type="finance",
        extracted_clauses=[],
        risk_level="unknown",
        confidence=0.0,
        evidence=[],
    )


In [None]:
def llm(prompt: str):
    """Placeholder LLM: ignores the prompt and returns a fixed empty result.

    NOTE(review): this stub reports ``clause_type`` "compliance" although it
    is defined in the finance section - confirm whether that is intended.
    """
    return dict(
        clause_type="compliance",
        extracted_clauses=[],
        risk_level="unknown",
        confidence=0.0,
        evidence=[],
    )


In [None]:
# Run the finance agent on the combined context using the llm stub above.
finance_output = run_finance_agent(
    finance_context=finance_context,
    llm=llm
)

print(finance_output)


In [None]:
def validate_finance_output(finance_output, finance_context):
    """
    Keep only the clauses and evidence that literally occur in the context.

    Returns ``(validated_output, validation_notes)``. ``validated_output`` is
    a shallow copy of the agent output with ungrounded clauses/evidence
    removed; ``validation_notes`` has one message per removed item. When any
    clause was removed, the confidence is halved (rounded to 2 decimals).
    """
    source_text = finance_context or ""
    notes = []

    def _keep_grounded(items, label):
        # Substring check against the retrieved text; log anything dropped.
        kept = []
        for item in items:
            if item in source_text:
                kept.append(item)
            else:
                notes.append(
                    f"Removed ungrounded finance {label}: {item[:60]}..."
                )
        return kept

    result = finance_output.copy()

    claimed_clauses = finance_output.get("extracted_clauses", [])
    kept_clauses = _keep_grounded(claimed_clauses, "clause")
    kept_evidence = _keep_grounded(finance_output.get("evidence", []), "evidence")

    result["extracted_clauses"] = kept_clauses
    result["evidence"] = kept_evidence

    # Losing a clause to grounding failure halves the reported confidence.
    if len(kept_clauses) < len(claimed_clauses):
        result["confidence"] = round(result.get("confidence", 0.0) * 0.5, 2)

    return result, notes


In [None]:
# Ground-check the finance agent output against the retrieved context and
# show the validated result plus the notes about anything removed.
validated_finance_output, finance_validation_notes = validate_finance_output(
    finance_output=finance_output,
    finance_context=finance_context
)

print(validated_finance_output)
print(finance_validation_notes)


In [None]:
def generate_finance_risk_summary(validated_output):
    """
    Generate a finance risk summary based on validated finance clauses.

    The risk is derived from keywords in the clause text, not from the
    agent's own ``risk_level``: no clauses, "interest", "penalty" or
    "late fee" all yield "high"; anything else yields "medium".

    Returns:
        dict with ``overall_finance_risk``, ``summary`` and ``confidence``
        (passed through from ``validated_output``, default 0.0).
    """

    clauses = validated_output.get("extracted_clauses", [])
    confidence = validated_output.get("confidence", 0.0)

    # Single lower-cased blob so keyword checks are case-insensitive.
    text_blob = " ".join(c.lower() for c in clauses)

    if not clauses:
        overall_risk = "high"
        summary = (
            "No explicit financial clauses were identified. "
            "This may indicate unclear or missing payment obligations."
        )

    elif "interest" in text_blob:
        overall_risk = "high"
        summary = (
            "Interest clauses were identified, indicating increased "
            "financial exposure in case of late payments."
        )

    elif "penalty" in text_blob or "late fee" in text_blob:
        overall_risk = "high"
        summary = (
            "Penalty or late fee clauses were identified, which may "
            "increase financial risk."
        )

    else:
        overall_risk = "medium"
        summary = (
            "Payment-related clauses were identified without explicit "
            "penalties or interest."
        )

    return {
        "overall_finance_risk": overall_risk,
        "summary": summary,
        "confidence": confidence
    }



# Summarise the risk of the validated finance output and display it.
finance_risk_summary = generate_finance_risk_summary(
    validated_finance_output
)

print(finance_risk_summary)


In [None]:
def package_finance_pipeline_output(
    query,
    validated_output,
    risk_summary,
    validation_notes
):
    """
    Assemble the final, user-facing finance pipeline result.

    Clauses and evidence come from ``validated_output``; risk level,
    confidence and summary text come from ``risk_summary``. The query is
    stripped of surrounding whitespace and the status is always "completed".
    """
    cleaned_query = query.strip()

    result_section = {
        "clause_type": validated_output.get("clause_type", "finance"),
        "extracted_clauses": validated_output.get("extracted_clauses", []),
        "risk_level": risk_summary.get("overall_finance_risk"),
        "confidence": risk_summary.get("confidence"),
        "evidence": validated_output.get("evidence", []),
    }

    packaged = {"pipeline": "finance", "query": cleaned_query}
    packaged["result"] = result_section
    packaged["risk_summary"] = risk_summary.get("summary")
    packaged["validation_notes"] = validation_notes
    packaged["status"] = "completed"
    return packaged




# Package every intermediate finance result into the final structure.
final_finance_output = package_finance_pipeline_output(
    query=FINANCE_QUERY,
    validated_output=validated_finance_output,
    risk_summary=finance_risk_summary,
    validation_notes=finance_validation_notes
)


In [None]:
import json
# Pretty-print the final finance output as indented JSON.
print(json.dumps(final_finance_output, indent=4))


**Extra Task**

In [None]:
# Redefines FINANCE_QUERY with an added "Interest" topic for the
# before/after comparison.
# NOTE(review): retrieval has to be re-run with this query for the change to
# affect the "after" output - confirm the cells below do so.
FINANCE_QUERY = """
Identify clauses related to:
- Payment terms
- Fees
- Penalties
- Invoicing
-Interest
"""


In [None]:
def retrieve_finance_chunks(query: str, top_k: int = 5):
    """
    Run a semantic search for finance-related contract chunks.

    Embeds ``query`` with the module-level ``embedding_model``, queries the
    module-level Pinecone ``index`` for ``top_k`` matches and returns one
    ``{"chunk_id", "text", "score"}`` dict per match (score rounded to 3
    decimals).
    """
    query_vector = embedding_model.encode(query).tolist()
    search_result = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    formatted = []
    for hit in search_result["matches"]:
        formatted.append(
            dict(
                chunk_id=hit["id"],
                text=hit["metadata"]["text"],
                score=round(hit["score"], 3),
            )
        )
    return formatted


In [None]:
# Re-run retrieval with the updated FINANCE_QUERY. Without this the cells
# below keep using the chunks fetched for the earlier query, and the
# before/after comparison cannot show any difference.
retrieved_finance_chunks = retrieve_finance_chunks(
    query=FINANCE_QUERY,
    top_k=5
)

# Print each retrieved chunk (id, similarity score, text) for inspection.
for chunk in retrieved_finance_chunks:
    print("\nChunk ID:", chunk["chunk_id"])
    print("Score:", chunk["score"])
    print(chunk["text"])


def combine_finance_chunks(retrieved_chunks):
    """
    Join the stripped ``text`` of each retrieved finance chunk.

    Pieces are separated by a ``---`` line; an empty or missing chunk list
    gives an empty string.
    """
    if not retrieved_chunks:
        return ""

    stripped_texts = [item["text"].strip() for item in retrieved_chunks]
    return "\n\n---\n\n".join(stripped_texts)


# Rebuild the finance context from the current retrieved chunks and show it.
finance_context = combine_finance_chunks(retrieved_finance_chunks)

print(finance_context)


In [None]:
def run_finance_agent(finance_context: str, llm):
    """
    Extract finance clauses from the retrieved context via the LLM.

    A blank context short-circuits to an empty "unknown" result without
    calling ``llm``; otherwise ``llm(prompt)`` is returned unchanged.
    """
    if finance_context.strip():
        # Instruction block followed by the contract text under TEXT:.
        prompt = "".join(
            ["\n", FINANCE_AGENT_PROMPT, "\n\nTEXT:\n", finance_context, "\n"]
        )
        return llm(prompt)

    empty_result = {
        "clause_type": "finance",
        "extracted_clauses": [],
        "risk_level": "unknown",
        "confidence": 0.0,
        "evidence": [],
    }
    return empty_result


In [None]:
def llm(prompt: str):
    """Placeholder LLM: ignores the prompt and returns a fixed empty result."""
    stub_result = {}
    stub_result["clause_type"] = "compliance"
    stub_result["extracted_clauses"] = []
    stub_result["risk_level"] = "unknown"
    stub_result["confidence"] = 0.0
    stub_result["evidence"] = []
    return stub_result


In [None]:
# Re-run the finance agent on the current context, then ground-check its
# output; both results are printed for inspection.
finance_output = run_finance_agent(
    finance_context=finance_context,
    llm=llm
)

print(finance_output)


validated_finance_output, finance_validation_notes = validate_finance_output(
    finance_output=finance_output,
    finance_context=finance_context
)

print(validated_finance_output)
print(finance_validation_notes)


In [None]:
def generate_finance_risk_summary(validated_output):
    """
    Generate a finance risk summary based on validated finance clauses.

    The risk is derived from keywords in the clause text, not from the
    agent's own ``risk_level``: no clauses, "interest", "penalty" or
    "late fee" all yield "high"; anything else yields "medium".

    Returns:
        dict with ``overall_finance_risk``, ``summary`` and ``confidence``
        (passed through from ``validated_output``, default 0.0).
    """

    clauses = validated_output.get("extracted_clauses", [])
    confidence = validated_output.get("confidence", 0.0)

    # Single lower-cased blob so keyword checks are case-insensitive.
    text_blob = " ".join(c.lower() for c in clauses)

    if not clauses:
        overall_risk = "high"
        summary = (
            "No explicit financial clauses were identified. "
            "This may indicate unclear or missing payment obligations."
        )

    elif "interest" in text_blob:
        overall_risk = "high"
        summary = (
            "Interest clauses were identified, indicating increased "
            "financial exposure in case of late payments."
        )

    elif "penalty" in text_blob or "late fee" in text_blob:
        overall_risk = "high"
        summary = (
            "Penalty or late fee clauses were identified, which may "
            "increase financial risk."
        )

    else:
        overall_risk = "medium"
        summary = (
            "Payment-related clauses were identified without explicit "
            "penalties or interest."
        )

    return {
        "overall_finance_risk": overall_risk,
        "summary": summary,
        "confidence": confidence
    }



# Recompute the finance risk summary from the re-validated output.
finance_risk_summary = generate_finance_risk_summary(
    validated_finance_output
)

print(finance_risk_summary)




In [None]:
def package_finance_pipeline_output(
    query,
    validated_output,
    risk_summary,
    validation_notes
):
    """
    Build the user-facing finance pipeline result.

    ``result`` combines the validated clauses/evidence with the risk level
    and confidence from ``risk_summary``; the query is whitespace-stripped
    and ``status`` is always "completed".
    """
    cleaned_query = query.strip()

    get_validated = validated_output.get
    get_risk = risk_summary.get

    result_section = dict(
        clause_type=get_validated("clause_type", "finance"),
        extracted_clauses=get_validated("extracted_clauses", []),
        risk_level=get_risk("overall_finance_risk"),
        confidence=get_risk("confidence"),
        evidence=get_validated("evidence", []),
    )

    return dict(
        pipeline="finance",
        query=cleaned_query,
        result=result_section,
        risk_summary=get_risk("summary"),
        validation_notes=validation_notes,
        status="completed",
    )




# Package the "after" finance result (updated FINANCE_QUERY) for comparison
# with final_finance_output.
final_finance_output_after = package_finance_pipeline_output(
    query=FINANCE_QUERY,
    validated_output=validated_finance_output,
    risk_summary=finance_risk_summary,
    validation_notes=finance_validation_notes
)


In [None]:
import json
# Pretty-print the "after" finance output as indented JSON.
print(json.dumps(final_finance_output_after, indent=4))


3rd extra task

In [None]:
def extract_finance_clauses(final_output):
    """Return the packaged output's ``result.extracted_clauses`` as a set (empty if absent)."""
    result_section = final_output.get("result", {})
    clause_list = result_section.get("extracted_clauses", [])
    return set(clause_list)


In [None]:
def compare_clause_differences(before_output, after_output):
    """
    Diff the extracted clauses of two packaged finance outputs.

    Returns a dict of lists: clauses only in the "after" output, clauses
    only in the "before" output, and clauses present in both.
    """
    before_set = extract_finance_clauses(before_output)
    after_set = extract_finance_clauses(after_output)

    added = after_set.difference(before_set)
    dropped = before_set.difference(after_set)
    shared = before_set.intersection(after_set)

    return {
        "new_clauses_after_adding_interest": list(added),
        "removed_clauses": list(dropped),
        "common_clauses": list(shared),
    }


In [None]:
# Diff the finance clauses found before and after the query change.
clause_diff = compare_clause_differences(
    final_finance_output,
    final_finance_output_after
)


In [None]:
import json
# Pretty-print the clause diff as indented JSON.
print(json.dumps(clause_diff, indent=4))


In [None]:
# Show the raw clause lists side by side for a manual check of the diff.
print("Before clauses:", final_finance_output["result"]["extracted_clauses"])
print("After clauses:", final_finance_output_after["result"]["extracted_clauses"])


# Legal Pipeline

In [None]:
# Query text for the legal pipeline; it is passed to
# retrieve_legal_chunks(), which embeds it for semantic search.
LEGAL_QUERY = """
Identify clauses related to:
- Termination
- Governing law
- Jurisdiction
- Indemnification
- Limitation of liability
"""


In [None]:
def retrieve_legal_chunks(query: str, top_k: int = 5):
    """
    Run a semantic search for legal-related contract chunks.

    Embeds ``query`` with the module-level ``embedding_model``, queries the
    module-level ``index`` for ``top_k`` matches and returns one
    ``{"chunk_id", "text", "score"}`` dict per match (score rounded to 3
    decimals).
    """
    query_vector = embedding_model.encode(query).tolist()

    search_result = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "chunk_id": hit["id"],
            "text": hit["metadata"]["text"],
            "score": round(hit["score"], 3),
        }
        for hit in search_result["matches"]
    ]


# Retrieve chunks for the legal query using the default top_k.
legal_chunks = retrieve_legal_chunks(LEGAL_QUERY)


In [None]:
def combine_legal_context(chunks):
    """Join the ``text`` of every chunk with a blank line between them."""
    texts = [piece["text"] for piece in chunks]
    return "\n\n".join(texts)


# Build the single context string handed to the legal agent.
legal_context = combine_legal_context(legal_chunks)


In [None]:
def run_legal_agent(legal_context, llm):
    """
    Build the legal-extraction prompt around the context and call the LLM.

    Returns whatever ``llm(prompt)`` returns; no parsing is done here.
    """
    instructions = """
You are a legal analysis agent.

Rules:
- Extract only legal clauses from the text
- Copy clauses verbatim
- Do NOT paraphrase
- If nothing is found, return empty lists
- Output strict JSON only

Text:
"""
    output_schema = """

Output format:
{
  "clause_type": "legal",
  "extracted_clauses": [],
  "risk_level": "unknown",
  "confidence": 0.0,
  "evidence": []
}
"""
    # The contract text sits between the rules and the expected JSON shape.
    prompt = f"{instructions}{legal_context}{output_schema}"

    # The LLM result is handed back as-is.
    return llm(prompt)


In [None]:
# Run the legal agent on the combined context using the current llm stub.
legal_output = run_legal_agent(
    legal_context=legal_context,
    llm=llm
)


In [None]:
def validate_legal_output(agent_output, legal_context):
    """
    Keep only the extracted clauses that literally occur in the context.

    Args:
        agent_output: dict produced by the legal agent; ``extracted_clauses``,
            ``risk_level`` and ``confidence`` are read with defaults.
        legal_context: retrieved contract text; ``None`` is treated as empty
            text, matching the compliance and finance validators.

    Returns:
        ``(validated_output, validation_notes)``. The validated output always
        has ``clause_type`` "legal"; its ``evidence`` is the list of grounded
        clauses. Confidence is reset to 0.0 when the agent reported clauses
        but none of them were grounded.
    """
    # Guard against a missing context so the substring test cannot raise.
    context_text = legal_context or ""

    validation_notes = []
    validated_clauses = []

    for clause in agent_output.get("extracted_clauses", []):
        if clause in context_text:
            validated_clauses.append(clause)
        else:
            validation_notes.append(
                f"Removed ungrounded clause: {clause}"
            )

    # Grounded clauses double as their own evidence (separate list object).
    validated_evidence = list(validated_clauses)

    confidence = agent_output.get("confidence", 0.0)
    if agent_output.get("extracted_clauses") and not validated_clauses:
        confidence = 0.0

    return {
        "clause_type": "legal",
        "extracted_clauses": validated_clauses,
        "risk_level": agent_output.get("risk_level", "unknown"),
        "confidence": confidence,
        "evidence": validated_evidence
    }, validation_notes


In [None]:
# Ground-check the legal agent output against the retrieved context.
validated_legal_output, legal_validation_notes = validate_legal_output(
    legal_output,
    legal_context
)


In [None]:
def generate_legal_risk_summary(validated_output):
    """
    Return a one-sentence legal risk summary for the validated clauses.

    No clauses, a clause mentioning "termination" (case-insensitive), and
    everything else each map to a fixed sentence.
    """
    found_clauses = validated_output.get("extracted_clauses", [])

    if not found_clauses:
        return "No explicit legal clauses identified. Potential legal ambiguity."

    for clause in found_clauses:
        if "termination" in clause.lower():
            return "Termination clauses identified; review notice periods and rights."

    return "Standard legal clauses identified with no immediate red flags."



# Summarise the risk of the validated legal output.
legal_risk_summary = generate_legal_risk_summary(validated_legal_output)


In [None]:
def package_legal_pipeline_output(
    query,
    validated_output,
    risk_summary,
    validation_notes
):
    """
    Wrap the legal pipeline results in the final output structure.

    ``validated_output`` is stored unchanged under ``result``; the query is
    whitespace-stripped and ``status`` is always "completed".
    """
    packaged = {"pipeline": "legal"}
    packaged["query"] = query.strip()
    packaged["result"] = validated_output
    packaged["risk_summary"] = risk_summary
    packaged["validation_notes"] = validation_notes
    packaged["status"] = "completed"
    return packaged


# Package every intermediate legal result into the final structure and
# display it as the cell output.
final_legal_output = package_legal_pipeline_output(
    query=LEGAL_QUERY,
    validated_output=validated_legal_output,
    risk_summary=legal_risk_summary,
    validation_notes=legal_validation_notes
)
final_legal_output

**Extra Task**

In [None]:
# NOTE(review): this text is identical to the LEGAL_QUERY defined earlier,
# which already lists "Indemnification", so the before/after comparison below
# appears to run the same query twice - confirm the intended "before" query.
LEGAL_QUERY = """
Identify clauses related to:
- Termination
- Governing law
- Jurisdiction
- Indemnification
- Limitation of liability
"""


In [None]:
# Re-run retrieval and rebuild the context for the current LEGAL_QUERY.
legal_chunks = retrieve_legal_chunks(LEGAL_QUERY)
legal_context = combine_legal_context(legal_chunks)


In [None]:
# Run the legal agent again; the result is kept separately as the "after" output.
legal_output_after = run_legal_agent(
    legal_context=legal_context,
    llm=llm
)


In [None]:
# Validate the "after" agent output. The original cell re-validated
# legal_output, so the freshly produced legal_output_after was never checked.
validated_legal_output, legal_validation_notes = validate_legal_output(
    legal_output_after,
    legal_context
)


In [None]:
def extract_legal_clauses(final_output):
    """Return the packaged output's ``result.extracted_clauses`` as a set (empty if absent)."""
    result_section = final_output.get("result", {})
    clause_list = result_section.get("extracted_clauses", [])
    return set(clause_list)



def compare_legal_clause_increase(before_output, after_output):
    """
    Compare the extracted clauses of two packaged legal outputs.

    Returns the clauses that appear only in the "after" output together with
    the number of distinct clauses in each output.
    """
    before_set = extract_legal_clauses(before_output)
    after_set = extract_legal_clauses(after_output)

    added = after_set.difference(before_set)

    return {
        "new_clauses_after_adding_indemnification": list(added),
        "total_before": len(before_set),
        "total_after": len(after_set),
    }



In [None]:
# compare_legal_clause_increase() reads output["result"]["extracted_clauses"],
# a key that only the packaged pipeline outputs have. Passing the raw agent
# outputs always produced an empty diff, so package the "after" run here and
# compare it with the packaged "before" output.
_validated_after, _notes_after = validate_legal_output(
    legal_output_after,
    legal_context
)
final_legal_output_after = package_legal_pipeline_output(
    query=LEGAL_QUERY,
    validated_output=_validated_after,
    risk_summary=generate_legal_risk_summary(_validated_after),
    validation_notes=_notes_after
)

legal_clause_diff = compare_legal_clause_increase(
    final_legal_output,
    final_legal_output_after
)

import json
print(json.dumps(legal_clause_diff, indent=4))


In [None]:
print("===== LEGAL AGENT ANALYSIS =====\n")

print("Before adding 'indemnification':")
print(
    legal_output
    .get("result", {})
    .get("extracted_clauses", [])
)

print("\n-------------------------------\n")

print("After adding 'indemnification':")
print(
    legal_output_after
    .get("result", {})
    .get("extracted_clauses", [])
)


# Operations Pipeline

In [None]:
OPERATIONS_QUERY = """
Identify clauses related to:
- Service levels (SLA)
- Timelines
- Deliverables
- Performance obligations
"""


In [None]:
def retrieve_operations_chunks(query: str, top_k: int = 5):
    """
    Fetch the contract chunks that best match an operations query.

    The query is encoded with the module-level ``embedding_model`` and the
    resulting vector is sent to the module-level ``index``.

    Args:
        query: Query text to embed.
        top_k: Number of matches requested from the index.

    Returns:
        list of dicts with "chunk_id", "text" and "score" (rounded to three
        decimals), in the order the index returned the matches.
    """
    vector = embedding_model.encode(query).tolist()
    result = index.query(vector=vector, top_k=top_k, include_metadata=True)

    return [
        {
            "chunk_id": hit["id"],
            "text": hit["metadata"]["text"],
            "score": round(hit["score"], 3),
        }
        for hit in result["matches"]
    ]


operations_chunks = retrieve_operations_chunks(OPERATIONS_QUERY)


In [None]:
def combine_operations_context(chunks):
    """Join the "text" of every chunk into one string, separated by blank lines."""
    texts = [item["text"] for item in chunks]
    separator = "\n\n"
    return separator.join(texts)


operations_context = combine_operations_context(operations_chunks)


In [None]:
def run_operations_agent(operations_context, llm):
    """
    Ask the LLM to pull operations-related clauses out of the given context.

    Args:
        operations_context: Contract text embedded into the prompt.
        llm: Callable invoked with the finished prompt string.

    Returns:
        Whatever ``llm`` returns for the prompt, unmodified.
    """

    # Doubled braces in the template are literal braces in the rendered prompt.
    instructions = f"""
You are an operations analysis agent.

Rules:
- Extract only operations-related clauses
- Copy clauses verbatim
- Do NOT paraphrase
- If nothing is found, return empty lists
- Output strict JSON only

Text:
{operations_context}

Output format:
{{
  "clause_type": "operations",
  "extracted_clauses": [],
  "risk_level": "unknown",
  "confidence": 0.0,
  "evidence": []
}}
"""

    return llm(instructions)


operations_output = run_operations_agent(
    operations_context=operations_context,
    llm=llm
)



In [None]:
def validate_operations_output(agent_output, operations_context):
    """Keep only the extracted clauses that occur verbatim in the context.

    Args:
        agent_output: dict produced by the operations agent; the keys read
            are "extracted_clauses", "confidence" and "risk_level".
        operations_context: Context string the clauses must appear in.

    Returns:
        Tuple ``(validated_output, validation_notes)``. ``validated_output``
        holds the surviving clauses (also used as evidence), the agent's risk
        level and its confidence; ``validation_notes`` has one message per
        removed clause.
    """
    validation_notes = []
    validated_clauses = []

    # `or []` also covers an explicit null clause list from the model.
    reported_clauses = agent_output.get("extracted_clauses") or []

    for clause in reported_clauses:
        # A substring test against a str raises TypeError for non-string
        # operands, so such entries are rejected rather than crashing the run.
        if isinstance(clause, str) and clause in operations_context:
            validated_clauses.append(clause)
        else:
            validation_notes.append(
                f"Removed ungrounded clause: {clause}"
            )

    confidence = agent_output.get("confidence", 0.0)
    # The agent reported clauses but none survived: its confidence is void.
    if reported_clauses and not validated_clauses:
        confidence = 0.0

    return {
        "clause_type": "operations",
        "extracted_clauses": validated_clauses,
        "risk_level": agent_output.get("risk_level", "unknown"),
        "confidence": confidence,
        # Each surviving clause is verbatim context text, so it doubles as
        # its own evidence (kept as a separate list object).
        "evidence": list(validated_clauses)
    }, validation_notes




validated_operations_output, operations_validation_notes = (
    validate_operations_output(
        operations_output,
        operations_context
    )
)


In [None]:
def generate_operations_risk_summary(validated_output):
    """Produce a one-sentence risk summary for validated operations clauses.

    Args:
        validated_output: dict whose "extracted_clauses" entry lists the
            validated clause strings.

    Returns:
        A fixed message for one of three cases: no clauses, at least one
        clause mentioning service levels, or anything else.
    """
    import re

    clauses = validated_output.get("extracted_clauses", [])

    if not clauses:
        return "No explicit operational obligations identified. Potential delivery ambiguity."

    # Match "SLA"/"SLAs" as a whole word only: a plain substring test also
    # fires on unrelated words such as "legislation" or "translation".
    sla_word = re.compile(r"\bslas?\b", re.IGNORECASE)

    def mentions_service_level(clause):
        return "service level" in clause.lower() or sla_word.search(clause) is not None

    if any(mentions_service_level(c) for c in clauses):
        return "Service level obligations identified; review performance commitments."

    return "Standard operational clauses identified with manageable risk."



operations_risk_summary = generate_operations_risk_summary(
    validated_operations_output
)


In [None]:
def package_operations_pipeline_output(
    query,
    validated_output,
    risk_summary,
    validation_notes
):
    """Assemble the operations pipeline artefacts into one result record.

    Args:
        query: Query text used for the run; surrounding whitespace is stripped.
        validated_output: Validated agent output, stored under "result".
        risk_summary: Risk summary, stored under "risk_summary".
        validation_notes: Notes from validation, stored under "validation_notes".

    Returns:
        dict tagged with pipeline "operations" and status "completed".
    """
    cleaned_query = query.strip()
    return dict(
        pipeline="operations",
        query=cleaned_query,
        result=validated_output,
        risk_summary=risk_summary,
        validation_notes=validation_notes,
        status="completed",
    )


final_operations_output = package_operations_pipeline_output(
    query=OPERATIONS_QUERY,
    validated_output=validated_operations_output,
    risk_summary=operations_risk_summary,
    validation_notes=operations_validation_notes
)

final_operations_output

**Extra Task**

In [None]:
OPERATIONS_QUERY_AFTER = """
Identify clauses related to:
- Service levels (SLA)
- Timelines
- Deliverables
- Performance obligations
- Uptime
"""


In [None]:
operations_chunks_after = retrieve_operations_chunks(OPERATIONS_QUERY_AFTER)
operations_context_after = combine_operations_context(operations_chunks_after)

operations_output_after = run_operations_agent(
    operations_context=operations_context_after,
    llm=llm
)

validated_operations_output_after, _ = validate_operations_output(
    operations_output_after,
    operations_context_after
)


In [None]:
print("===== OPERATIONS AGENT CLAUSE ANALYSIS =====\n")

print("Before adding 'uptime':")
print(
    validated_operations_output
    .get("extracted_clauses", [])
)

print("\n-------------------------------------------\n")

print("After adding 'uptime':")
print(
    validated_operations_output_after
    .get("extracted_clauses", [])
)


In [None]:
# Deduplicate the validated clauses of both runs so they can be compared as sets.
before_set = set(validated_operations_output.get("extracted_clauses", []))
after_set = set(validated_operations_output_after.get("extracted_clauses", []))

# Report the clauses that only the second run produced, plus the unique counts.
print("\n===== CLAUSE DIFFERENCE SUMMARY =====")
print("New clauses after adding 'uptime':", list(after_set.difference(before_set)))
print("Total before:", len(before_set))
print("Total after:", len(after_set))

# Coordinator: Merging Agent Outputs

Take all these independent outputs and bring them together into one unified contract analysis.

In [None]:
# Assuming these are outputs from previous pipeline runs
legal_output = final_legal_output
compliance_output = final_compliance_output
finance_output = final_finance_output
operations_output = final_operations_output


In [None]:
OUTPUT_SCHEMA = {
    "metadata": {
        "contract_id": None,
        "analysis_timestamp": None
    },
    "legal_analysis": {},
    "compliance_analysis": {},
    "finance_analysis": {},
    "operations_analysis": {},
    "overall_risk": None,
    "confidence": None
}


In [None]:
def coordinator_merge(
    legal_output,
    compliance_output,
    finance_output,
    operations_output
):
    """Collect the four agent outputs under their ``*_analysis`` section keys.

    For each output, its "result" entry is used when that key is present;
    otherwise the output dict itself is used.

    Returns:
        dict with the keys "legal_analysis", "compliance_analysis",
        "finance_analysis" and "operations_analysis", in that order.
    """
    section_sources = (
        ("legal_analysis", legal_output),
        ("compliance_analysis", compliance_output),
        ("finance_analysis", finance_output),
        ("operations_analysis", operations_output),
    )
    return {
        section: output.get("result", output)
        for section, output in section_sources
    }

Here, we combine the outputs of all the previous pipelines into a single JSON structure. It fills the four `*_analysis` sections of the format defined in the previous step (`OUTPUT_SCHEMA`).


In [None]:
merged_output = coordinator_merge(
    legal_output,
    compliance_output,
    finance_output,
    operations_output
)
merged_output.keys()


In [None]:
merged_output

In [None]:
def compute_overall_risk(merged_output):
    """Return the highest risk level reported by any analysis section.

    Args:
        merged_output: dict keyed by the four ``*_analysis`` section names;
            each section's "risk_level" is read.

    Returns:
        One of "unknown", "low", "medium" or "high". A missing section, a
        missing or unrecognised risk level, and a section that is not a dict
        all count as "unknown".
    """
    risk_map = {
        "low": 1,
        "medium": 2,
        "high": 3,
        "unknown": 0
    }
    reverse_risk_map = {score: label for label, score in risk_map.items()}

    sections = (
        "legal_analysis",
        "compliance_analysis",
        "finance_analysis",
        "operations_analysis",
    )

    def section_score(section):
        analysis = merged_output.get(section)
        if not isinstance(analysis, dict):
            return 0
        level = analysis.get("risk_level", "unknown")
        if not isinstance(level, str):
            return 0
        # Normalise case and padding so a label such as "High" is not
        # silently downgraded to "unknown".
        return risk_map.get(level.strip().lower(), 0)

    overall_risk_score = max(section_score(section) for section in sections)
    return reverse_risk_map[overall_risk_score]


In [None]:
overall_risk = compute_overall_risk(merged_output)
overall_risk

We take the individual risk levels from:

  - Legal, Compliance, Finance, Operations

…and compute ONE overall contract risk.

📌 This answers the business question:

“Is this contract safe or risky overall?”

Without this step:
 * Users must manually interpret 4 risks

* No executive summary possible

* No dashboard-level insight

This step:
* Simplifies decision-making

* Enables alerts & automation

* Makes ClauseAI useful to non-lawyers

In [None]:
def compute_overall_confidence(final_output):
    """Return the lowest confidence reported by the analysis sections.

    Sections that are missing, or whose "confidence" is absent or None, are
    ignored.

    Returns:
        The minimum confidence rounded to two decimals, or 0.0 when no
        section reports one.
    """
    section_names = (
        "legal_analysis",
        "compliance_analysis",
        "finance_analysis",
        "operations_analysis",
    )
    reported = [
        final_output.get(name, {}).get("confidence") for name in section_names
    ]
    usable = [value for value in reported if value is not None]

    return round(min(usable), 2) if usable else 0.0


In [None]:
# Score the merged sections directly: `final_output` is assembled from
# `merged_output` further down, so referencing it here fails when the cells
# are run top to bottom.
overall_confidence_score = compute_overall_confidence(merged_output)
overall_confidence_score

In [None]:
from datetime import datetime

def build_final_output(
    merged_output,
    overall_risk,
    contract_id="unknown",
    overall_confidence=None
):
    """Assemble the final analysis object from the merged agent sections.

    Args:
        merged_output: dict holding the four ``*_analysis`` sections; a
            missing section becomes an empty dict.
        overall_risk: Overall risk label, stored under "overall_risk".
        contract_id: Identifier stored in the metadata.
        overall_confidence: Confidence stored under "confidence". When
            omitted, the lowest "confidence" reported by the sections is
            used (rounded to two decimals), or 0.0 if none report one.

    Returns:
        dict with "metadata", the four analysis sections, "overall_risk"
        and "confidence".
    """
    section_names = (
        "legal_analysis",
        "compliance_analysis",
        "finance_analysis",
        "operations_analysis",
    )
    sections = {name: merged_output.get(name, {}) for name in section_names}

    if overall_confidence is None:
        # Derive the value here instead of reading a notebook global, so the
        # result does not depend on which cells ran earlier. Same rule as
        # compute_overall_confidence: take the weakest section.
        reported = [
            section.get("confidence")
            for section in sections.values()
            if isinstance(section, dict)
        ]
        reported = [value for value in reported if value is not None]
        overall_confidence = round(min(reported), 2) if reported else 0.0

    final_output = {
        "metadata": {
            "contract_id": contract_id,
            # NOTE(review): naive UTC timestamp; datetime.utcnow() is
            # deprecated since Python 3.12 - consider a timezone-aware value.
            "analysis_timestamp": datetime.utcnow().isoformat()
        },
        **sections,
        "overall_risk": overall_risk,
        "confidence": overall_confidence
    }

    return final_output


In [None]:
final_output = build_final_output(
    merged_output=merged_output,
    overall_risk=overall_risk,
    contract_id="contract_001"
)


In [None]:
import json
print(json.dumps(final_output, indent=4))


Here, we assembled everything into ONE final JSON object that ClauseAI will return

In [None]:
# Find which analysis section of `merged_output` carries the highest risk.
# Labels map to an ordinal score; an unrecognised or missing label scores 0.
risk_map = {"low": 1, "medium": 2, "high": 3, "unknown": 0}

# Starting at 0 means a section is only selected if it scores above "unknown";
# if none does, `highest_risk_section` stays None.
highest_risk_value = 0
highest_risk_section = None

for section in ["legal_analysis", "compliance_analysis", "finance_analysis", "operations_analysis"]:
    agent_result = merged_output.get(section, {})
    risk_value = risk_map.get(agent_result.get("risk_level", "unknown"), 0)

    # Strict ">" keeps the first section encountered when several tie.
    if risk_value > highest_risk_value:
        highest_risk_value = risk_value
        highest_risk_section = section

print("Highest-Risk Section:", highest_risk_section)
