In [None]:
# Mount Google Drive at /content/gdrive; the paths used later in this
# notebook all live under this mount point.
from google.colab import drive
drive.mount("/content/gdrive")

In [None]:
import os

# Root folder of the Milestone 2 artefacts on the mounted Drive.
base = "/content/gdrive/MyDrive/CLAUSEAI/Milestone_2"
# Display the folder's direct entries (raises if the Drive is not mounted
# or the folder does not exist).
os.listdir(base)


In [None]:
# Print the full path of every .json file found anywhere below `base`.
for dirpath, _subdirs, filenames in os.walk(base):
    json_names = [name for name in filenames if name.endswith(".json")]
    for name in json_names:
        print(os.path.join(dirpath, name))


In [None]:
# `json` was never imported anywhere in the notebook, so this cell raised
# NameError on a fresh kernel; import it where it is first needed.
import json

# Load the stored Milestone 2 results. The encoding is given explicitly so
# the read does not depend on the runtime's locale.
with open(
    "/content/gdrive/MyDrive/CLAUSEAI/Milestone_2/results/milestone2_output.json",
    encoding="utf-8",
) as f:
    milestone2 = json.load(f)

# Show the top-level keys; later cells read "legal", "compliance",
# "finance" and "operations".
milestone2.keys()


In [None]:
# Pull each agent's stored result out of the Milestone 2 payload.
legal_pipeline_output = milestone2["legal"]
compliance_pipeline_output = milestone2["compliance"]
finance_pipeline_output = milestone2["finance"]
operations_pipeline_output = milestone2["operations"]

# Report how many clauses each agent extracted.
for label, agent_result in (
    ("Legal", legal_pipeline_output),
    ("Compliance", compliance_pipeline_output),
    ("Finance", finance_pipeline_output),
    ("Operations", operations_pipeline_output),
):
    print(f"{label} clauses:", len(agent_result["extracted_clauses"]))


In [None]:
!pip install -q langgraph


In [None]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Dict, List
import time


In [None]:
from typing_extensions import Annotated
from typing import TypedDict, Dict, List
from langgraph.graph import StateGraph, END
import time
import operator




# Every channel shares one declaration: a list of dicts annotated with
# operator.add (presumably consumed by LangGraph as the merge function for
# updates coming from different nodes -- confirm against the LangGraph docs).
_AppendedDicts = Annotated[List[Dict], operator.add]


class GraphState(TypedDict):
    """State schema for the agent graph: one list channel per agent plus timing."""

    legal: _AppendedDicts
    compliance: _AppendedDicts
    finance: _AppendedDicts
    operations: _AppendedDicts
    timing: _AppendedDicts




In [None]:
def legal_node(state: GraphState):
    """Graph node that publishes the stored legal analysis.

    Args:
        state: Current graph state; this node does not read it.

    Returns:
        A partial state update: the legal pipeline output wrapped in a list
        under "legal", and a one-entry "timing" list with the seconds spent
        inside this node.
    """
    start = time.time()
    # The log text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
    print("⚖️ Legal running")

    return {
        "legal": [legal_pipeline_output],
        "timing": [{"legal": time.time() - start}]
    }


def compliance_node(state: GraphState):
    """Graph node that publishes the stored compliance analysis.

    Args:
        state: Current graph state; this node does not read it.

    Returns:
        A partial state update: the compliance pipeline output wrapped in a
        list under "compliance", and a one-entry "timing" list with the
        seconds spent inside this node.
    """
    start = time.time()
    # The log text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
    print("📜 Compliance running")

    return {
        "compliance": [compliance_pipeline_output],
        "timing": [{"compliance": time.time() - start}]
    }


def finance_node(state: GraphState):
    """Graph node that publishes the stored finance analysis.

    Args:
        state: Current graph state; this node does not read it.

    Returns:
        A partial state update: the finance pipeline output wrapped in a
        list under "finance", and a one-entry "timing" list with the seconds
        spent inside this node.
    """
    start = time.time()
    # The log text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
    print("💰 Finance running")

    return {
        "finance": [finance_pipeline_output],
        "timing": [{"finance": time.time() - start}]
    }


def operations_node(state: GraphState):
    """Graph node that publishes the stored operations analysis.

    Args:
        state: Current graph state; this node does not read it.

    Returns:
        A partial state update: the operations pipeline output wrapped in a
        list under "operations", and a one-entry "timing" list with the
        seconds spent inside this node.
    """
    start = time.time()
    # The log text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
    print("🛠 Operations running")

    return {
        "operations": [operations_pipeline_output],
        "timing": [{"operations": time.time() - start}]
    }


In [None]:
# Assemble the agent graph: "legal" is the entry point and fans out to the
# three other agents, each of which then terminates the run.
graph = StateGraph(GraphState)

for node_name, node_fn in (
    ("legal", legal_node),
    ("compliance", compliance_node),
    ("finance", finance_node),
    ("operations", operations_node),
):
    graph.add_node(node_name, node_fn)

graph.set_entry_point("legal")

downstream_nodes = ("compliance", "finance", "operations")

for target in downstream_nodes:
    graph.add_edge("legal", target)

for target in downstream_nodes:
    graph.add_edge(target, END)

app = graph.compile()


In [None]:
# Initial graph state: every channel starts as its own empty list.
input_state = {
    channel: []
    for channel in ("legal", "compliance", "finance", "operations", "timing")
}


In [None]:
# Run the compiled graph once from the empty state.
result = app.invoke(input_state)

print("Timing:", result["timing"])
# One line per agent channel with the number of collected outputs.
for label, channel in (
    ("Legal", "legal"),
    ("Compliance", "compliance"),
    ("Finance", "finance"),
    ("Operations", "operations"),
):
    print(f"{label} outputs:", len(result[channel]))


In [None]:
from datetime import datetime

contract_id = "contract_001"

# Agent name -> that agent's stored pipeline result.
agent_outputs = dict(
    legal=legal_pipeline_output,
    compliance=compliance_pipeline_output,
    finance=finance_pipeline_output,
    operations=operations_pipeline_output,
)

# One flat memory record per agent; the extracted clauses are collapsed into
# a single space-separated string.
memory_records = [
    {
        "agent": agent_name,
        "risk_level": agent_result["risk_level"],
        "confidence": agent_result["confidence"],
        "clauses": " ".join(agent_result["extracted_clauses"]),
        "timestamp": datetime.utcnow().isoformat(),
        "contract_id": contract_id,
    }
    for agent_name, agent_result in agent_outputs.items()
]

memory_records


In [None]:
# Render each memory record as a small labelled text block for embedding.
memory_texts = [
    (
        f"\nAgent: {record['agent']}\n"
        f"Contract: {record['contract_id']}\n"
        f"Risk: {record['risk_level']}\n"
        f"Confidence: {record['confidence']}\n"
        f"Clauses: {record['clauses']}\n"
    )
    for record in memory_records
]

memory_texts[0]


In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used for every vector written to / queried from
# the index in this notebook.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the rendered memory texts (presumably one vector per text, in the
# same order -- later cells pair them with memory_records by position).
memory_embeddings = embed_model.encode(memory_texts)

# Display how many embeddings were produced.
len(memory_embeddings)


In [None]:
!pip install -q pinecone

In [None]:
import os
from getpass import getpass

from pinecone import Pinecone

# SECURITY: the API key used to be hardcoded in this cell. Treat that key as
# leaked and rotate it in the Pinecone console. The key is now taken from the
# PINECONE_API_KEY environment variable, with an interactive prompt (input is
# not echoed or stored in the notebook) when the variable is not set.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Pinecone API key: ")

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("cuad-index")

# Status text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
print("✅ Pinecone connected")


In [None]:
# Embeddings and records are paired by position, so a length mismatch would
# silently attach the wrong metadata to a vector -- fail loudly instead.
if len(memory_embeddings) != len(memory_records):
    raise ValueError(
        f"{len(memory_embeddings)} embeddings but {len(memory_records)} memory records"
    )

# One (id, values, metadata) tuple per agent; the id combines the agent name
# and the record timestamp, so every run writes new vectors.
vectors = [
    (
        f"{meta['agent']}_{meta['timestamp']}",
        emb.tolist(),
        meta
    )
    for emb, meta in zip(memory_embeddings, memory_records)
]

index.upsert(vectors)

# Status text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
print("✅ Agent memory stored in Pinecone")


In [None]:
# Query the index with the first agent embedding and list what comes back.
res = index.query(
    include_metadata=True,
    top_k=10,
    vector=memory_embeddings[0].tolist(),
)

print("Retrieved Agent Memory:\n")

for match in res["matches"]:
    metadata = match["metadata"]

    # Matches without an "agent" field are contract chunks, not agent memory.
    if "agent" not in metadata:
        continue

    print(
        metadata["agent"],
        "| risk:", metadata["risk_level"],
        "| contract:", metadata["contract_id"],
        "| confidence:", metadata["confidence"]
    )


In [None]:
def query_agent_memory(contract_id, top_k=20):
    """Fetch the agent-memory records stored for one contract.

    The contract restriction is applied by Pinecone as a metadata filter, so
    `top_k` counts only vectors of this contract. Previously the filter ran
    client-side on the top-k nearest vectors of the whole index, so unrelated
    vectors (contract chunks, other contracts) could crowd the wanted records
    out of the result.

    Args:
        contract_id: Value of the "contract_id" metadata field to match.
        top_k: Maximum number of matches requested from the index.

    Returns:
        List of metadata dicts, in the order returned by the index, limited
        to matches that carry an "agent" field.
    """
    # Any stored embedding works as the query vector: recall is what matters,
    # the similarity ranking itself is not used.
    query_vec = memory_embeddings[0].tolist()

    res = index.query(
        vector=query_vec,
        top_k=top_k,
        include_metadata=True,
        filter={"contract_id": {"$eq": contract_id}},
    )

    # Keep only agent memory (entries that carry an "agent" field).
    return [
        m["metadata"]
        for m in res["matches"]
        if "agent" in m["metadata"]
    ]


all_agent_memory = query_agent_memory("contract_001")
all_agent_memory


In [None]:
# One "<agent> risk: <level>" line per retrieved memory record.
shared_context = "\n".join(
    f"{entry['agent']} risk: {entry['risk_level']}"
    for entry in all_agent_memory
)

shared_context


In [None]:
# Prompt asking for a re-evaluated legal risk given the other agents' risks.
# The doubled braces are f-string escapes that render as a literal JSON
# skeleton. In this cell the prompt is only built and printed.
refined_legal_prompt = f"""
Based on other agent risks:

{shared_context}

Re-evaluate the overall legal risk of this contract.
Return JSON:
{{
  "risk_level": "...",
  "reason": "..."
}}
"""
print(refined_legal_prompt)


In [None]:
# Build refined legal risk using other agents' risks
risks = {m["agent"]: m["risk_level"] for m in all_agent_memory}

print("Current risks:", risks)

# Rule: If compliance OR finance is high → legal risk escalates
if risks.get("compliance") == "high" or risks.get("finance") == "high":
    refined_legal = {
        "risk_level": "high",
        "reason": "Legal risk escalated due to compliance or financial exposure"
    }
elif risks.get("operations") == "medium":
    refined_legal = {
        "risk_level": "medium",
        "reason": "Operational uncertainty impacts legal enforceability"
    }
else:
    refined_legal = {
        "risk_level": "low",
        "reason": "No conflicting risks detected"
    }

# The refinement may escalate the legal rating but must never lower it.
# Previously a contract the legal agent itself rated "high" came out as
# "low" or "medium" whenever the other agents were calm, under-reporting the
# overall risk. This mirrors the compliance refinement, which keeps the
# agent's own rating when nothing escalates it.
_severity = {"low": 1, "medium": 2, "high": 3}
own_legal_risk = risks.get("legal")
if _severity.get(own_legal_risk, 0) > _severity[refined_legal["risk_level"]]:
    refined_legal = {
        "risk_level": own_legal_risk,
        "reason": "Legal agent's own assessment retained; no higher cross-agent risk detected"
    }

refined_legal


In [None]:
from datetime import datetime

# Memory record for the refined legal verdict. It reuses the agent-memory
# schema: the refinement reason is stored in the "clauses" field and the
# confidence is fixed at 1.0.
refined_record = {
    "agent": "legal_refined",
    "risk_level": refined_legal["risk_level"],
    "confidence": 1.0,
    "clauses": refined_legal["reason"],
    "timestamp": datetime.utcnow().isoformat(),
    "contract_id": contract_id
}

# Text that gets embedded for this record.
refined_text = f"""
Agent: legal_refined
Contract: {contract_id}
Risk: {refined_record['risk_level']}
Reason: {refined_record['clauses']}
"""

refined_emb = embed_model.encode([refined_text])[0]

# Store as (id, values, metadata); the id includes the timestamp, so each
# run adds a new vector.
index.upsert([
    (
        f"legal_refined_{refined_record['timestamp']}",
        refined_emb.tolist(),
        refined_record
    )
])

# Status text was mojibake (mis-decoded UTF-8); restored to the intended emoji.
print("✅ Refined legal risk stored in Pinecone")


In [None]:
# Finance risk as retrieved from agent memory. The previous `[...][0]`
# raised IndexError when the query returned no finance record; fall back to
# the finance pipeline output loaded earlier in the notebook instead.
finance_memory = next(
    (m for m in all_agent_memory if m["agent"] == "finance"),
    None,
)
if finance_memory is not None:
    finance_risk = finance_memory["risk_level"]
else:
    finance_risk = finance_pipeline_output["risk_level"]

# High financial risk escalates compliance; otherwise compliance keeps its
# own rating.
if finance_risk == "high":
    refined_compliance = {
        "risk_level": "high",
        "reason": "Financial penalties may violate compliance requirements"
    }
else:
    refined_compliance = {
        "risk_level": compliance_pipeline_output["risk_level"],
        "reason": "No financial conflict detected"
    }

refined_compliance


In [None]:
def get_latest_agent(agent, contract_id=None, top_k=10):
    """Return the most recent stored memory record for one agent.

    Fixes over the previous version:
      * the agent (and optional contract) restriction is a server-side
        metadata filter, so other vectors in the index cannot push the
        record out of the top-k window;
      * among the matches the record with the greatest "timestamp" is
        returned, not simply the one most similar to the query vector
        (every run writes new vectors, so several may exist per agent).

    Args:
        agent: Value of the "agent" metadata field to look up.
        contract_id: Optional "contract_id" metadata value to restrict to.
        top_k: Maximum number of matches requested from the index.

    Returns:
        The metadata dict of the newest matching record, or None when the
        index returns no match.
    """
    metadata_filter = {"agent": {"$eq": agent}}
    if contract_id is not None:
        metadata_filter["contract_id"] = {"$eq": contract_id}

    res = index.query(
        vector=memory_embeddings[0].tolist(),
        top_k=top_k,
        include_metadata=True,
        filter=metadata_filter,
    )

    candidates = [m["metadata"] for m in res["matches"]]
    if not candidates:
        return None
    # Timestamps are ISO-8601 strings written by this notebook, so string
    # order equals chronological order.
    return max(candidates, key=lambda meta: meta.get("timestamp", ""))


latest = {
    "legal": get_latest_agent("legal_refined", contract_id),
    "compliance": get_latest_agent("compliance", contract_id),
    "finance": get_latest_agent("finance", contract_id),
    "operations": get_latest_agent("operations", contract_id)
}

# A missing record would otherwise surface later as an opaque TypeError when
# the risk levels are compared; report which agent is missing right here.
_missing_agents = [name for name, record in latest.items() if record is None]
if _missing_agents:
    raise LookupError(
        f"No stored memory found for: {', '.join(_missing_agents)}. "
        "Recently upserted vectors may not be queryable yet; re-run this cell."
    )

latest


In [None]:
# Severity order used to compare risk labels; unknown labels rank lowest (0).
risk_rank = dict(low=1, medium=2, high=3)


def _severity_of(record):
    """Numeric severity of one agent record's risk level."""
    return risk_rank.get(record["risk_level"], 0)


# The overall risk is the risk level of the most severe agent record.
riskiest_record = max(latest.values(), key=_severity_of)
overall_risk = riskiest_record["risk_level"]

overall_risk


In [None]:
from datetime import datetime

# Final per-contract result: identification first, then the latest record of
# each agent, then the aggregated risk.
final_contract = {
    "contract_id": contract_id,
    "generated_at": datetime.utcnow().isoformat(),
}
final_contract.update(
    (agent_key, latest[agent_key])
    for agent_key in ("legal", "compliance", "finance", "operations")
)
final_contract["overall_risk"] = overall_risk

final_contract


In [None]:
# Ordered section headings of the intended report layout.
# NOTE(review): no cell in the visible part of this notebook reads this
# list; the report below spells its headings out by hand.
REPORT_STRUCTURE = [
    "Executive Summary",
    "Overall Risk Assessment",
    "Legal Analysis",
    "Compliance Analysis",
    "Financial Analysis",
    "Operational Analysis",
    "Conclusion & Recommendations"
]


In [None]:
# The list of "major risks" used to be hard-coded, so the summary claimed
# compliance, financial and legal risks even for a contract where every
# agent reported "low". The bullets are now derived from the agents' latest
# records: an area is listed when its risk level is medium or high.
_SUMMARY_AREAS = (
    ("compliance", "Compliance obligations"),
    ("finance", "Financial penalties"),
    ("legal", "Legal exposure due to cross-dependencies"),
    ("operations", "Operational obligations"),
)

flagged_areas = [
    label
    for agent_key, label in _SUMMARY_AREAS
    if latest[agent_key]["risk_level"] in ("medium", "high")
]

if flagged_areas:
    summary_findings = "Major risks were found in:\n" + "\n".join(
        f"- {label}" for label in flagged_areas
    )
else:
    summary_findings = "No agent reported a medium or high risk."

exec_summary = f"""
This contract shows an overall risk level of {overall_risk.upper()}.

{summary_findings}
"""

exec_summary


In [None]:
# Plain-text report assembled from the executive summary, the overall risk
# and each agent's latest record. For the legal section the "clauses" field
# holds the refinement reason (that is how the legal_refined record is
# stored), hence the "Reason" label.
# NOTE(review): the CONCLUSION sentence is static text and does not depend
# on the computed risk levels.
professional_report = f"""
EXECUTIVE SUMMARY
{exec_summary}

OVERALL RISK
Overall Risk Level: {overall_risk.upper()}

LEGAL
Risk: {latest['legal']['risk_level']}
Reason: {latest['legal']['clauses']}

COMPLIANCE
Risk: {latest['compliance']['risk_level']}
Key Clauses: {latest['compliance']['clauses']}

FINANCE
Risk: {latest['finance']['risk_level']}
Key Clauses: {latest['finance']['clauses']}

OPERATIONS
Risk: {latest['operations']['risk_level']}
Key Clauses: {latest['operations']['clauses']}

CONCLUSION
This contract requires legal and compliance review before approval.
"""

professional_report


In [None]:
!pip install -q fastapi uvicorn


In [None]:
%%writefile app.py
from fastapi import FastAPI, UploadFile, File
from datetime import datetime

from pipeline import run_full_pipeline   # üëà import your real pipeline

app = FastAPI(title="Contract Analysis API")

@app.post("/analyze")
async def analyze_contract(file: UploadFile = File(...)):
    contract_text = (await file.read()).decode()

    final_json, formatted_report = run_full_pipeline(contract_text)

    return {
        "contract_id": "uploaded_contract",
        "generated_at": datetime.utcnow().isoformat(),
        "analysis": final_json,
        "report": formatted_report
    }


In [None]:
!ls


In [None]:
!pip install fastapi uvicorn pyngrok


In [None]:
%%writefile pipeline.py
import json
from datetime import datetime

# Read the stored Milestone-2 agent results once, when the module is imported.
_RESULTS_PATH = "/content/gdrive/MyDrive/CLAUSEAI/Milestone_2/results/milestone2_output.json"

with open(_RESULTS_PATH) as results_file:
    milestone2 = json.load(results_file)

# One module-level name per agent result.
legal_output, compliance_output, finance_output, operations_output = (
    milestone2[agent_key]
    for agent_key in ("legal", "compliance", "finance", "operations")
)


def compute_overall_risk(legal, compliance, finance, operations):
    """Return the most severe risk level among the four agent results.

    Levels are compared case-insensitively and ignoring surrounding
    whitespace, so "High" or " medium " from an agent no longer aborts the
    computation. Unknown levels still raise, so an unexpected label is never
    silently treated as harmless.

    Args:
        legal, compliance, finance, operations: Agent result dicts, each
            with a "risk_level" entry.

    Returns:
        "low", "medium" or "high" (normalised to lower case).

    Raises:
        ValueError: If any risk level is not low/medium/high.
    """
    levels = ("low", "medium", "high")
    normalised = [
        str(result["risk_level"]).strip().lower()
        for result in (legal, compliance, finance, operations)
    ]

    unknown = [level for level in normalised if level not in levels]
    if unknown:
        raise ValueError(
            f"Unknown risk level(s) {unknown!r}; expected one of {list(levels)}"
        )

    return max(normalised, key=levels.index)


def build_report(legal, compliance, finance, operations, overall):
    """Render the plain-text summary report.

    Args:
        legal, compliance, finance, operations: Agent result dicts providing
            "risk_level" and "extracted_clauses".
        overall: Overall risk label; printed upper-cased.

    Returns:
        Multi-line string: an executive summary followed by one section per
        agent giving its risk level and its number of extracted clauses.
    """
    lines = ["", "EXECUTIVE SUMMARY", f"Overall contract risk: {overall.upper()}"]

    sections = (
        ("LEGAL", legal),
        ("COMPLIANCE", compliance),
        ("FINANCE", finance),
        ("OPERATIONS", operations),
    )
    for heading, agent_result in sections:
        lines.append("")
        lines.append(heading)
        lines.append(f"Risk: {agent_result['risk_level']}")
        lines.append(f"Clauses: {len(agent_result['extracted_clauses'])}")

    # The trailing empty item yields the final newline of the report.
    lines.append("")
    return "\n".join(lines)


def run_full_pipeline(contract_text: str):
    """Return the combined analysis and its text report.

    NOTE(review): `contract_text` is not used. The result is always built
    from the Milestone-2 outputs loaded at module import, whatever text is
    passed in.

    Args:
        contract_text: Contract text (currently ignored).

    Returns:
        Tuple `(final_json, report)`: the per-agent results plus
        "overall_risk", and the formatted report string.
    """
    agent_results = (
        legal_output,
        compliance_output,
        finance_output,
        operations_output,
    )

    overall_risk = compute_overall_risk(*agent_results)

    final_json = dict(
        zip(("legal", "compliance", "finance", "operations"), agent_results)
    )
    final_json["overall_risk"] = overall_risk

    report = build_report(*agent_results, overall_risk)

    return final_json, report


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: the auth token used to be hardcoded in this cell. Treat that
# token as leaked and revoke it in the ngrok dashboard. The token is now
# read from the NGROK_AUTHTOKEN environment variable, with an interactive
# prompt (input is not echoed or stored in the notebook) as the fallback.
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: "))


In [None]:
# Serve app.py on port 8000. The command runs in the foreground, so this
# cell keeps running (and later cells wait) until it is interrupted.
!uvicorn app:app --host 0.0.0.0 --port 8000


In [None]:
from google.colab import output
# Port 8000 is the port uvicorn is started on in this notebook.
# NOTE(review): judging by its name this helper opens the port in a browser
# window -- confirm against the Colab documentation.
output.serve_kernel_port_as_window(8000)


In [None]:
!uvicorn app:app --host 0.0.0.0 --port 8000


!pkill -f uvicorn
!pkill -f python
