#### Importing Libraries

In [3]:
import json
from typing import Dict
from langchain_community.document_loaders import PyPDFLoader,TextLoader,UnstructuredWordDocumentLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import ChatHuggingFace,HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langgraph.graph import StateGraph,END
from pinecone import Pinecone
from typing import Annotated,TypedDict,List
import operator
from langchain_core.utils.json import parse_json_markdown

#### Hugging Face API integration in your aiproject environment

In [4]:
# conda activate your env_name 
# huggingface-cli login
# Enter your token (input will not be visible): "your api token" (to paste, right-click)
#Add token as git credential? (Y/n) n
#(aiproject) C:\Users\Admin>huggingface-cli whoami

#### Model

In [5]:
# Hosted Mistral instruct model accessed through the Hugging Face endpoint.
# `task` must be a single Hugging Face task identifier; the previous value
# ("text-generation and text-analysis") is not one.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
)
# Chat wrapper used by every agent below via `chat_model.invoke(...)`.
chat_model = ChatHuggingFace(llm=llm)

#### Agent state (structured state shared between graph nodes)

In [6]:
# Schema of the state dictionary handed to every graph node.
# Each node reads the keys it needs and returns an update for the keys it owns.
AgentState = TypedDict(
    "AgentState",
    {
        "chunks": List,      # split document chunks supplied by the caller
        "finance": list,     # findings returned by finance_agent
        "legal": list,       # findings returned by legal_agent
        "operation": list,   # findings returned by operation_agent
        "compliance": list,  # findings returned by compliance_agent
        "final": list,       # combined findings assembled by resolver
    },
)

#### Loading the document and Splitting of data

In [7]:
# Function that loads a document with the loader matching its file type
def doc_types_and_split(file_path, chunk_size=800, chunk_overlap=100):
    """Load a document and split it into overlapping text chunks.

    The loader is picked from the file extension (matched case-insensitively):
    ``.pdf`` -> PyPDFLoader, ``.txt`` -> TextLoader,
    ``.docx`` -> UnstructuredWordDocumentLoader.

    Args:
        file_path: Path of the document to load.
        chunk_size: Maximum chunk size passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` on the
        loaded documents.

    Raises:
        ValueError: If the extension is not one of pdf, txt or docx.
    """
    # Compare on a lower-cased copy so "Contract.PDF" is accepted too; the
    # original path is still what gets handed to the loader.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif lowered_path.endswith(".txt"):
        loader = TextLoader(file_path)
    elif lowered_path.endswith(".docx"):
        # Previously referenced the undefined name `filepath`, which raised
        # NameError for every path that was not .pdf or .txt.
        loader = UnstructuredWordDocumentLoader(file_path)
    else:
        raise ValueError("Unsupported file type .Supported:pdf,txt,docx")

    docs = loader.load()  # loaded documents (text plus loader-provided metadata)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

#### Domain classification

In [8]:
def classifier(state: AgentState):
    """Tag every chunk with the full list of analysis domains.

    No per-chunk classification happens here: each chunk's ``metadata["domains"]``
    is set to all four domains, and the same chunk list is returned as the
    ``chunks`` state update.
    """
    chunk_list = state["chunks"]
    for chunk in chunk_list:
        # A fresh list per chunk, so chunks never share one mutable list.
        chunk.metadata["domains"] = ["finance", "legal", "operation", "compliance"]
    return {"chunks": chunk_list}

#### Prompt Templates

In [9]:
# Prompt for the finance agent. Doubled braces are literal JSON braces in the
# rendered prompt; {clause} is the only template variable.
# The example schema is now valid JSON (double-quoted list, comma after
# "risk_type") so the model is not shown a malformed object to imitate.
finance_prompt = PromptTemplate(
    input_variables=["clause"],
    template="""
Return JSON ONLY.

{{
  "risk_level": "LOW | MEDIUM | HIGH",
  "risk_type": ["payment terms", "fees", "penalties", "credit_risks", "revenue_impact"],
  "impact": "string",
  "recommendation": "string"
}}
Clause:
{clause}
"""
)

In [10]:
# Prompt for the legal agent. Doubled braces are literal JSON braces in the
# rendered prompt; {clause} is the only template variable.
# The example schema is now valid JSON (double-quoted list, comma after
# "risk_type") so the model is not shown a malformed object to imitate.
legal_prompt = PromptTemplate(
    input_variables=["clause"],
    template="""
Return JSON ONLY.

{{
  "risk_level": "LOW | MEDIUM | HIGH",
  "risk_type": ["summary", "key_obligations", "legal_risks", "termination_rules", "liability_terms"],
  "legal_issue": "string",
  "explanation": "string"
}}

Clause:
{clause}
"""
)

In [11]:
# Prompt for the operation agent. Doubled braces are literal JSON braces in the
# rendered prompt; {clause} is the only template variable.
# The example schema is now valid JSON (double-quoted list, comma after
# "risk_type") so the model is not shown a malformed object to imitate.
operation_prompt = PromptTemplate(
    input_variables=["clause"],
    template="""
Return JSON ONLY.

{{
  "risk_level": "LOW | MEDIUM | HIGH",
  "risk_type": ["operational_dependencies", "onboarding_requirements", "security_concerns", "SLA_items", "action_items"],
  "dependency": "string",
  "operational_impact": "string"
}}
Clause:
{clause}
"""
)

In [12]:
# Prompt for the compliance agent. Template variables: {clause} and
# {finance_findings} (a JSON string built by compliance_agent). Doubled braces
# are literal JSON braces in the rendered prompt.
# The example schema is now valid JSON (double-quoted list, comma after
# "compliance_area") so the model is not shown a malformed object to imitate.
compliance_prompt = PromptTemplate(
    input_variables=["clause", "finance_findings"],
    template="""
Clause:
{clause}

Finance Findings:
{finance_findings}

Return JSON ONLY.

{{
  "risk_level": "LOW | MEDIUM | HIGH",
  "compliance_area": ["deliverables", "notice_periods", "renewal_terms", "milestones", "compliance_checklist"],
  "violation": "string",
  "required_action": "string"
}}
"""
)

#### Embeddings 

In [14]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed clause text for the vector store.
# NOTE(review): the Pinecone index dimension must match this model's output
# size; confirm against the index configuration.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


#### Pinecone integration

In [2]:
## in anaconda prompt
# conda activate  your env_name
# setx PINECONE_API_KEY "your_api_key"
# setx PINECONE_ENVIORNMENT_NAME "your _env'

In [15]:
import os
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
# Connect to Pinecone with the API key taken from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# NOTE: despite its name, `index_name` holds the Index handle returned by
# pc.Index(...), not a string; the name is kept for the rest of the notebook.
index_name = pc.Index("contract-analysis")

# Vector store that embeds text with `embedding_model` and writes to the index.
vectorstore = PineconeVectorStore(index=index_name, embedding=embedding_model)
print("vector initlaized successfully")


vector initlaized successfully


#### Storing embeddings

In [16]:
def store_to_pinecone(
    clause_id: str,
    domain: str,
    clause_text: str,
    structured_output: dict
):
    """Embed one clause and upsert it, with its analysis, into the vector store.

    Args:
        clause_id: Identifier of the clause/chunk (e.g. ``"chunk_0"``).
        domain: Analysis domain that produced the output (e.g. ``"finance"``).
        clause_text: Raw clause text; this is what gets embedded.
        structured_output: Parsed model reply. Expected to be a dict, but any
            JSON value is tolerated because the reply comes from an LLM.

    The record id is ``"{clause_id}_{domain}"``. The full output is stored as a
    JSON string under the ``output`` metadata key.
    """
    # A parsed reply may be a list or scalar; only a mapping carries a risk level.
    if isinstance(structured_output, dict):
        risk_level = structured_output.get("risk_level", "UNKNOWN")
    else:
        risk_level = "UNKNOWN"

    # Pinecone metadata does not accept null or nested values, so normalise
    # anything that is not a plain scalar before sending it.
    if risk_level is None:
        risk_level = "UNKNOWN"
    elif not isinstance(risk_level, (str, int, float, bool)):
        risk_level = json.dumps(risk_level)

    vectorstore.add_texts(
        texts=[clause_text],
        metadatas=[{
            "clause_id": clause_id,
            "domain": domain,
            "risk_level": risk_level,
            "output": json.dumps(structured_output)
        }],
        ids=[f"{clause_id}_{domain}"]
    )





#### Finance agent

In [17]:
def finance_agent(state: AgentState):
    """Run the finance prompt over every chunk and collect the parsed findings.

    For each chunk the model reply is parsed as JSON; on success the result is
    stored in Pinecone under the "finance" domain and appended to the findings.
    A reply that cannot be parsed is reported with ``print`` and the chunk is
    skipped.

    Returns:
        ``{"finance": [{"clause_id": "chunk_<i>", "output": <parsed>}, ...]}``
    """
    findings = []
    for idx, chunk in enumerate(state["chunks"]):
        clause_id = f"chunk_{idx}"
        prompt_text = finance_prompt.format(clause=chunk.page_content)
        raw_reply = chat_model.invoke(prompt_text).content
        try:
            analysis = parse_json_markdown(raw_reply)
        except Exception as err:
            print(f"error parsing chunk {idx} :{err}")
        else:
            store_to_pinecone(clause_id, "finance", chunk.page_content, analysis)
            findings.append({"clause_id": clause_id, "output": analysis})
    return {"finance": findings}


#### Legal_agent

In [18]:
def legal_agent(state: AgentState):
    """Run the legal prompt over every chunk and collect the parsed findings.

    For each chunk the model reply is parsed as JSON; on success the result is
    stored in Pinecone under the "legal" domain and appended to the findings.
    A reply that cannot be parsed is reported with ``print`` and the chunk is
    skipped.

    Returns:
        ``{"legal": [{"clause_id": "chunk_<i>", "output": <parsed>}, ...]}``
    """
    findings = []
    for idx, chunk in enumerate(state["chunks"]):
        clause_id = f"chunk_{idx}"
        prompt_text = legal_prompt.format(clause=chunk.page_content)
        raw_reply = chat_model.invoke(prompt_text).content
        try:
            analysis = parse_json_markdown(raw_reply)
        except Exception as err:
            print(f"error parsing chunk {idx} :{err}")
        else:
            store_to_pinecone(clause_id, "legal", chunk.page_content, analysis)
            findings.append({"clause_id": clause_id, "output": analysis})
    return {"legal": findings}


#### Operation_agent

In [19]:
def operation_agent(state: AgentState):
    """Run the operation prompt over every chunk and collect the parsed findings.

    For each chunk the model reply is parsed as JSON; on success the result is
    stored in Pinecone under the "operation" domain and appended to the
    findings. A reply that cannot be parsed is reported with ``print`` and the
    chunk is skipped.

    Returns:
        ``{"operation": [{"clause_id": "chunk_<i>", "output": <parsed>}, ...]}``
    """
    findings = []
    for idx, chunk in enumerate(state["chunks"]):
        clause_id = f"chunk_{idx}"
        prompt_text = operation_prompt.format(clause=chunk.page_content)
        raw_reply = chat_model.invoke(prompt_text).content
        try:
            analysis = parse_json_markdown(raw_reply)
        except Exception as err:
            print(f"error parsing chunk {idx} :{err}")
        else:
            store_to_pinecone(clause_id, "operation", chunk.page_content, analysis)
            findings.append({"clause_id": clause_id, "output": analysis})
    return {"operation": findings}


#### Compliance_agent

In [20]:
def compliance_agent(state: AgentState):
    """Run the compliance prompt over every chunk, with finance findings as context.

    Finance findings already present in ``state`` are indexed by clause id and
    the matching entry (or ``{}`` when there is none) is serialised into the
    prompt. Each parsed reply is stored in Pinecone under the "compliance"
    domain and appended to the findings. A reply that cannot be parsed is
    reported with ``print`` and the chunk is skipped.

    Returns:
        ``{"compliance": [{"clause_id": "chunk_<i>", "output": <parsed>}, ...]}``
    """
    # Look-up table: clause id -> finance output for that clause.
    finance_by_clause = {}
    for finding in state.get("finance", []):
        finance_by_clause[finding["clause_id"]] = finding["output"]

    findings = []
    for idx, chunk in enumerate(state["chunks"]):
        clause_id = f"chunk_{idx}"
        finance_ctx = finance_by_clause.get(clause_id, {})
        prompt_text = compliance_prompt.format(
            clause=chunk.page_content,
            finance_findings=json.dumps(finance_ctx),
        )
        raw_reply = chat_model.invoke(prompt_text).content

        try:
            analysis = parse_json_markdown(raw_reply)
        except Exception as err:
            print(f"error parsing chunk {idx} :{err}")
        else:
            store_to_pinecone(clause_id, "compliance", chunk.page_content, analysis)
            findings.append({"clause_id": clause_id, "output": analysis})

    return {"compliance": findings}


##### resolver

In [21]:
def resolver(state: AgentState):
    """Concatenate the per-domain findings into ``state["final"]``.

    Findings are taken in the order finance, legal, operation, compliance;
    a domain missing from the state contributes nothing. The incoming state
    is updated in place and returned.
    """
    domain_order = ("finance", "legal", "operation", "compliance")
    combined = [
        finding
        for domain in domain_order
        for finding in state.get(domain, [])
    ]
    state["final"] = combined
    return state


#### Langgraph

In [25]:
# Workflow wiring:
#
#   classifier -> finance -> compliance --.
#   classifier -> legal ------------------+--> resolver -> END
#   classifier -> operation --------------'
#
# compliance_agent reads state["finance"], so it must run AFTER finance_agent.
# Previously both were fanned out from the classifier in the same step, which
# meant the compliance prompt always received empty finance findings.
graph = StateGraph(AgentState)

graph.add_node("classifier", classifier)
graph.add_node("finance", finance_agent)
graph.add_node("legal", legal_agent)
graph.add_node("operation", operation_agent)
graph.add_node("compliance", compliance_agent)
graph.add_node("resolver", resolver)

graph.set_entry_point("classifier")

# Independent analyses run in parallel straight after classification.
graph.add_edge("classifier", "finance")
graph.add_edge("classifier", "legal")
graph.add_edge("classifier", "operation")

# Compliance depends on the finance findings.
graph.add_edge("finance", "compliance")

# Join edge: a list of start nodes makes the resolver wait for ALL of them, so
# it runs once with every domain's findings available (finance has necessarily
# finished by the time compliance has).
graph.add_edge(["legal", "operation", "compliance"], "resolver")
graph.add_edge("resolver", END)

app = graph.compile()


In [19]:
# Entry point: analyse the sample contract and pretty-print the merged findings.
if __name__ == "__main__":
    contract_path = "Documents/Documents/contract/Contract document.pdf"
    chunks = doc_types_and_split(contract_path)
    output = app.invoke({"chunks": chunks})
    final_report = json.dumps(output["final"], indent=2)
    print(final_report)


[
  {
    "clause_id": "chunk_0",
    "output": {
      "risk_level": "MEDIUM",
      "risk_type": [
        "payment terms"
      ],
      "impact": "Late payments from client may cause cash flow issues for the contractor.",
      "recommendation": "Consider including a clause that allows for interest on late payments or invoices due. Also, consider setting up a payment schedule or milestone payments to ensure consistent cash flow."
    }
  },
  {
    "clause_id": "chunk_1",
    "output": {
      "risk_level": "MEDIUM",
      "risk_type": [
        "payment terms",
        "credit_risks"
      ],
      "impact": "Late payment by the client beyond the agreed 15-day term may impact Contractor's cash flow and financial stability.",
      "recommendation": "Consider implementing a formal invoicing and payment process with clear communication regarding payment terms and potential consequences for late payments."
    }
  },
  {
    "clause_id": "chunk_2",
    "output": {
      "risk_level":