#### Document Loading and Parsing of the document

In [2]:
from langchain_community.document_loaders import PyPDFLoader,TextLoader,UnstructuredWordDocumentLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# function to load based on there types
def doc_types_and_split(file_path):
    if file_path.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif file_path.endswith(".txt"):
        loader = TextLoader(file_path)
    elif filepath.endswith(".docx"):
        loader = UnstructuredWordDocumentLoader(filepath)
    else:
        raise ValueError("Unsupported file type .Supported:pdf,txt,docx")
    docs = loader.load()  # text of that page ,meta (like page number)
    # consolidate thr text from multiple document into one continous string
    text ="\n".join([d.page_content for d in docs])
    splitter = RecursiveCharacterTextSplitter(chunk_size = 800,chunk_overlap = 100)
    return  text

In [4]:
chunk = doc_types_and_split("Documents/finance.pdf")
chunk

'Title: Financial Analysis Report – FY 2024 \n1. Executive Summary \nThe total revenue for FY 2024 amounted to $18.4 million, reflecting a 12.8% increase \ncompared to FY 2023. Operating expenses totaled $11.9 million, resulting in a gross profit \nmargin of 35%. Net income after taxes was recorded at $4.2 million. \n2. Revenue Breakdown \nSegment Revenue Growth \nSoftware Sales $9.6M +18% \nSubscription Services $6.1M +15% \nSupport & Maintenance $2.7M -3% \n3. Expense Distribution \n• Salaries & Staff Benefits: $6.2M \n• Marketing & Advertising: $2.1M \n• Research & Development: $1.4M \n• Administrative and Other Expenses: $2.2M \n4. Cash Flow Overview \n• Net cash from operations: $5.8M \n• Net cash used in investing activities: $2.1M \n• Net cash used in financing activities: $1.6M \n• Cash balance on Dec 31, 2024: $3.9M \n5. Key Financial Highlights \n• EBITDA grew by 9.3% \n• Return on Investment (ROI): 17.6% \n• Current Ratio: 1.84 \n• Debt-to-Equity Ratio: 0.42 \n6. Identified 

#### Embeddings

In [5]:
from sentence_transformers import SentenceTransformer


In [6]:
# Sentence-transformers model (local embeddings)
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [7]:
def get_embedding(text: str):
    """text → numerical 384-dim vector"""
    return embed_model.encode(text, convert_to_numpy=True, show_progress_bar=False)

In [8]:
embed = get_embedding(chunk)
embed

array([ 7.12812021e-02, -1.37677323e-02, -6.25134259e-02,  1.24687394e-02,
       -3.00184451e-02, -1.26012219e-02, -2.12263204e-02,  8.20677355e-02,
       -2.55971421e-02,  4.60892506e-02, -3.16961482e-02, -5.82626760e-02,
       -7.00023174e-02, -1.77769121e-02,  8.70276056e-03, -5.42714819e-03,
        2.78291982e-02, -1.16364308e-01, -3.95420119e-02, -3.14121433e-02,
        5.11302985e-02, -1.58130471e-02, -1.18750497e-04, -7.90839717e-02,
        2.66289506e-02, -8.04848075e-02, -3.84493880e-02,  1.05106039e-02,
       -6.87157512e-02, -6.25701994e-02,  6.07913621e-02,  4.43126187e-02,
        6.08867295e-02, -4.77125123e-03,  1.67557374e-02,  1.08036969e-03,
        3.69261093e-02,  1.03846490e-02,  3.95939872e-02, -2.09918711e-02,
       -6.18464723e-02, -1.06557680e-03, -4.84407879e-02, -9.65951011e-02,
       -1.77784897e-02, -6.14478551e-02,  6.88503450e-03,  5.19032627e-02,
        7.21641630e-02,  4.62564565e-02, -1.85571183e-02,  2.25850958e-02,
        3.28906812e-02,  

In [10]:
from huggingface_hub import InferenceClient

### SETTING UP THE MODEL AND HUGGING FACE TOKEN

In [11]:
HF_TOKEN = "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"
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN_NOT_SET")
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
client = InferenceClient(MODEL_NAME,token = HF_TOKEN)



### FINANCE AI AGENT 

In [23]:
Prompt = """ You are a finance analyst. Analyse the text and provide your findings using clear ,well-formatted bullet points or headings for the folowing categories:
            payment_terms, fees, penalties, credit_risks, revenue_impact.Do not use json
         TEXT: {input_text}
        """
def finance(text: str): 
    prompt = Prompt.format(input_text = text)
    response = client.chat_completion(messages=[{"role":"user","content":prompt}],max_tokens= 512,temperature = 0.1).choices[0].message["content"]
    return response

#### Sample Document Testing for Finance 

In [24]:
text_path = "Documents/finance_agreement (1).pdf"
text_doc =doc_types_and_split(text_path)
text_embed = get_embedding(text_doc)
finance(text_embed)

"Based on the provided text, which appears to be a series of numerical values, it's not immediately clear what these values represent without additional context. However, I will attempt to analyze the text based on the categories provided: payment_terms, fees, penalties, credit_risks, and revenue_impact. Please note that the analysis will be speculative due to the lack of context.\n\n- **Payment Terms:**\n  - No specific payment terms can be directly inferred from the numerical values. These values could represent various financial metrics or indicators, but without more context, it's impossible to determine if they relate to payment terms.\n\n- **Fees:**\n  - The text contains several positive and negative values, which could potentially represent fees. Positive values might indicate fees charged, while negative values could represent fees paid or refunds. For example, values like 0.0651680166, 0.0549474172, and 0.0593127236 could be fees.\n\n- **Penalties:**\n  - Negative values coul

#### lEGAL AI AGENT

In [15]:
Prompt = """  
You are a senior contract lawyer. Analyse the text and provide your findings using clear ,well-formatted bullet points or headings for the folowing categories:
summary, key_obligations, legal_risks, termination_rules, liability_terms.Do not use json
"""
      
def legal(text: str): 
    prompt = Prompt.format(input_text = text)
    response = client.chat_completion(messages=[{"role":"user","content":prompt}],max_tokens= 512,temperature = 0.1).choices[0].message["content"]
    return response


#### Sample Document Testing for Legal

In [16]:
text_path = "Documents/agreement_between_a_firm_and_a_broker_for_agency09032021083534.pdf"
text_doc = doc_types_and_split(text_path)
text_embed = get_embedding(text_doc)
legal(text_embed)

"### Summary\n- The contract is a service agreement between Party A (the Service Provider) and Party B (the Client).\n- The primary purpose of the agreement is to provide software development services.\n- The term of the agreement is for 12 months, starting from the date of signing.\n\n### Key Obligations\n- **Party A (Service Provider):**\n  - To provide software development services as specified in the scope of work.\n  - To deliver the software on time and in accordance with the agreed specifications.\n  - To ensure the software is of high quality and meets the Client's requirements.\n- **Party B (Client):**\n  - To provide necessary information and resources for the development process.\n  - To review and approve the deliverables at specified intervals.\n  - To make timely payments as per the payment schedule.\n\n### Legal Risks\n- **Scope of Work Ambiguity:**\n  - The scope of work is not clearly defined, which could lead to disputes over what services are included.\n- **Intellect

#### Contract AI AGENT

In [12]:
Prompt = """ 
You are a contract manager. Analyse the text and provide your findings using clear ,well-formatted bullet points or headings for the folowing categories:
deliverables, notice_periods, renewal_terms, milestones, compliance_checklist.Do not use json
"""
def contract(text: str): 
    prompt = Prompt.format(input_text = text)
    response = client.chat_completion(messages=[{"role":"user","content":prompt}],max_tokens= 512,temperature = 0.1).choices[0].message["content"]
    return response

#### Sample Document Testing for Contract 

In [13]:
text_path = "Documents/contract_filled (1) (1).pdf"
text_doc = doc_types_and_split(text_path)
text_embed =  get_embedding(text_doc)
contract(text_embed)

'### Deliverables\n- The contract specifies that the vendor will provide custom software development services.\n- Deliverables include the final software product, documentation, and training sessions for end-users.\n- The vendor is required to deliver a working prototype within the first 30 days of the contract.\n\n### Notice Periods\n- The contract outlines a 30-day notice period for either party to terminate the agreement.\n- Any changes to the scope of work require a 15-day written notice to the other party.\n\n### Renewal Terms\n- The contract is initially for a period of 12 months.\n- Automatic renewal will occur for an additional 12 months unless either party provides a 90-day written notice of non-renewal.\n- Renewal terms include the same terms and conditions as the original contract.\n\n### Milestones\n- Milestone 1: Completion of the software design and architecture within 45 days.\n- Milestone 2: Delivery of the first working prototype within 90 days.\n- Milestone 3: Final s

### Operation AI AGENT

In [62]:
Prompt = """
You are an operations lead. Analyse the text and provide your findings using clear ,well-formatted bullet points or headings for the folowing categories:
operational_dependencies, onboarding_requirements, security_concerns, SLA_items, action_items.Do not use json
"""
def operation(text: str): 
    prompt = Prompt.format(input_text = text)
    response = client.chat_completion(messages=[{"role":"user","content":prompt}],max_tokens= 512,temperature = 0.1).choices[0].message["content"]
    return response