In [1]:
from pydantic import BaseModel, Field
from typing import List, Optional
from datetime import date
import ollama
import json
from pathlib import Path

In [2]:

class ContractMetadata(BaseModel):
    """Structured metadata extracted from a single contract document."""

    # Required identification fields.
    contract_title: str = Field(..., description="Title or short name of the contract")
    contract_type: str = Field(..., description="Type of contract, e.g., Purchase Contract, MSA, SOW")
    supplier_name: str = Field(..., description="Name of the supplier/vendor")
    buyer_name: str = Field(..., description="Name of the buyer organization")
    purchase_order: Optional[str] = Field(None, description="Linked purchase order ID, if any")
    # The date descriptions must name the ISO layout: the fields are typed `date`
    # and the system prompt asks for YYYY-MM-DD, so a DD-MM-YYYY hint would
    # contradict both.
    purchase_date: Optional[date] = Field(None, description="Date the contract was signed or became effective, in ISO 8601 format (YYYY-MM-DD)")
    expiry_date: Optional[date] = Field(None, description="Date the contract expires or ends, in ISO 8601 format (YYYY-MM-DD)")
    # Optional free-text sections of the contract.
    contract_description: Optional[str] = Field(None, description="Short description or preamble of the contract")
    objective: Optional[str] = Field(None, description="Stated business objective of the contract")
    scope: Optional[List[str]] = Field(default_factory=list, description="List describing the scope of the agreement")
    pricing_and_payment_terms: Optional[str] = Field(None, description="Terms related to pricing and payment schedule")
    delivery_terms: Optional[str] = Field(None, description="Conditions related to delivery, location, timelines")
    quality_assurance: Optional[str] = Field(None, description="Obligations regarding quality and handling of defects")
    confidentiality_clause: Optional[bool] = Field(False, description="Whether a confidentiality clause is present")
    termination_clause: Optional[str] = Field(None, description="Conditions under which the contract may be terminated")
    # Fields below are currently disabled and therefore absent from the schema.
    # signatories: List[Signatory] = Field(default_factory=list, description="List of signatories with roles")
    # document_page_count: Optional[int] = Field(None, description="Total number of pages in the contract")

In [3]:
def load_json_contracts(file_path) -> dict:
    """Read a contract JSON file and return its parsed content.

    Args:
        file_path: Location of the JSON file; anything accepted by ``open()``
            (for example a ``str`` or ``pathlib.Path``).

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII characters in the contract text
    # (bullets, dashes, ...) do not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [4]:
# Sample contract JSON, addressed relative to the notebook's working directory.
filepath = Path("../data/silver/contracts/CW0327.pdf.json")

'CW0327'

In [4]:
# Load the sample contract through `filepath` (defined in the previous cell)
# rather than repeating the literal path, so the location lives in one place.
test_contract = load_json_contracts(filepath)

In [5]:
# Show the raw contract text; this is the string later sent as the user message.
test_contract["text"]

' \n PROCUREMENT CONTRACT  \nThis Procurement Contract (the "Contract") is entered into between B2, hereinafter referred to \nas the "Supplier," and Plasma Corporation, hereinafter referred to as the "Buyer."  \n1. TERM  \n1.1 Effective Date: This Contract shall become effective on February 2023.  \n1.2 Expiry Date: The initial term of this Contract shall be for a period of 3 years from the \nEffective Date unless terminated earlier as per the terms of this Contract.  \n2. SUPPLIER DETAILS  \nSupplier Name: B2 Address: 456 Oak Avenue, Cityville, USA Contact Person: Jane Doe Email: \njane@xyzsuppliers.com Phone: +1-987-654-3210  \n3. PURCHASE DETAILS  \n3.1 Product/Service Description: The Supplier shall provide the following products/services to \nthe Buyer:  \nProduct X2 \nProduct Y2 \nProduct Z2 \n \n3.2 Volume Discounts: The Buyer and the Supplier agree to the following volume discounts \nbased on the cumulative purchase volume during the Contract term:  \n• \n• 100 units - 500 unit

In [6]:
# System prompt for the metadata-extraction model.
# - Date examples use ISO 8601 (YYYY-MM-DD) so they agree with the stated rule
#   and with the `date` fields of ContractMetadata.
# - No guideline for signatories: that field is commented out of
#   ContractMetadata, so it is not part of the requested structure.
SYSTEM_PROMPT = """
You are a Contract Metadata Extractor.

Your job is to analyze raw contract documents and extract structured metadata in JSON format.

Use the following Pydantic model as a reference structure:

```python
class ContractMetadata(BaseModel):
    contract_title: str
    contract_type: str
    supplier_name: str
    buyer_name: str
    purchase_order: Optional[str]
    purchase_date: Optional[date]
    expiry_date: Optional[date]
    contract_description: Optional[str]
    objective: Optional[str]
    scope: Optional[List[str]]
    pricing_and_payment_terms: Optional[str]
    delivery_terms: Optional[str]
    quality_assurance: Optional[str]
    confidentiality_clause: Optional[bool]
    termination_clause: Optional[str]
```

Guidelines:

- Extract all fields if present. If a field is not found, omit it or set it to null.
- Dates must be in ISO format: YYYY-MM-DD.
- The effective date might be given only as a month and year, e.g. "Effective Date: December 2020"; treat it as the first day of that month: 2020-12-01.
- The expiry date might be given relative to the effective date, e.g. "Expiry Date: 5 years from the Effective Date"; compute it from the effective date: 2025-12-01.
- Fields like scope must be a list of bullet points if multiple scopes are defined.
- For contract_type, infer the type such as "Purchase Contract", "MSA", "SOW", "Framework Agreement", "PO Contract", etc.
- If no contract_type found in the text, set it to "Contract".
- For confidentiality_clause, return true if any confidentiality-related language exists; else false.
- Output the result as valid JSON matching the model above, with no commentary or explanation.

You will now be given the full contract document as input.
"""

In [7]:
# Ask the gemma3:4b model to extract metadata from the sample contract,
# passing the ContractMetadata JSON schema as the requested output format.
response = ollama.chat(
    model='gemma3:4b',
    messages=[
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': test_contract["text"]},
    ],
    format=ContractMetadata.model_json_schema(),
    # Temperature 0 keeps the extraction as repeatable as possible across re-runs.
    options={'temperature': 0},
)


In [8]:
# Raw JSON string returned by the model, before any validation.
response.message.content

'{\n    "contract_title": "Procurement Contract",\n    "contract_type": "Purchase Contract",\n    "supplier_name": "B2",\n    "buyer_name": "Plasma Corporation",\n    "purchase_order": null,\n    "purchase_date": "2023-02-20",\n    "expiry_date": "2026-02-20",\n    "contract_description": null,\n    "objective": null,\n    "scope": [\n        "Product X2",\n        "Product Y2",\n        "Product Z2"\n    ],\n    "pricing_and_payment_terms": "Product X2: $15 per unit; Product Y2: $25 per unit; Product Z2: $35 per unit; The Buyer shall make payments to the Supplier within 45 days from the receipt of a valid invoice. Payments shall be made via PayPal.",\n    "delivery_terms": null,\n    "quality_assurance": null,\n    "confidentiality_clause": true,\n    "termination_clause": "Either party may terminate this Contract with 60 days written notice to the other party. Either party may terminate this Contract immediately in the event of a material breach by the other party."\n}\n'

In [9]:
# Parse the model's JSON reply into a ContractMetadata instance.
ContractMetadata.model_validate_json(response.message.content)

ContractMetadata(contract_title='Procurement Contract', contract_type='Purchase Contract', supplier_name='B2', buyer_name='Plasma Corporation', purchase_order=None, purchase_date=datetime.date(2023, 2, 20), expiry_date=datetime.date(2026, 2, 20), contract_description=None, objective=None, scope=['Product X2', 'Product Y2', 'Product Z2'], pricing_and_payment_terms='Product X2: $15 per unit; Product Y2: $25 per unit; Product Z2: $35 per unit; The Buyer shall make payments to the Supplier within 45 days from the receipt of a valid invoice. Payments shall be made via PayPal.', delivery_terms=None, quality_assurance=None, confidentiality_clause=True, termination_clause='Either party may terminate this Contract with 60 days written notice to the other party. Either party may terminate this Contract immediately in the event of a material breach by the other party.')