In [1]:
from contramate.services import DocumentInfo, MarkdownChunkingService
from contramate.utils import read_markdown_safe
from contramate.dbs.models import ContractAsmd
from contramate.utils.settings.core import PostgresSettings
from sqlmodel import Session, create_engine, select
from pathlib import Path

[32m2025-10-12 14:07:10.247[0m | [1mINFO    [0m | [36mcontramate.utils.settings.base[0m:[36mfind_env_file_if_exists[0m:[36m22[0m - [1mLoading settings from System Environment[0m


In [2]:
# Env file lives at <current working dir>/../.envs/local.env.
# NOTE: the constant name is misspelled ("PAHT"); it is kept unchanged because
# the settings cell below reads it under this exact name.
ENVFILE_PAHT = Path.cwd().parent / ".envs" / "local.env"

In [3]:
# Postgres settings built from the env file at ENVFILE_PAHT;
# get_doc_info() reads PG_SETTINGS.connection_string to create its engine.
PG_SETTINGS = PostgresSettings.from_env_file(ENVFILE_PAHT)

In [4]:

# Identifiers of the single contract document this notebook works on.
PROJECT_ID = "0a70f56d-a6d8-4ac0-810c-db798644bb50"
REFERENCE_DOC_ID = "d7fb9ba6-22e1-5d9c-a18e-2fc709c9a562"
FILENAME = "TRUENORTHENERGYCORP_02_08_2007-EX-10.1-DEVELOPMENT AGREEMENT.PDF.md"

# Layout: <current working dir>/../data/silver/<project id>/<reference doc id>/<filename>
_silver_dir = Path.cwd().parent / "data" / "silver"
FILE_PATH = _silver_dir / PROJECT_ID / REFERENCE_DOC_ID / FILENAME
print(FILE_PATH)

/Users/datapsycho/PythonProjects/AgentEngBootCamp/contramate/data/silver/0a70f56d-a6d8-4ac0-810c-db798644bb50/d7fb9ba6-22e1-5d9c-a18e-2fc709c9a562/TRUENORTHENERGYCORP_02_08_2007-EX-10.1-DEVELOPMENT AGREEMENT.PDF.md


In [5]:
# Query contract_asmd table to get document information

def get_doc_info(project_id: str, reference_doc_id: str) -> "DocumentInfo | None":
    """Look up one contract record and wrap it in a ``DocumentInfo``.

    Selects the first ``ContractAsmd`` row whose ``project_id`` and
    ``reference_doc_id`` both match the arguments, prints a short summary of
    what was found, and builds a ``DocumentInfo`` from that row.

    Args:
        project_id: Value compared against ``ContractAsmd.project_id``.
        reference_doc_id: Value compared against
            ``ContractAsmd.reference_doc_id``.

    Returns:
        A ``DocumentInfo`` for the matching row, or ``None`` when no row
        matches. A falsy ``contract_type`` on the row is replaced with
        ``"Unknown"``.
    """
    engine = create_engine(PG_SETTINGS.connection_string, echo=False)
    try:
        with Session(engine) as session:
            # Query by project_id and reference_doc_id
            statement = select(ContractAsmd).where(
                ContractAsmd.project_id == project_id,
                ContractAsmd.reference_doc_id == reference_doc_id,
            )
            contract = session.exec(statement).first()

            if not contract:
                print("Contract not found in database!")
                return None

            print(f"Found contract: {contract.document_title}")
            print(f"Contract type: {contract.contract_type}")

            # Create DocumentInfo from database record (built while the
            # session is still open, so the row's attributes are readable).
            doc_info = DocumentInfo(
                project_id=contract.project_id,
                reference_doc_id=contract.reference_doc_id,
                contract_type=contract.contract_type or "Unknown",
            )

            print("\nDocumentInfo created:")
            print(f"  Project ID: {doc_info.project_id}")
            print(f"  Reference Doc ID: {doc_info.reference_doc_id}")
            print(f"  Contract Type: {doc_info.contract_type}")
            return doc_info
    finally:
        # A new engine is created on every call, so release its connection
        # pool here instead of leaking it each time the cell is re-run.
        engine.dispose()

In [6]:
# Fetch the DocumentInfo for the configured document; get_doc_info() yields
# None when no matching row exists.
doc_info = get_doc_info(PROJECT_ID, REFERENCE_DOC_ID)
# Read the markdown file at FILE_PATH.
# NOTE(review): presumably returns a falsy value on failure, since the
# chunking cell checks its truthiness before use — confirm.
markdown_content = read_markdown_safe(FILE_PATH)

[32m2025-10-12 14:07:10.418[0m | [1mINFO    [0m | [36mcontramate.utils.file_utils[0m:[36mread_markdown[0m:[36m36[0m - [1mSuccessfully read markdown file: /Users/datapsycho/PythonProjects/AgentEngBootCamp/contramate/data/silver/0a70f56d-a6d8-4ac0-810c-db798644bb50/d7fb9ba6-22e1-5d9c-a18e-2fc709c9a562/TRUENORTHENERGYCORP_02_08_2007-EX-10.1-DEVELOPMENT AGREEMENT.PDF.md (44078 chars)[0m


Found contract: TRUENORTHENERGYCORP_02_08_2007-EX-10.1-DEVELOPMENT AGREEMENT.PDF
Contract type: Development

DocumentInfo created:
  Project ID: 0a70f56d-a6d8-4ac0-810c-db798644bb50
  Reference Doc ID: d7fb9ba6-22e1-5d9c-a18e-2fc709c9a562
  Contract Type: Development


In [7]:
# Create MarkdownChunkingService and process chunks
# Fail fast: silently skipping this cell would leave `result` undefined (or
# stale from an earlier run of the kernel) for the cells that follow.
if not doc_info:
    raise RuntimeError("Cannot chunk document: no DocumentInfo is available.")
if not markdown_content:
    raise RuntimeError("Cannot chunk document: markdown content is empty or missing.")

service = MarkdownChunkingService(
    markdown_content=markdown_content,
    doc_info=doc_info,
    token_limit=5000,  # reported by the service log as "Token limit per chunk"
    min_chunk_size=100,  # NOTE(review): unit is not visible from here — confirm
)

# Process markdown into chunks
result = service()

[32m2025-10-12 14:07:17.410[0m | [1mINFO    [0m | [36mcontramate.services.markdown_chunking_service[0m:[36mprocess_markdown_to_chunks[0m:[36m379[0m - [1mToken limit per chunk: 5000[0m
[32m2025-10-12 14:07:17.410[0m | [1mINFO    [0m | [36mcontramate.services.markdown_chunking_service[0m:[36msplit_into_sections[0m:[36m241[0m - [1mFound preamble content before first header[0m
[32m2025-10-12 14:07:17.412[0m | [1mINFO    [0m | [36mcontramate.services.markdown_chunking_service[0m:[36mprocess_markdown_to_chunks[0m:[36m382[0m - [1mSections count in document: 160[0m
[32m2025-10-12 14:07:17.431[0m | [1mINFO    [0m | [36mcontramate.services.markdown_chunking_service[0m:[36mprocess_markdown_to_chunks[0m:[36m599[0m - [1mNumber of chunks created: 3[0m


In [8]:
# Extract the payload from the chunking service's result object.
# NOTE(review): `unwrap()` presumably raises if chunking failed — confirm
# against the result type returned by the service.
response = result.unwrap()

In [11]:
# Show the second chunk (index 1) of the response as plain text.
print(response.chunks[1])

content='Context: Document Preamble\n\n3.4 If Company elects not to participate in a Substitute Well, or is deemed not to participate in a Substitute Well, this Agreement\nshall terminate except as provided in Sections 6.2, 6.3, 6.4, and 6.5.\n\nContext: Document Preamble > 3. 4 If Company elects not to participate in a Substitute Well, or is deemed not to participate in a Substitute Well, this Agreement\n\n**ARTICLE IV**\n\nContext: Document Preamble > 3. 4 If Company elects not to participate in a Substitute Well, or is deemed not to participate in a Substitute Well, this Agreement\n\n**EARNING RIGHTS**\n\nContext: Document Preamble\n\n4.1 When and if the Initial Well is drilled to the Objective Zone and successfully Completed as a well capable of producing oil\nand/or gas in paying quantities, BP shall assign to Company, by partial assignment in the form attached hereto as Exhibit “E”(the “Partial\nAssignment”), an eight and three quarters percent (8.75%) working interest in the Ini