In [2]:
# Make the parent of the current working directory importable so that the
# 'src' package can be imported from this notebook.
import os
import sys

# os.pardir is the platform's parent-directory token; abspath collapses it.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Prepend only when absent so re-running this cell never adds a duplicate entry.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print("Project root in sys.path:", project_root)
print("Current working directory:", os.getcwd())

Project root in sys.path: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP
Current working directory: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP\notebooks


In [3]:
# Cell 7: Import necessary classes and modules for Sectioner test
import uuid # To generate dummy UUIDs

# 'src' is importable because the previous cell added the project root to sys.path
from src.services.Sectioner import Sectioner

# --- Define sample Markdown text ---
# Hand-written fixture that simulates output from FinancialDocParser:
#   * five pages, each wrapped in "--- Page N Start ---" / "--- Page N End ---" markers
#   * headings at three levels (#, ##, ###), one Markdown table, and bullet lists
#   * page 4 contains body text only, with no heading of its own
# The text is passed verbatim to the Sectioner, so the leading and trailing
# newlines inside the triple quotes are part of the input.
sample_markdown = """
--- Page 1 Start ---

# Annual Report 2023

This is the introductory text for the annual report. It covers the period from
January 1, 2023 to December 31, 2023.

--- Page 1 End ---

--- Page 2 Start ---

## Section 1: Financial Highlights

Key figures for the year:
Revenue: $1.2B
Net Income: $200M

--- Page 2 End ---

--- Page 3 Start ---

## Section 2: Income Statement

Detailed revenue analysis:

| Item          | 2023 ($M) | 2022 ($M) |
|---------------|-----------|-----------|
| Product Sales | 800       | 700       |
| Service Fees  | 400       | 340       |
| **Total**     | **1200**  | **1040**  |

### Operating Expenses

Breakdown of costs.

--- Page 3 End ---

--- Page 4 Start ---

Detailed expense breakdown:
- Research & Development: $150M
- Marketing: $200M
- General & Administrative: $100M

--- Page 4 End ---

--- Page 5 Start ---

## Section 3: Balance Sheet

Assets and Liabilities.

### Assets

Current Assets:
- Cash: $50M
- Accounts Receivable: $100M

--- Page 5 End ---
"""

# --- Create dummy IDs for testing ---
# Random placeholder identifiers; in the real pipeline these would come from
# earlier steps. They are regenerated on every run of this cell.
test_document_id, test_user_id = uuid.uuid4(), uuid.uuid4()

# Echo the generated IDs so they can be matched against the sectioner output.
print("Starting Sectioner test...")
print(f"Using dummy Document ID: {test_document_id}")
print(f"Using dummy User ID: {test_user_id}")

# Maximum number of characters of a section's content shown in the preview.
PREVIEW_CHARS = 200


def preview_text(text, limit=PREVIEW_CHARS):
    """Return at most `limit` leading characters of `text` for display.

    The '...' marker is appended only when characters were actually cut off,
    so a short section is not presented as if it had been truncated.
    `None` is treated as empty text.

    Args:
        text: The full text to preview (a string or None).
        limit: Maximum number of characters to keep before the marker.

    Returns:
        `text` unchanged when it fits within `limit`, otherwise the first
        `limit` characters followed by '...'.
    """
    text = text or ''
    if len(text) <= limit:
        return text
    return text[:limit] + '...'


try:
    # Instantiate the Sectioner
    sectioner = Sectioner()

    # Run the sectioner on the sample markdown
    sections_data = sectioner.section_markdown(
        markdown_content=sample_markdown,
        document_id=test_document_id,
        user_id=test_user_id
    )

    # Print the results. The count is computed defensively so that a None/empty
    # result reaches the "No sections" branch instead of failing inside len().
    section_count = len(sections_data) if sections_data else 0
    print(f"\n--- Sectioner Output ({section_count} sections) ---")
    if sections_data:
        for position, section in enumerate(sections_data, start=1):
            # Every field is read with .get so one missing key shows up as None
            # in the report instead of aborting the remaining sections.
            content = section.get('content_markdown') or ''
            print(f"\nSection {position} (Index: {section.get('section_index')}):")
            print(f"  Heading: '{section.get('section_heading')}'")
            print(f"  Document ID: {section.get('document_id')}")
            print(f"  User ID: {section.get('user_id')}")
            print(f"  Page Numbers: {section.get('page_numbers')}")
            # Print only the beginning of the content
            print(f"  Content Preview:\n---\n{preview_text(content)}\n---")
            print(f"  Full Content Length: {len(content)}")
    else:
        print("No sections were generated.")

except Exception as e:
    # Best-effort check: report the failure without stopping the notebook, but
    # keep the traceback so the failing line can be located.
    import traceback

    print(f"An error occurred during Sectioner test: {e}")
    traceback.print_exc()

Starting Sectioner test...
Using dummy Document ID: 46efd28c-c5f3-45e2-8326-44d7c3eab33a
Using dummy User ID: 7b883a05-d2fd-4de8-98fd-24e69a498fad
Initialized Sectioner.
Sectioning markdown content (60 lines)...
Finalized section 0: 'Document Start'
Finalized section 1: 'Annual Report 2023'
Finalized section 2: 'Section 1: Financial Highlights'
Finalized section 3: 'Section 2: Income Statement'
Finalized section 4: 'Operating Expenses'
Finalized section 5: 'Section 3: Balance Sheet'
Finalized last section 6: 'Assets'
Sectioning complete. Created 7 sections.

--- Sectioner Output (7 sections) ---

Section 1 (Index: 0):
  Heading: 'Document Start'
  Document ID: 46efd28c-c5f3-45e2-8326-44d7c3eab33a
  User ID: 7b883a05-d2fd-4de8-98fd-24e69a498fad
  Page Numbers: [1]
  Content Preview:
---
--- Page 1 Start ---...
---
  Full Content Length: 20

Section 2 (Index: 1):
  Heading: 'Annual Report 2023'
  Document ID: 46efd28c-c5f3-45e2-8326-44d7c3eab33a
  User ID: 7b883a05-d2fd-4de8-98fd-24e69a4