In [1]:
# Make the parent of the working directory importable so that `src` resolves.
import sys
import os

# The notebook is assumed to be launched from a sub-folder of the project
# (e.g. notebooks/), so the project root is one level above the working dir.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir))

# Prepend only once so re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print("Project root in sys.path:", project_root)
print("Current working directory:", notebook_dir)

Project root in sys.path: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP
Current working directory: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP\notebooks


In [2]:
# Cell 2: Load environment variables

from dotenv import load_dotenv

# Populate os.environ from a .env file. load_dotenv() is called without an
# explicit path, so python-dotenv locates the file itself
# (presumably the .env in the project root -- TODO confirm).
load_dotenv()

# Verify the Gemini API key is loaded (required for MetadataExtractor).
# Only the presence of the key is reported: cell output is saved inside the
# notebook file, so even a few leading/trailing characters of the secret
# would end up in version control or shared copies.
gemini_key = os.environ.get("GEMINI_API_KEY")
if gemini_key:
    print("Gemini API Key loaded.")
else:
    print("Gemini API Key not loaded. MetadataExtractor will not work.")
    # In a real script, you'd handle this error properly.
    # For this test, just acknowledge it.

Gemini API Key loaded: AIz...KbyQ


In [3]:
# Cell 3: Import and Instantiate MetadataExtractor

# Project-local classes; these resolve only if the project root is on sys.path.
from src.llm.GeminiClient import GeminiClient
from src.services.MetadataExtractor import MetadataExtractor

print("\nAttempting to instantiate clients...")

try:
    # The extractor takes the LLM client as a dependency, so build that first.
    gemini_client = GeminiClient()
    metadata_extractor = MetadataExtractor(gemini_client=gemini_client)
    print("MetadataExtractor instantiated successfully.")
except Exception as err:
    # A ValueError is reported as a configuration problem; anything else is
    # reported as unexpected. Either way the error is only printed, so later
    # cells that use `metadata_extractor` will fail if this branch is hit.
    if isinstance(err, ValueError):
        print(f"Configuration Error during instantiation: {err}")
    else:
        print(f"An unexpected error occurred during instantiation: {err}")


Attempting to instantiate clients...
Initializing Gemini client with API key: AIz...yQ
MetadataExtractor instantiated successfully.


In [4]:
# Cell 4: Prepare Sample Markdown Snippet and Test Extraction

from datetime import date

# Mock of the opening pages of a financial report, assembled page by page.
# Page 1 carries the fields the extractor is expected to pick up: report type,
# company name, period end date / year, and a summary paragraph.
page_one_markdown = """
--- Page 1 Start ---

# Annual Report 2023 - Acme Corporation

## Executive Summary

This Annual Report for the fiscal year ended December 31, 2023, provides a comprehensive overview of Acme Corporation's financial performance, strategic initiatives, and future outlook. Despite challenging market conditions, Acme achieved significant milestones, including a 15% increase in total revenue and a 20% improvement in net income.

--- Page 1 End ---
"""

# Page 2 is just a table of contents followed by a placeholder for the rest.
page_two_markdown = """
--- Page 2 Start ---

## Table of Contents

1. Executive Summary
2. Financial Highlights
3. Income Statement
4. Balance Sheet
... (rest of document markdown)
"""

# Each page string starts and ends with a newline, so plain concatenation
# leaves exactly one blank line between the page markers.
sample_markdown_snippet = page_one_markdown + page_two_markdown

print("\nAttempting to extract metadata from sample snippet...")


Attempting to extract metadata from sample snippet...


In [7]:
import json
from src.services.MetadataExtractor import FinancialDocumentMetadata

# Run the extractor (created in an earlier cell) on the sample snippet.
extracted_data = metadata_extractor.extract_metadata(sample_markdown_snippet)

print("\n--- Extracted Metadata ---")

if not isinstance(extracted_data, FinancialDocumentMetadata):
    # Not expected when the LLM call and model validation succeed; kept as a
    # diagnostic so an unexpected return type is visible instead of crashing.
    print("Extraction did not return a FinancialDocumentMetadata object.")
    print(f"Received type: {type(extracted_data)}")
    print(f"Received data: {extracted_data}")
else:
    print("Extraction successful. Result is a FinancialDocumentMetadata object.")
    # Full structured result as indented JSON, then one field on its own as a
    # quick attribute-access check.
    print(extracted_data.model_dump_json(indent=4))
    print(extracted_data.company_name)



Sending text snippet to LLM for structured metadata extraction...
Structured metadata extraction attempted.

--- Extracted Metadata ---
Extraction successful. Result is a FinancialDocumentMetadata object.
{
    "doc_specific_type": "Annual Report",
    "company_name": "Acme Corporation",
    "report_date": "2023-12-31",
    "doc_year": 2023,
    "doc_quarter": 4,
    "doc_summary": "This Annual Report for the fiscal year ended December 31, 2023, provides a comprehensive overview of Acme Corporation's financial performance, strategic initiatives, and future outlook. Despite challenging market conditions, Acme achieved significant milestones, including a 15% increase in total revenue and a 20% improvement in net income."
}
Acme Corporation
