In [1]:
from currensee.agents.complete_graph import compiled_graph
from currensee.agents.tools.finance_tools import generate_macro_table
import warnings

# Install a blanket 'ignore' filter so no warning of any category is shown
# by the cells that follow.
warnings.filterwarnings('ignore')

2025-05-18 19:09:14,143 - currensee.core.secrets - INFO - PROJECT_ID not found, attempting to load from .env file
2025-05-18 19:09:14,147 - currensee.core.secrets - INFO - Loaded environment from /home/jupyter/so_currensee/tools_test/currensee/.env
2025-05-18 19:09:14,147 - currensee.core.secrets - INFO - SecretManager initialized with project_id: adsp-34002-on02-sopho-scribe


## Define Initial State

This is data that we should be retrieving from each meeting invite.

**DO NOT** change this data until the CRM DB has been updated

In [2]:
# Meeting details used as the starting state for the report run.
init_state = dict(
    client_name='Adam Clay',
    client_email='adam.clay@compass.com',
    meeting_timestamp='2024-03-26 11:00:00',
    meeting_description='Compass - Annual Credit Facility Review Meeting',
    report_length='long',
)

In [3]:
# To change the email summary prompt, edit the function produce_client_email_summary
# in the file currensee/agents/tools/outlook_tools.py

In [4]:
# To change the finance news summary prompt, edit the function summarize_finance_outputs
# in the file currensee/agents/tools/finance_tools.py

In [5]:
# Run the compiled graph on the meeting details. Later cells read
# 'final_summary' and the '*_sources' entries from the returned mapping.
result = compiled_graph.invoke(init_state)


Generating report with length: long



In [6]:
# Briefing text produced by the graph run (raises KeyError if the key is absent).
summary = result['final_summary']

# Generate the macro snapshot via the finance tools helper.
# NOTE(review): the return format of generate_macro_table is not visible here.
macro_table = generate_macro_table()

In [7]:
# The report is currently the summary alone: the trailing comment on the next
# line is the disabled expression that would append the macro snapshot section.
full_report = summary #+ "\n\n### Macro Financial Snapshot\n\n" + macro_table
print(full_report)

**Compass - Annual Credit Facility Review Meeting: Briefing Document**

**1. Past Email Summary:**

The $25 million revolving credit facility for Compass was established following initial contact at the Real Estate Finance conference.  An indicative term sheet was provided, followed by Compass's internal approval and the finalization of the loan agreement.  The facility has been successfully funded, with Compass reporting straightforward drawdowns and satisfactory reporting processes.

**2. Recent Email Topics:**

* Compass confirmed the credit facility is functioning smoothly, with easy drawdowns and clear reporting procedures.
* Bankwell Financial re-affirmed the official closing and funding of the $25 million facility.
* Bankwell Financial offered continued support and assistance to Compass regarding facility utilization.
* Both Bankwell Financial and Compass expressed mutual satisfaction with the partnership and the efficient closing process.

**3. Recent Client Questions:**

1.  C

In [8]:
# Traceback: pull the source lists recorded in the graph result.

# Each lookup falls back to an empty list when the key is missing.
client_industry_summary = result.get('client_industry_sources', [])
macro_news_summary = result.get('macro_news_sources', [])

# Function to format and print client_industry and macro_news summaries
def print_summary(summary, summary_name):
    """
    Print a titled, human-readable listing of articles.

    Args:
        summary: Iterable of article dicts. The keys 'title', 'snippet',
            'date' and 'link' are read; a missing key is shown as a
            placeholder. ``None`` is treated as an empty listing.
        summary_name: Heading printed above the listing and underlined with '='.

    Returns:
        None. All output goes to stdout.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import urlparse

    print(f"\n{summary_name}:\n{'='*len(summary_name)}")
    for article in summary or []:
        title = article.get('title', 'No Title')
        snippet = article.get('snippet', 'No Snippet')
        date = article.get('date', 'No Date')
        link = article.get('link', '')

        # Derive the source from the host part of the link. Indexing the
        # '/'-split pieces raised IndexError for links without a scheme
        # (e.g. "example.com"); urlparse yields an empty host instead.
        try:
            source = urlparse(link).netloc if link else ''
        except ValueError:
            # urlparse rejects some malformed hosts (e.g. a broken IPv6 literal).
            source = ''
        if not source:
            source = 'No Source'

        # Format and print the information
        print(f"Title: {title}")
        print(f"Snippet: {snippet}")
        print(f"Date: {date}")
        print(f"Source: {source}")
        print(f"Link: {link}")  # Displaying the full link
        print("-" * 40)  # Just a separator for readability

# Function to format and print the holdings summary
def print_holdings_summary(holdings_sources):
    """
    Print the articles found for each client holding, grouped by holding.

    Args:
        holdings_sources: Mapping of holding name to a list of article dicts.
            The keys 'title', 'snippet', 'date' and 'link' are read from each
            article; a missing key is shown as a placeholder. ``None`` is
            treated as an empty mapping.

    Returns:
        None. All output goes to stdout.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import urlparse

    print("\nClient Holdings Summary:")
    print("========================")

    for holding, articles in (holdings_sources or {}).items():
        print(f"\n--- {holding} ---")
        if not articles:
            print("No news found.")
            continue

        for article in articles:
            title = article.get('title', 'No Title')
            snippet = article.get('snippet', 'No Snippet')
            date = article.get('date', 'No Date')
            link = article.get('link', '')

            # Derive the source from the host part of the link. Indexing the
            # '/'-split pieces raised IndexError for links without a scheme
            # (e.g. "example.com"); urlparse yields an empty host instead.
            try:
                source = urlparse(link).netloc if link else ''
            except ValueError:
                # urlparse rejects some malformed hosts.
                source = ''
            if not source:
                source = 'No Source'

            print(f"Title: {title}")
            print(f"Snippet: {snippet}")
            print(f"Date: {date}")
            print(f"Source: {source}")
            print(f"Link: {link}")
            print("-" * 40)
            
# Print the formatted summaries
#print_summary(client_industry_summary, 'Client Industry Summary')
#print_summary(macro_news_summary, 'Macro News Summary')
#print_holdings_summary(result.get("client_holdings_sources", {}))

In [9]:
from textwrap import wrap
from currensee.core import get_model, settings

def chunk_sources_with_metadata(sources: dict[str, list[dict]], max_length: int = 1000) -> dict[str, tuple[str, str]]:
    """
    Split every source entry into width-limited text pieces, each paired with its link.

    The text of an entry is its title and snippet joined by a newline. Pieces
    are produced by ``textwrap.wrap`` without breaking long words and without
    normalising whitespace.

    Returns a dict keyed like 'Client Industry Summary [1.1]' (category,
    1-based entry number, 1-based piece number) whose values are
    (piece_text, source_url) tuples.
    """
    pieces_by_key = {}
    for category, entries in sources.items():
        for entry_no, entry in enumerate(entries, start=1):
            source_url = entry.get("link", "")
            text = f'{entry.get("title", "")}\n{entry.get("snippet", "")}'.strip()
            pieces = wrap(
                text,
                width=max_length,
                break_long_words=False,
                replace_whitespace=False,
            )
            pieces_by_key.update(
                {
                    f"{category} [{entry_no}.{piece_no}]": (piece.strip(), source_url)
                    for piece_no, piece in enumerate(pieces, start=1)
                }
            )
    return pieces_by_key

In [11]:
def build_prompt_with_urls(summary: str, chunked_sources: dict[str, tuple[str, str]]) -> str:
    """
    Compose the claim-to-URL attribution prompt.

    Args:
        summary: The generated summary text whose claims should be attributed.
        chunked_sources: Mapping of a source key to a (chunk_text, url) tuple.

    Returns:
        The prompt text: the summary, then every source chunk labelled with
        its key and URL, then the expected answer format.
    """
    source_sections = []
    for key, (chunk, url) in chunked_sources.items():
        source_sections.append(f"{key} (Source: {url}):\n{chunk}")
    sources_text = "\n\n".join(source_sections)

    return f"""
You are a financial analyst assistant. You generated the following summary:

--- Summary ---
{summary}

You used these source snippets (each with its original URL):

--- Sources ---
{sources_text}

Please map each claim from the summary to the URLs that support it. Format:

- Summary claim: "..."
  → Source URL(s): ["https://..."]

Use only the URLs in the provided sources. Don't invent URLs.
"""

def format_holdings_sources(raw_sources):
    """
    Flatten per-holding article lists into one list of source dicts.

    Args:
        raw_sources: Mapping of ticker to a list of article dicts; any falsy
            value yields an empty list.

    Returns:
        A list of dicts with 'title', 'snippet' and 'link' keys. The ticker
        stands in for a missing title; a missing snippet or link becomes ''.
    """
    return [
        {
            "title": item.get("title", symbol),
            "snippet": item.get("snippet", ""),
            "link": item.get("link", ""),
        }
        for symbol, item_list in (raw_sources or {}).items()
        for item in item_list
    ]


# Step 1: Gather the three source groups from the graph result and chunk them.
# Holdings sources are flattened first so all three groups are lists of dicts.
sources = {
    "Client Industry Summary": result.get("client_industry_sources", []),
    "Holdings Summary": format_holdings_sources(result.get("client_holdings_sources", {})),
    "Macro Summary": result.get("macro_news_sources", [])
}

chunked_sources = chunk_sources_with_metadata(sources)

# Step 2: Compose the attribution prompt from the summary and the chunks.
prompt = build_prompt_with_urls(summary, chunked_sources)

# Step 3: Send the prompt to the default model as a single human message.
from langchain_core.messages import HumanMessage

model = get_model(settings.DEFAULT_MODEL)
response = model.invoke([HumanMessage(content=prompt)])


In [12]:
import re

# Step 3.5: Filter the output to remove claims with no supporting URLs
def filter_empty_sources(response_text: str) -> str:
    """
    Drop claim blocks whose source list is explicitly empty.

    The response is cut into blocks at every line that begins with
    "- Summary claim:". A block is discarded when it contains
    "→ Source URL(s): []"; all other blocks are kept in their original order
    and joined with a blank line.
    """
    empty_list_marker = re.compile(r'→ Source URL\(s\):\s*\[\s*\]\s*(\*.*\*)?')

    kept_sections = []
    for section in re.split(r"\n(?=- Summary claim:)", response_text.strip()):
        if empty_list_marker.search(section) is None:
            kept_sections.append(section)

    return "\n\n".join(kept_sections)


In [13]:
def extract_claim_url_pairs(response_text: str) -> list[tuple[str, list[str]]]:
    """
    Extract (claim, urls) pairs from the LLM's response.

    Each '- Summary claim: "..."' entry followed by '→ Source URL(s): [...]'
    is matched. An entry is kept only when its bracketed list parses as a
    list literal containing nothing but strings.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A list of (claim, urls) tuples in order of appearance. Entries whose
        URL list cannot be parsed are skipped.
    """
    # Local import keeps this cell runnable on its own.
    import ast

    claim_url_pairs = []
    blocks = re.findall(
        r'- Summary claim:\s*"(.*?)"\s*→ Source URL\(s\):\s*(\[.*?\])',
        response_text,
        re.DOTALL,
    )
    for claim, urls_str in blocks:
        try:
            # SECURITY: the model output is untrusted text. eval() was used
            # here with builtins removed, but that is not a sandbox: attribute
            # access and method calls are still evaluated. literal_eval only
            # accepts literals, so nothing in the response is ever executed.
            urls = ast.literal_eval(urls_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a plain literal: skip this entry, as before.
            continue
        if isinstance(urls, list) and all(isinstance(u, str) for u in urls):
            claim_url_pairs.append((claim.strip(), urls))
    return claim_url_pairs

In [14]:
def insert_links_into_summary(summary: str, claim_url_pairs: list[tuple[str, list[str]]]) -> str:
    """
    Insert Markdown-style [Source] links after corresponding claims in the summary.

    Args:
        summary: The summary text to annotate.
        claim_url_pairs: (claim, urls) tuples. The claim must appear verbatim
            in the summary to be annotated.

    Returns:
        The summary with " ([Source](url))" (one URL) or
        " ([Source 1](url), [Source 2](url), ...)" (several URLs) appended to
        the first occurrence of each claim. Claims that are empty or have no
        URLs are skipped; claims not found are reported on stdout.
    """
    updated_summary = summary

    for claim, urls in claim_url_pairs:
        # An empty claim would match at position 0, and an empty URL list
        # would append a meaningless " ()".
        if not claim or not urls:
            continue

        if len(urls) == 1:
            link_text = f" ([Source]({urls[0]}))"
        else:
            link_text = " (" + ", ".join(
                f"[Source {i+1}]({url})" for i, url in enumerate(urls)
            ) + ")"

        if claim not in updated_summary:
            print(f"⚠️ Could not find claim in summary: '{claim}'")
            continue

        # Plain string replacement: with re.subn the replacement is parsed as
        # a template, so a backslash in the claim or URL either raised
        # re.error (e.g. "\d") or was silently altered.
        updated_summary = updated_summary.replace(claim, claim + link_text, 1)

    return updated_summary

In [15]:
# Reuse `response` from the earlier model call: the prompt has not changed
# since then, so sending it again only added latency and cost, and could
# return a different answer from the one already held.
filtered_output = filter_empty_sources(response.content)
claim_url_pairs = extract_claim_url_pairs(filtered_output)
linked_summary = insert_links_into_summary(summary, claim_url_pairs)

In [16]:
def insert_links_into_summary_html(summary: str, claim_url_pairs: list[tuple[str, list[str]]]) -> str:
    """
    Insert HTML <a> links after corresponding claims in the summary.

    Only the links are added; the rest of the summary text is returned
    unchanged (no other HTML formatting is applied here).

    Args:
        summary: The summary text to annotate.
        claim_url_pairs: (claim, urls) tuples. The claim must appear verbatim
            in the summary to be annotated.

    Returns:
        The summary with a parenthesised "Source" anchor (or numbered
        "Source N" anchors for several URLs) appended to the first occurrence
        of each claim. Claims that are empty or have no URLs are skipped;
        claims not found are reported on stdout.
    """
    # Local import keeps this cell runnable on its own.
    from html import escape

    updated_summary = summary

    for claim, urls in claim_url_pairs:
        # An empty claim would match at position 0, and an empty URL list
        # would append a meaningless " ()".
        if not claim or not urls:
            continue

        # URLs come from model output: escape them so a quote or '&' cannot
        # break out of, or corrupt, the href attribute.
        safe_urls = [escape(url, quote=True) for url in urls]

        if len(safe_urls) == 1:
            link_text = f' (<a href="{safe_urls[0]}" target="_blank" rel="noopener noreferrer" title="">Source</a>)'
        else:
            link_text = " (" + ", ".join(
                f'<a href="{url}" target="_blank" rel="noopener noreferrer" title="">Source {i+1}</a>'
                for i, url in enumerate(safe_urls)
            ) + ")"

        if claim not in updated_summary:
            print(f"⚠️ Could not find claim in summary: '{claim}'")
            continue

        # Plain string replacement: with re.subn the replacement is parsed as
        # a template, so a backslash in the claim or URL either raised
        # re.error or was silently altered.
        updated_summary = updated_summary.replace(claim, claim + link_text, 1)

    return updated_summary

In [17]:

def format_html_paragraphs(text: str) -> str:
    """
    Convert the lightly marked-up summary text into simple HTML, line by line.

    Rules applied to each line:
      * a line starting with "* " becomes an <li>; consecutive items are
        wrapped in one <ul> so the list markup is valid HTML;
      * a line wrapped in "**" becomes an <h3> heading;
      * a blank line becomes <br>;
      * anything else becomes a <p>.

    The line content is inserted as-is (not escaped), so anchors already
    present in the text are preserved.

    Args:
        text: The summary text.

    Returns:
        The HTML fragment, one element per line, joined with newlines.
    """
    html_lines = []
    in_list = False

    for line in text.strip().split("\n"):
        stripped = line.strip()
        is_item = line.startswith("* ")

        # Open or close the surrounding <ul> when entering or leaving a run of items.
        if is_item and not in_list:
            html_lines.append("<ul>")
            in_list = True
        elif not is_item and in_list:
            html_lines.append("</ul>")
            in_list = False

        if is_item:
            html_lines.append(f"<li>{line[2:]}</li>")
        elif stripped.startswith("**") and stripped.endswith("**"):
            html_lines.append(f"<h3>{stripped.strip('* ')}</h3>")
        elif stripped == "":
            html_lines.append("<br>")
        else:
            html_lines.append(f"<p>{stripped}</p>")

    # Close a list that runs to the end of the text.
    if in_list:
        html_lines.append("</ul>")

    return "\n".join(html_lines)
# Annotate the summary with HTML anchors, then convert it line by line to HTML.
linked_summary = insert_links_into_summary_html(summary, claim_url_pairs)
final_html = format_html_paragraphs(linked_summary)

# The file is written as UTF-8, so declare that encoding (and a doctype) in
# the document itself. Without it a browser opening the local file may guess
# a legacy encoding and garble non-ASCII characters.
with open("annotated_summary.html", "w", encoding="utf-8") as f:
    f.write(
        '<!DOCTYPE html>\n<html><head><meta charset="utf-8"></head>'
        f"<body>{final_html}</body></html>"
    )