In [None]:
import ast
import html
import warnings
from urllib.parse import urlparse

import joblib

from currensee.agents.complete_graph import compiled_graph
from currensee.agents.tools.finance_tools import generate_macro_table
from currensee.utils.output_utils import (
    generate_long_report,
    format_news_summary_to_html,
    format_paragraph_summary_to_html,
    save_html_to_file,
    generate_short_report,
    convert_html_to_pdf,
    generate_med_report,
)

# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by dependencies;
# consider narrowing the filter to the specific categories that are noisy.
warnings.filterwarnings("ignore")

In [None]:
from datetime import date

# Compact YYYYMMDD stamp for today; the report cells further down embed it in
# the filenames they write.
today = date.today()
today_date = f"{today:%Y%m%d}"
print(today_date)

## Define Initial State

This is data that we should be retrieving from each meeting invite.

**DO NOT** change this data until the CRM DB has been updated

In [None]:
# Meeting-invite fields for the Compass client. No "report_length" key is
# supplied in this run.
init_state = {
    "client_name": "Adam Clay",
    "client_email": "adam.clay@compass.com",
    "meeting_timestamp": "2024-03-26 11:00:00",
    "meeting_description": "Compass - Annual Credit Facility Review Meeting",
}

# Run the compiled graph and print the "final_summary" entry of its result.
result = compiled_graph.invoke(init_state)
summary = result["final_summary"]
full_report = summary
# Disabled: appending a macro table to the report.
# NOTE(review): `macro_table` is not assigned in the cells above — build it
# before re-enabling the next line.
# full_report = summary + "\n\n### Macro Financial Snapshot\n\n" + macro_table
print(full_report)

In [None]:
# Same Compass invite as the previous cell, now with "report_length" set to
# "short".
init_state = {
    "client_name": "Adam Clay",
    "client_email": "adam.clay@compass.com",
    "meeting_timestamp": "2024-03-26 11:00:00",
    "meeting_description": "Compass - Annual Credit Facility Review Meeting",
    "report_length": "short",
}

# Run the compiled graph and print the "final_summary" entry of its result.
result = compiled_graph.invoke(init_state)
summary = result["final_summary"]
full_report = summary
# Disabled: appending a macro table to the report.
# NOTE(review): `macro_table` is not assigned in the cells above — build it
# before re-enabling the next line.
# full_report = summary + "\n\n### Macro Financial Snapshot\n\n" + macro_table
print(full_report)

# Test Case 2

In [None]:
# Meeting-invite fields for the AeroVironment client (a 2018 meeting). No
# "report_length" key is supplied in this run.
init_state = {
    "client_name": "Jennifer Phelps",
    "client_email": "jennifer.phelps@aerovironment.com",
    "meeting_timestamp": "2018-07-24 14:00:00",
    "meeting_description": "AeroVironment - Initial Discussion on Financial Needs",
}
# Run the compiled graph and print the "final_summary" entry of its result.
result = compiled_graph.invoke(init_state)
summary = result["final_summary"]
full_report = summary
# Disabled: appending a macro table to the report.
# NOTE(review): `macro_table` is not assigned in the cells above — build it
# before re-enabling the next line.
# full_report = summary + "\n\n### Macro Financial Snapshot\n\n" + macro_table
print(full_report)

# Test Case 3

In [None]:
# Meeting-invite fields for the Hasbro client, requesting the short report.
init_state = {
    "client_name": "Jessica Palmer",
    "client_email": "jessica.palmer@hasbro.com",
    "meeting_timestamp": "2024-02-04 14:30:00",
    "meeting_description": "Hasbro - Discuss Portfolio Diversification and Bond Strategy",
    "report_length": "short",  # 'short' , 'medium' , 'long' (default)
}
# Run the compiled graph; the following cells read `result`.
result = compiled_graph.invoke(init_state)

In [None]:
# Print the "final_summary_sourced" entry of the latest graph result (the
# earlier test cells print "final_summary" instead).
summary = result["final_summary_sourced"]
full_report = summary
print(full_report)

In [None]:
# Render the short-format HTML report from the Test Case 3 graph result that
# is already in `result`. Reusing it (instead of invoking the graph a second
# time) keeps the saved report consistent with the summary printed above and
# avoids a redundant, potentially expensive graph run.
# NOTE(review): this path includes "data/" while the other report cells in
# this notebook write to ../../generated_reports/ — confirm the intended folder.
client_report = generate_short_report(result)
save_html_to_file(
    client_report,
    f"../../data/generated_reports/client_report_sample_hasbro_short_{today_date}.html",
)

# Test Case 4

In [None]:
# Same Hasbro invite as Test Case 3 but without a "report_length" key; the
# note in the Test Case 3 cell lists 'long' as the default.
init_state = {
    "client_name": "Jessica Palmer",
    "client_email": "jessica.palmer@hasbro.com",
    "meeting_timestamp": "2024-02-04 14:30:00",
    "meeting_description": "Hasbro - Discuss Portfolio Diversification and Bond Strategy",
}
# Run the compiled graph; the following cells read `result`.
result = compiled_graph.invoke(init_state)

In [None]:
# Print the "final_summary_sourced" entry of the latest graph result.
summary = result["final_summary_sourced"]
print(summary)

In [None]:
# Build the long-format HTML report from the latest graph result and save it
# under a filename stamped with today's date.
client_report = generate_long_report(result)
save_html_to_file(
    client_report,
    f"../../generated_reports/client_report_sample_hasbro_{today_date}.html",
)

# Test Case 5

In [None]:
# Same Hasbro invite again, this time requesting the medium report.
init_state = {
    "client_name": "Jessica Palmer",
    "client_email": "jessica.palmer@hasbro.com",
    "meeting_timestamp": "2024-02-04 14:30:00",
    "meeting_description": "Hasbro - Discuss Portfolio Diversification and Bond Strategy",
    "report_length": "medium",
}
# Run the compiled graph; the following cells read `result`.
result = compiled_graph.invoke(init_state)

In [None]:
# Print the "final_summary_sourced" entry of the latest graph result.
summary = result["final_summary_sourced"]
print(summary)

In [None]:
# Build the medium-format HTML report from the latest graph result and save
# it under a filename stamped with today's date.
client_report = generate_med_report(result)
save_html_to_file(
    client_report, f"../../generated_reports/client_report_hasbro_med_{today_date}.html"
)

# Testing source attribution for the summary

In [None]:
# Article lists taken from the latest graph result; each falls back to an
# empty list when its key is absent.
# NOTE(review): nothing in the visible cells passes these to print_summary —
# confirm they are still needed.
client_industry_summary = result.get("client_industry_sources", [])
macro_news_summary = result.get("macro_news_sources", [])


# Function to format and print client_industry and macro_news summaries
def print_summary(summary, summary_name):
    """Print a titled, human-readable listing of news articles.

    Args:
        summary: Iterable of article dicts. The keys read are ``title``,
            ``snippet``, ``date`` and ``link``; every key is optional.
            ``None`` is treated as an empty listing.
        summary_name: Heading printed above the articles and underlined with
            ``=`` characters of the same length.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\n{summary_name}:\n{'='*len(summary_name)}")
    for article in summary or []:
        title = article.get("title", "No Title")
        snippet = article.get("snippet", "No Snippet")
        published = article.get("date", "No Date")
        link = article.get("link", "")

        # Derive the source from the URL's host. Parsing the URL (rather than
        # indexing into link.split("/")) means a link without a scheme or
        # path no longer raises IndexError; it is reported as "No Source".
        try:
            source = urlparse(link).netloc if link else ""
        except ValueError:
            # urlparse rejects some malformed hosts; treat them as unknown.
            source = ""
        source = source or "No Source"

        print(f"Title: {title}")
        print(f"Snippet: {snippet}")
        print(f"Date: {published}")
        print(f"Source: {source}")
        print(f"Link: {link}")  # Displaying the full link
        print("-" * 40)  # Separator for readability


# Function to format and print the holdings summary
def print_holdings_summary(holdings_sources):
    """Print the news articles found for each client holding.

    Args:
        holdings_sources: Mapping of holding name to a list of article dicts.
            The article keys read are ``title``, ``snippet``, ``date`` and
            ``link``; every key is optional. ``None`` is treated as an empty
            mapping, and a holding with no articles prints "No news found.".

    Returns:
        None. All output goes to stdout.
    """
    print("\nClient Holdings Summary:")
    print("========================")

    for holding, articles in (holdings_sources or {}).items():
        print(f"\n--- {holding} ---")
        if not articles:
            print("No news found.")
            continue

        for article in articles:
            title = article.get("title", "No Title")
            snippet = article.get("snippet", "No Snippet")
            published = article.get("date", "No Date")
            link = article.get("link", "")

            # Derive the source from the URL's host. Parsing the URL (rather
            # than indexing into link.split("/")) means a link without a
            # scheme or path no longer raises IndexError.
            try:
                source = urlparse(link).netloc if link else ""
            except ValueError:
                # urlparse rejects some malformed hosts; treat as unknown.
                source = ""
            source = source or "No Source"

            print(f"Title: {title}")
            print(f"Snippet: {snippet}")
            print(f"Date: {published}")
            print(f"Source: {source}")
            print(f"Link: {link}")
            print("-" * 40)

In [None]:
from textwrap import wrap
from currensee.core import get_model, settings


def chunk_sources_with_metadata(
    sources: dict[str, list[dict]], max_length: int = 1000
) -> dict[str, tuple[str, str]]:
    """Split every source entry into width-limited chunks, each tagged with its link.

    For each entry the title and snippet are joined with a newline, stripped,
    and wrapped to ``max_length`` characters without breaking long words or
    rewriting whitespace. Entries whose combined text is empty yield no chunks.

    Args:
        sources: Mapping of category name to a list of entry dicts; the keys
            read are ``title``, ``snippet`` and ``link`` (all optional).
        max_length: Maximum width of a chunk, passed to ``textwrap.wrap``.

    Returns:
        Dict keyed ``"<category> [<entry>.<chunk>]"`` (both numbers 1-based)
        whose values are ``(chunk_text, link)`` tuples, e.g.
        ``{'Client Industry Summary [1.1]': (chunk_text, source_url)}``.
    """
    chunks_by_label: dict[str, tuple[str, str]] = {}
    for category, entries in sources.items():
        for entry_no, entry in enumerate(entries, start=1):
            url = entry.get("link", "")
            heading = entry.get("title", "")
            body = entry.get("snippet", "")
            pieces = wrap(
                f"{heading}\n{body}".strip(),
                width=max_length,
                break_long_words=False,
                replace_whitespace=False,
            )
            chunks_by_label.update(
                (f"{category} [{entry_no}.{piece_no}]", (piece.strip(), url))
                for piece_no, piece in enumerate(pieces, start=1)
            )
    return chunks_by_label

In [None]:
def build_prompt_with_urls(
    summary: str, chunked_sources: dict[str, tuple[str, str]]
) -> str:
    """Compose the LLM prompt that asks for a claim-to-URL mapping.

    Args:
        summary: The generated summary text whose claims should be sourced.
        chunked_sources: Mapping of chunk label to ``(chunk_text, url)``.
            Each entry is rendered as ``"<label> (Source: <url>):"`` followed
            by the chunk text; entries are separated by a blank line.

    Returns:
        The prompt text: the summary, the rendered sources, and instructions
        describing the expected answer format.
    """
    source_blocks = []
    for label, (text, url) in chunked_sources.items():
        source_blocks.append(f"{label} (Source: {url}):\n{text}")
    sources_section = "\n\n".join(source_blocks)

    # One list item per prompt line; the empty first and last items produce
    # the leading and trailing newline of the prompt.
    prompt_lines = [
        "",
        "You are a financial analyst assistant. You generated the following summary:",
        "",
        "--- Summary ---",
        f"{summary}",
        "",
        "You used these source snippets (each with its original URL):",
        "",
        "--- Sources ---",
        sources_section,
        "",
        "Please map each claim from the summary to the URLs that support it. Format:",
        "",
        '- Summary claim: "..."',
        '  → Source URL(s): ["https://..."]',
        "",
        "Use only the URLs in the provided sources. Don't invent URLs.",
        "",
    ]
    return "\n".join(prompt_lines)


def format_holdings_sources(raw_sources):
    """Flatten a per-ticker article mapping into one list of article dicts.

    Args:
        raw_sources: Mapping of ticker to a list of article dicts, or a falsy
            value (``None``, ``{}``).

    Returns:
        A list of ``{"title", "snippet", "link"}`` dicts in mapping order.
        A missing title falls back to the ticker; a missing snippet or link
        becomes an empty string. Falsy input yields an empty list.
    """
    if not raw_sources:
        return []

    return [
        {
            "title": article.get("title", ticker),
            "snippet": article.get("snippet", ""),
            "link": article.get("link", ""),
        }
        for ticker, articles in raw_sources.items()
        for article in articles
    ]


# Step 1: Get and chunk sources properly
# sources = {
#    "Client Industry Summary": result.get("client_industry_sources", []),
#    "Holdings Summary": format_holdings_sources(result.get("client_holdings_sources", {})),
#    "Macro Summary": result.get("macro_news_sources", [])
# }

# chunked_sources = chunk_sources_with_metadata(sources)

# Step 2: Compose prompt and ask LLM
# prompt = build_prompt_with_urls(summary, chunked_sources)

# Step 3: Invoke LLM
# from langchain_core.messages import HumanMessage

# model = get_model(settings.DEFAULT_MODEL)
# response = model.invoke([HumanMessage(content=prompt)])

In [None]:
def get_soucing_prompt():
    """Build the source-attribution prompt from the notebook's current state.

    Reads the notebook-level ``result`` and ``summary`` variables: the three
    source collections are taken from ``result`` (holdings are flattened with
    ``format_holdings_sources``), chunked, and combined with ``summary`` into
    the prompt text.

    Returns:
        The prompt string produced by ``build_prompt_with_urls``.
    """
    holdings_articles = format_holdings_sources(
        result.get("client_holdings_sources", {})
    )
    sources_by_section = {
        "Client Industry Summary": result.get("client_industry_sources", []),
        "Holdings Summary": holdings_articles,
        "Macro Summary": result.get("macro_news_sources", []),
    }
    return build_prompt_with_urls(
        summary, chunk_sources_with_metadata(sources_by_section)
    )

In [None]:
# Collect the three source groups from the latest graph result; the holdings
# mapping is flattened into a list of articles first.
# NOTE(review): this repeats the body of get_soucing_prompt() at notebook
# level — confirm whether `sources`, `chunked_sources` and `prompt` are still
# needed as globals.
sources = {
    "Client Industry Summary": result.get("client_industry_sources", []),
    "Holdings Summary": format_holdings_sources(
        result.get("client_holdings_sources", {})
    ),
    "Macro Summary": result.get("macro_news_sources", []),
}
chunked_sources = chunk_sources_with_metadata(sources)

# Step 2: Compose the prompt (the LLM itself is invoked in a later cell)
prompt = build_prompt_with_urls(summary, chunked_sources)

In [None]:
import re


# Step 3.5: Filter the output to remove claims with no supporting URLs
def filter_empty_sources(response_text: str) -> str:
    """Drop claim blocks whose source-URL list is empty.

    The response is stripped and split at every newline that precedes
    ``- Summary claim:``. A block is discarded when it contains a
    ``→ Source URL(s):`` line followed by an empty ``[]`` list; all other
    blocks are kept.

    Args:
        response_text: Raw text of the LLM response.

    Returns:
        The surviving blocks joined by a blank line.
    """
    kept_blocks = []
    for block in re.split(r"\n(?=- Summary claim:)", response_text.strip()):
        has_empty_url_list = re.search(
            r"→ Source URL\(s\):\s*\[\s*\]\s*(\*.*\*)?", block
        )
        if has_empty_url_list:
            continue
        kept_blocks.append(block)

    return "\n\n".join(kept_blocks)


def extract_claim_url_pairs(response_text: str) -> list[tuple[str, list[str]]]:
    """Extract ``(claim, urls)`` pairs from the LLM's response.

    Each pair is read from a block of the form::

        - Summary claim: "..."
          → Source URL(s): ["https://...", ...]

    The URL list is parsed with ``ast.literal_eval``, which only accepts
    Python literals. The response is model-generated text, so it must never
    be passed to ``eval``: even with ``__builtins__`` removed, ``eval`` will
    execute arbitrary expressions.

    Args:
        response_text: Text of the LLM response (optionally pre-filtered).

    Returns:
        A list of ``(claim, urls)`` tuples in order of appearance. Blocks
        whose URL list is not a valid literal list of strings are skipped.
    """
    claim_url_pairs = []
    blocks = re.findall(
        r'- Summary claim:\s*"(.*?)"\s*→ Source URL\(s\):\s*(\[.*?\])',
        response_text,
        re.DOTALL,
    )
    for claim, urls_str in blocks:
        try:
            urls = ast.literal_eval(urls_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a plain literal (malformed list, expression, ...): skip it.
            continue
        if isinstance(urls, list) and all(isinstance(u, str) for u in urls):
            claim_url_pairs.append((claim.strip(), urls))
    return claim_url_pairs

In [None]:
def insert_links_into_summary(
    summary: str, claim_url_pairs: list[tuple[str, list[str]]], max_sources: int = 3
) -> str:
    """Insert Markdown-style source links after the matching claims in a summary.

    For every ``(claim, urls)`` pair the first literal occurrence of the
    claim in the (progressively updated) summary is followed by
    `` ([Source](url))`` for a single URL, or by
    `` ([Source 1](url1), [Source 2](url2), ...)`` for several.

    Args:
        summary: The summary text to annotate.
        claim_url_pairs: ``(claim, urls)`` tuples, e.g. from
            ``extract_claim_url_pairs``.
        max_sources: Maximum number of URLs linked per claim; extra URLs are
            dropped. Defaults to 3.

    Returns:
        The summary with links inserted. Pairs with an empty claim or no URLs
        are skipped; a claim that does not occur in the summary is reported
        on stdout and leaves the text unchanged.
    """
    updated_summary = summary

    for claim, urls in claim_url_pairs:
        truncated_urls = urls[:max_sources]

        # Nothing to anchor or nothing to link: skip instead of inserting an
        # empty " ()" or prepending links to the start of the summary.
        if not claim or not truncated_urls:
            continue

        if len(truncated_urls) == 1:
            link_text = f" ([Source]({truncated_urls[0]}))"
        else:
            link_text = (
                " ("
                + ", ".join(
                    f"[Source {i+1}]({url})" for i, url in enumerate(truncated_urls)
                )
                + ")"
            )

        # Plain substring search and slicing: the claim and URLs are inserted
        # verbatim, so backslashes in them cannot be misread as regex
        # replacement escapes.
        start = updated_summary.find(claim)
        if start == -1:
            print(f"⚠️ Could not find claim in summary: '{claim}'")
            continue

        end = start + len(claim)
        updated_summary = updated_summary[:end] + link_text + updated_summary[end:]

    return updated_summary

In [None]:
# Step 3: Invoke LLM
from langchain_core.messages import HumanMessage

# Notebook-level model instance used by get_fin_linked_summary() below,
# created from the project's default model setting.
model = get_model(settings.DEFAULT_MODEL)


def get_fin_linked_summary():
    """Return the current summary annotated with source links.

    Sends the sourcing prompt to the notebook-level ``model``, removes claim
    blocks that have no URLs from the reply, extracts the remaining
    ``(claim, urls)`` pairs and inserts them as links into the notebook-level
    ``summary``.

    Returns:
        The summary text with Markdown-style source links inserted.
    """
    llm_reply = model.invoke([HumanMessage(content=get_soucing_prompt())])
    supported_claims = extract_claim_url_pairs(
        filter_empty_sources(llm_reply.content)
    )
    return insert_links_into_summary(summary, supported_claims)

In [None]:
# Ask the model to attribute sources for the current `summary` and show the
# annotated text.
linked_summary = get_fin_linked_summary()
print(linked_summary)

In [None]:
# Debug check of the annotated summary's type.
# NOTE(review): leftover inspection cell — consider removing it.
type(linked_summary)

In [None]:
import re
from weasyprint import HTML


def convert_markdown_links_to_html(text: str) -> str:
    """Convert Markdown links with an http(s) URL into HTML anchors.

    ``[Source 1](https://example.com)`` becomes
    ``<a href="https://example.com" target="_blank" rel="noopener noreferrer">Source 1</a>``.

    The URL is HTML-escaped before it is placed in the ``href`` attribute, so
    a quote or ``&`` in the URL cannot terminate the attribute or inject
    markup. The link label and all text outside the links are left unchanged.

    Args:
        text: Text that may contain Markdown-style links.

    Returns:
        The text with every matching link replaced by an anchor element.
    """

    def _to_anchor(match: re.Match) -> str:
        # Build one anchor; only the URL is escaped because only it is
        # inserted into an attribute value.
        label, url = match.group(1), match.group(2)
        safe_url = html.escape(url, quote=True)
        return (
            f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">'
            f"{label}</a>"
        )

    return re.sub(r"\[([^\]]+)\]\((https?://[^\)]+)\)", _to_anchor, text)


def wrap_html(content: str) -> str:
    """Embed content in a complete, styled HTML document.

    Newlines in ``content`` are turned into ``<br>`` tags and the result is
    placed inside the ``<body>`` of a fixed page template (UTF-8 charset,
    Arial 14px body text, underlined blue links).

    Args:
        content: Text or HTML fragment to place in the page body.

    Returns:
        The full HTML document as a string.
    """
    page_template = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <style>
            body {
                font-family: Arial, sans-serif;
                font-size: 14px;
                line-height: 1.6;
                color: #000;
                padding: 40px;
            }
            a {
                color: #0645AD;
                text-decoration: underline;
            }
        </style>
    </head>
    <body>
        __HTML_BODY__
    </body>
    </html>
    """
    body_markup = "<br>".join(content.split("\n"))
    # The marker occurs exactly once in the template; the inserted content is
    # not rescanned, so it may contain any text.
    return page_template.replace("__HTML_BODY__", body_markup)


def generate_pdf_from_summary(
    linked_summary: str, output_file: str = "final_summary.pdf"
):
    """Render a link-annotated summary to a PDF file.

    Markdown-style ``[Source](url)`` links are converted to HTML anchors, the
    text is wrapped in the styled page template, and the page is written to
    ``output_file`` with WeasyPrint. A confirmation line is printed afterwards.

    Args:
        linked_summary: Summary text containing Markdown-style links.
        output_file: Path of the PDF to write.
    """
    document_html = wrap_html(convert_markdown_links_to_html(linked_summary))
    pdf_source = HTML(string=document_html, base_url=".")
    pdf_source.write_pdf(output_file)
    print(f"✅ PDF created: {output_file}")


# Write the annotated summary from the cell above to final_summary.pdf in the
# current working directory.
generate_pdf_from_summary(linked_summary, "final_summary.pdf")

# Test Case 6

In [None]:
# Meeting-invite fields for the Hyatt Hotels client. No "report_length" key
# is supplied in this run.
init_state = {
    "client_name": "Timothy Ochoa",
    "client_email": "timothy.ochoa@hyatthotels.com",
    "meeting_timestamp": "2024-02-27 10:00:00",
    "meeting_description": "Hyatt Hotels - Check Status of 401k Advisory RFI Launch",
}
# Run the compiled graph and print the "final_summary" entry of its result.
result = compiled_graph.invoke(init_state)
summary = result["final_summary"]
full_report = summary
print(full_report)

In [None]:
# Generate the long-format HTML report for the latest graph result
# (Test Case 6, the Hyatt Hotels client).
# Optional: re-run the graph, or cache its result on disk and reload it later.
# result = compiled_graph.invoke(init_state)
# joblib.dump(result, 'result.joblib')
# result = joblib.load('result.joblib')
client_report = generate_long_report(result)
# Saved under a Hyatt-specific name: the previous "hasbro" filename was a
# copy-paste leftover and overwrote the Hasbro report written for Test Case 4.
save_html_to_file(
    client_report,
    f"../../generated_reports/client_report_sample_hyatt_{today_date}.html",
)
# save_html_to_file(client_report, 'client_report_sample.html')

In [None]:
# Save the current `client_report` (built from the Hyatt Hotels run above)
# under a Hyatt-specific name; the previous "hasbro" filename was a copy-paste
# leftover that overwrote the Hasbro report.
# NOTE(review): the preceding cell already saves this report — this cell looks
# redundant; confirm before removing.
save_html_to_file(
    client_report,
    f"../../generated_reports/client_report_sample_hyatt_{today_date}.html",
)

# Pull meetings and emails for the test clients

In [None]:
from google.cloud import secretmanager
import pandas as pd
import numpy as np
from currensee.utils.db_utils import create_pg_engine
from sqlalchemy import text

# Show the full contents of text columns instead of truncating them when
# DataFrames are displayed.
pd.set_option("display.max_colwidth", None)

In [None]:
# Name of the database handed to create_pg_engine in the next cell; the
# queries below read its meeting_data and email_data tables.
DB_NAME = "crm_outlook"

In [None]:
# Database engine shared by every pd.read_sql call below.
engine = create_pg_engine(db_name=DB_NAME)

In [None]:
# Meetings whose invitee_emails value equals the Hyatt test client's address,
# newest first.
# NOTE(review): the equality filter would miss rows where invitee_emails holds
# several addresses — confirm how that column is stored.
df_result = pd.read_sql(
    "SELECT * FROM meeting_data where invitee_emails = 'timothy.ochoa@hyatthotels.com' order by meeting_timestamp desc ",
    con=engine,
)
df_result

In [None]:
# Meetings whose invitee_emails value equals the AeroVironment test client's
# address (no explicit ordering).
df_result = pd.read_sql(
    "SELECT * FROM meeting_data where invitee_emails = 'jennifer.phelps@aerovironment.com' ",
    con=engine,
)
df_result

In [None]:
# Meetings whose invitee_emails value equals the Hasbro test client's
# address; only the first 20 rows are displayed.
df_result = pd.read_sql(
    "SELECT * FROM meeting_data where invitee_emails = 'jessica.palmer@hasbro.com'",
    con=engine,
)
df_result.head(20)

In [None]:
# Emails sent to or from the Hasbro test client, newest first; only the three
# most recent rows are displayed.
df_result = pd.read_sql(
    "SELECT * FROM email_data where (to_emails = 'jessica.palmer@hasbro.com' or from_email = 'jessica.palmer@hasbro.com' ) order by email_timestamp desc ",
    con=engine,
)
df_result.head(3)

In [None]:
# Emails addressed to the Compass test client, newest first (emails sent by
# the client are not included in this filter).
df_result = pd.read_sql(
    "SELECT * FROM email_data where to_emails = 'adam.clay@compass.com' order by email_timestamp desc ",
    con=engine,
)
df_result

In [None]:
# Number of emails addressed to the Compass test client.
df_result = pd.read_sql(
    "SELECT count(*) FROM email_data where to_emails = 'adam.clay@compass.com' ",
    con=engine,
)
print(df_result)

In [None]:
# The ten most recent meetings across all clients.
df_result = pd.read_sql(
    "SELECT * FROM meeting_data order by meeting_timestamp desc limit 10 ", con=engine
)
df_result

In [None]:
# Emails sent to or from the Hyatt test client, newest first; only the ten
# most recent rows are displayed.
df_result = pd.read_sql(
    "SELECT * FROM email_data where (to_emails = 'timothy.ochoa@hyatthotels.com' or from_email = 'timothy.ochoa@hyatthotels.com' ) order by email_timestamp desc ",
    con=engine,
)
df_result.head(10)