In [2]:
import sys
# Put the project root on the import path so the `lib.*` imports in the
# following cells can be resolved.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere unless this directory exists — consider making it configurable.
sys.path.append(r"d:\VSCode\re-assistant")

In [3]:
from lib.load_data import df
from rapidfuzz import fuzz
from datetime import datetime
import polars as pl
import re

Command-line environment detected. Using local data file.
Loading email metadata from: d:\VSCode\re-assistant\lib\data\full_mails.jsonl
Successfully loaded 11688 records for metadata.
Connecting to ChromaDB Vector Store...
Successfully connected to ChromaDB collection.


In [4]:
def format_date(d):
    """Render a date-like value as display text.

    Strings are handed back untouched, ``datetime`` instances are formatted as
    ``YYYY-MM-DD HH:MM:SS``, and every other input yields the placeholder
    ``'N/A'``.
    """
    if isinstance(d, str):
        return d
    if not isinstance(d, datetime):
        return 'N/A'
    return d.strftime('%Y-%m-%d %H:%M:%S')

In [5]:
def match_value_in_columns(value, column_value, threshold=85):
    """
    Check whether `value` matches any entry of `column_value` (e.g. a from / to / cc field).

    Matching rules:
      1. If `column_value` is a list → every string item is checked; non-string
         items (such as nulls) are skipped instead of raising.
      2. If `column_value` is a string → it is checked directly.
      3. An entry matches when, ignoring case:
            - `value` is a substring of the entry, OR
            - the fuzzy similarity (`fuzz.partial_ratio`) is strictly greater
              than `threshold`.
      4. If nothing matches or the input is invalid → return False.

    Args:
        value: Text to look for. Anything that is not a non-empty string never matches.
        column_value: A string, or a list of strings, to search in.
        threshold: Score (0-100) the fuzzy similarity must exceed. Defaults to 85.

    Returns:
        bool: True as soon as one entry matches, otherwise False.
    """
    if not isinstance(value, str) or not value:
        return False

    # Treat both supported shapes uniformly as a list of candidates.
    if isinstance(column_value, str):
        candidates = [column_value]
    elif isinstance(column_value, list):
        candidates = column_value
    else:
        return False

    needle = value.lower()
    for candidate in candidates:
        if not isinstance(candidate, str):
            # Nulls / unexpected types cannot match and must not crash the scan.
            continue
        haystack = candidate.lower()
        # Cheap substring test first; the fuzzy score is only computed when needed.
        if needle in haystack or fuzz.partial_ratio(needle, haystack) > threshold:
            return True
    return False

In [6]:
def normalize_email_field(*values):
    """Normalize one or more email fields into clean lowercase strings.

    Each argument may be a string, a list/tuple of strings, or a Polars Series.
    Every string entry has double quotes, single quotes and angle brackets
    removed, is stripped of surrounding whitespace and lower-cased.

    ``None``, empty strings, empty containers and non-string entries inside a
    container (e.g. nulls) are skipped rather than raising.

    Args:
        *values: The field values to normalize.

    Returns:
        list[str]: The cleaned entries, in the order they were encountered.
    """
    normalized_emails = []

    for value in values:
        if value is None:
            continue

        # Built-in types are tested first; a Series must be converted before any
        # truthiness check is applied to it.
        if isinstance(value, str):
            items = [value] if value else []
        elif isinstance(value, (list, tuple)):
            items = value
        elif isinstance(value, pl.Series):
            items = value.to_list()
        else:
            # Unsupported scalar type: nothing sensible to normalize.
            continue

        for item in items:
            if not isinstance(item, str):
                continue
            cleaned = re.sub(r'[\"\'<>]', '', item)
            normalized_emails.append(cleaned.strip().lower())

    return normalized_emails

In [8]:
def normalize_list(lst) -> str:
    """Normalize a field value into one comma-separated string.

    The value is cleaned with `normalize_email_field` and the resulting entries
    are joined with ``","``. A plain list is unpacked so that each item is
    normalized as its own argument; any other non-None value is passed through
    as a single argument.

    Args:
        lst: The raw field value (a list, a single value, or None).

    Returns:
        str: The cleaned entries joined by commas; ``""`` when `lst` is None
        or nothing remains after normalization.
    """
    if lst is None:
        return ""

    if isinstance(lst, list):
        entries = normalize_email_field(*lst)
    else:
        entries = normalize_email_field(lst)

    # normalize_email_field always returns a list of strings, so it can be joined directly.
    return ",".join(entries)


In [9]:
# Null-tolerant field lookup used when rendering rows as text
def safe_get(row, key, default=""):
    """Fetch `key` from `row` as a string, substituting `default` for missing data.

    Dicts are read with ``.get`` (so an absent key yields `default`); any other
    container is indexed with ``row[key]``. A value that is None, or whose text
    form is "nan" / "none" in any letter case, is replaced by `default`.
    """
    if isinstance(row, dict):
        raw = row.get(key, default)
    else:
        raw = row[key]

    if raw is not None:
        text = str(raw)
        if text.lower() not in {"nan", "none"}:
            return text
    return default

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from lib.utils import AGENT_MODEL

In [11]:
# Prompt for the email-thread summarizer.
# The `{question}` placeholder is essential: it is the only place where the
# text supplied via `generate_summary.invoke({"question": ...})` enters the
# prompt. Without it the model would receive the rules but no thread to summarize.
template = """
You are an assistant that summarizes email conversations.
Read the full thread and give a short, clear summary in plain English that anyone can understand.
Rules:
- Keep it brief: 7-15 sentences.
- Focus on the main people, topic, and outcome.
- Skip technical details, metadata, and signatures.
- Combine the whole thread into one simple story.
- Stay neutral and clear.

{question}
"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

# Pipeline: fill the prompt → call the chat model (temperature=0) → return plain text.
generate_summary = (
    prompt_perspectives
    | ChatOpenAI(model=AGENT_MODEL, temperature=0)
    | StrOutputParser()
)

In [None]:
def _fuzzy_column_match(column, needle):
    """Build a boolean expression: does the normalized `column` fuzzy-match `needle`?

    Each cell is first flattened with `normalize_list` and then compared with
    `match_value_in_columns`, so no helper columns have to be added to the frame.
    """
    return (
        pl.col(column)
        .map_elements(normalize_list, return_dtype=str)
        .map_elements(lambda cell: match_value_in_columns(needle, cell), return_dtype=bool)
    )


# @tool("conversation_retriever_tool", parse_docstring=True)
def conversation_retriever_tool(
    id: str = None,
    subject: str = None,
    sender: str = None,
    recipient: str = None,
    cc: str = None
) -> str:
    """
    Retrieves the full history of an email conversation based on given parameters.

    All supplied filters are combined with AND. The first matching email
    identifies the conversation: when the data has a `threadId` column, every
    email sharing that thread id is returned; otherwise only the matched email is.

    Args:
        id (str, optional): Unique mail id (fastest lookup if present).
        subject (str, optional): Email subject (supports partial match).
        sender (str, optional): Email address or name of sender (case-insensitive).
        recipient (str, optional): Email address or name of recipient (case-insensitive);
            matched against both the To and the CC fields.
        cc (str, optional): Email address or name of a CC recipient (case-insensitive).

    Returns:
        str: Full conversation history including metadata and email content,
        or "No conversation found." when nothing matches.
    """
    print(f"conversation_retriever_tool is being called {id}, {subject}, {sender}, {recipient}, {cc}")

    # Start from "match everything" and narrow with each filter that was supplied.
    mask = pl.lit(True)

    if id:
        mask = mask & (pl.col("id") == id)

    if sender:
        mask = mask & _fuzzy_column_match("from", sender.lower())

    if recipient:
        recipient_needle = recipient.lower()
        # A recipient may appear in either the To or the CC field.
        mask = mask & (
            _fuzzy_column_match("to", recipient_needle)
            | _fuzzy_column_match("cc", recipient_needle)
        )

    if cc:
        # Previously this argument was accepted but never applied.
        mask = mask & _fuzzy_column_match("cc", cc.lower())

    if subject:
        mask = mask & _fuzzy_column_match("subject", subject.lower())

    matches = df.filter(mask)

    if matches.is_empty():
        return "No conversation found."

    # The first hit decides which conversation is returned.
    resolved_id = matches[0, "id"]
    resolved_thread = matches[0, "threadId"] if "threadId" in matches.columns else None

    if resolved_thread is not None:
        # Pull the whole thread from the unfiltered data so that replies from
        # other participants are included as well.
        thread_df = df.filter(pl.col("threadId") == resolved_thread)
    else:
        # No thread information available: fall back to the single matched email.
        thread_df = matches.filter(pl.col("id") == resolved_id)

    # Build full conversation
    full_conversation = "\n\n--- EMAIL ---\n\n".join([
        f"ID: {safe_get(row, 'id')}\n"
        f"From: {safe_get(row, 'from')}\n"
        f"To: {safe_get(row, 'to')}\n"
        f"CC: {safe_get(row, 'cc')}\n"
        f"Subject: {safe_get(row, 'subject')}\n"
        f"Date: {safe_get(row, 'date')}\n\n"
        f"Snippet: {safe_get(row, 'snippet')}\n\n"
        f"Body:\n{safe_get(row, 'body')}\n\n"
        f"Labels: {safe_get(row, 'labels')}\n"
        f"Attachments: {safe_get(row, 'attachments')}\n"
        for row in thread_df.iter_rows(named=True)
    ])

    # Pass to summary generator
    # summary = generate_summary.invoke({
    #     "question": f"Summarize this email thread:\n{full_conversation}"
    # })

    return full_conversation

In [23]:
# Smoke test: look up a conversation by subject and sender only.
print(
    conversation_retriever_tool(
        id=None,
        subject="Requirement of MS-PIPES / MPL STEEL",
        sender="projects1@mahalakshmiprofiles.in",
    )
)

conversation_retriever_tool is being called None, Requirement of MS-PIPES / MPL STEEL, projects1@mahalakshmiprofiles.in, None, None
ID: 18ff756dab52dfeb
From: projects 1 <projects1@mahalakshmiprofiles.in>
To: ['info@jalsaventures.com', 'vazhraanirmaan@gmail.com', 'info@cmgbuilders.in', 'sales@sterlingheights.in', '"contact@2getherments.com" <contact@2getherments.com>']
CC: ['GM Projects <gmprojects@mahalakshmiprofiles.in>']
Subject: Requirement of MS-PIPES / MPL STEEL
Date: 2024-06-08T15:43:11Z

Snippet: Dear Sir/Madam, Greetings from MPL Group! We are delighted to introduce &#39;MPL Steel Pipes&#39;, headquartered in Hyderabad, as a premier manufacturer of MS Steel Pipes &amp; Tubes. Committed to

Body:
{'text': '', 'html': ''}

Labels: ['CATEGORY_PERSONAL', 'INBOX']
Attachments: [{'attachmentId': 'ANGjdJ9Sq2EmtcCnjYyNsZmgy7TmHRQFy_G5IQ5bW4IBO4i-avBP00gJ3BNtjOtyHwjz4cH6qNQiS1BwwcKTcXE9oVhPu_JU7O5EOcbEGKTwUlzHC-6vpdQj1UuzkMHt4FSI6nnVX6yat2pjhxcgYjnoY2bjOJ706K4u70KSb9zQf1qEtxgJvrb9QHoiA