In [1]:
import sys
# Append a hard-coded local checkout to the module search path.
# NOTE(review): presumably this is what lets the `lib` package imported in the
# next cell resolve — confirm, and note the path is machine-specific.
sys.path.append(r"d:\VSCode\re-assistant")

In [2]:
from langchain.tools import tool
from lib.dataframe import df
from rapidfuzz import fuzz
import re

Loading email data-set...
2.3.2
Successfully loaded 11688 records.


In [3]:
def normalize_email_field(value):
    """Return a lower-cased, trimmed copy of an email header value.

    Double quotes, single quotes and angle brackets are removed so that
    values such as ``"Jane Doe" <jane@x.com>`` can be matched with a plain
    substring search.

    Args:
        value: The raw header value. Anything that is not a non-empty string
            (``None``, ``""``, a float NaN from a missing cell, ...) is
            treated as missing.

    Returns:
        The normalized string, or ``""`` when the value is missing.
    """
    # NaN is truthy, so a plain `if not value` lets it through to re.sub,
    # which then raises TypeError; check the type explicitly instead.
    if not isinstance(value, str) or not value:
        return ""
    # Remove quotes and angle brackets
    cleaned = re.sub(r'[\"\'<>]', '', value)
    return cleaned.strip().lower()

In [4]:
def _format_email_date(value):
    """Render a date cell as 'YYYY-MM-DD HH:MM:SS', falling back to text or 'N/A'."""
    try:
        return value.strftime('%Y-%m-%d %H:%M:%S')
    except (AttributeError, ValueError):
        # None and plain strings have no strftime (AttributeError); pandas NaT
        # refuses strftime with ValueError. Show a non-empty string as-is.
        return value if isinstance(value, str) and value else 'N/A'


@tool("metadata_filtering_tool", parse_docstring=True)
def metadata_filtering_tool(
    sender: str = None,
    recipient: str = None,
    start_date: str = None,
    end_date: str = None,
    subject: str = None,
    threadId: str = None,
    sort_by: str = "date",
    sort_order: str = "desc",
    limit: int = 10
) -> str:
    """
    This tool filters emails based on metadata such as sender, recipient, date range, subject, or thread ID.

    Args:
        sender (str, optional): Filter emails by sender. Can be a full email address, a partial email, or a sender name (case-insensitive).
        recipient (str, optional): Filter emails by recipient (To or Cc). Can be a full email address, a partial email, or a recipient name (case-insensitive).
        start_date (str, optional): Filter emails sent on or after this date. Format: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.
        end_date (str, optional): Filter emails sent on or before this date. Format: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.
        subject (str, optional): Filter emails containing this keyword in the subject (case-insensitive, partial match supported).
        threadId (str, optional): Filter emails belonging to a specific thread ID.
        sort_by (str, optional): Column to sort the results by. Default is 'date'.
        sort_order (str, optional): Sort order: 'asc' for ascending, 'desc' for descending. Default is 'desc'.
        limit (int, optional): Maximum number of results to return. Default is 10.
    """
    # NOTE: the docstring above is parsed by the @tool decorator
    # (parse_docstring=True), so implementation notes live in comments instead.
    #
    # Returns a human-readable string: either an error / "no emails" message,
    # or a summary line followed by up to `limit` formatted records.

    print(f"metadata_filtering_tool is being called {sender}, {recipient}, {start_date}, {end_date}, {subject}, {threadId}, {sort_by}, {sort_order}, {limit}")

    # Nothing below writes to the frame (every filter produces a new frame and
    # the normalized values are kept in standalone Series), so the shared `df`
    # can be used directly without a defensive copy.
    temp_df = df

    # --- Sender filter (case-insensitive, matches name or email) ---
    if sender:
        # Normalize the query the same way as the column so quotes/brackets in
        # the query do not prevent a match.
        sender_query = normalize_email_field(sender)
        from_normalized = temp_df['from'].apply(normalize_email_field)
        # regex=False: addresses contain '.', '+', etc. which must match
        # literally rather than be interpreted as regex syntax.
        temp_df = temp_df[from_normalized.str.contains(sender_query, regex=False, na=False)]

    # --- Recipient filter (matches either To or Cc) ---
    if recipient:
        recipient_query = normalize_email_field(recipient)

        def normalize_list(lst):
            # Only list cells are searched; anything else counts as "no recipients".
            return ','.join(normalize_email_field(i) for i in lst) if isinstance(lst, list) else ""

        to_normalized = temp_df['to'].apply(normalize_list)
        cc_normalized = temp_df['cc'].apply(normalize_list)
        recipient_mask = (
            to_normalized.str.contains(recipient_query, regex=False, na=False)
            | cc_normalized.str.contains(recipient_query, regex=False, na=False)
        )
        temp_df = temp_df[recipient_mask]

    print(temp_df, "temp_df from metadata_filtering_tool")

    # --- Date filtering ---
    # The bounds are compared as given; no explicit parsing happens here.
    # NOTE(review): this assumes df['date'] is a datetime column so pandas
    # coerces the string bound (the strftime call on output suggests it is) —
    # confirm. A date-only end_date then means midnight at the start of that day.
    if start_date:
        try:
            temp_df = temp_df[temp_df['date'] >= start_date]
        except Exception as e:  # tool boundary: report the problem to the caller as text
            return f"Error parsing start_date: {e}"

    if end_date:
        try:
            temp_df = temp_df[temp_df['date'] <= end_date]
        except Exception as e:  # tool boundary: report the problem to the caller as text
            return f"Error parsing end_date: {e}"

    # --- Subject filter (case-insensitive literal substring) ---
    if subject:
        subject_lower = temp_df['subject'].astype(str).str.lower()
        # `.contains` lives on the `.str` accessor, not on the Series returned
        # by `.str.lower()`; calling it on the Series raises AttributeError.
        temp_df = temp_df[subject_lower.str.contains(subject.lower(), regex=False, na=False)]

    # --- ThreadId filter ---
    if threadId:
        temp_df = temp_df[temp_df['threadId'] == threadId]

    # --- Handle empty result ---
    if temp_df.empty:
        return "No emails found matching the specified criteria."

    # --- Sorting ---
    ascending = (str(sort_order).strip().lower() == "asc")
    if sort_by not in temp_df.columns:
        # Unknown column: fall back to the default sort key.
        sort_by = "date"
    temp_df = temp_df.sort_values(by=sort_by, ascending=ascending)

    # --- Total count ---
    total_matches = len(temp_df)

    # --- Preview results ---
    # head(-n) means "all but the last n rows", so clamp negative limits.
    limit = max(limit, 0)
    results_preview = temp_df.head(limit)[['threadId', 'from', 'to', 'subject', 'date']].to_dict('records')

    formatted_results = "\n\n---\n\n".join([
        f"threadId: {res.get('threadId', 'N/A')}\n"
        f"From: {res.get('from', 'N/A')}\n"
        f"To: {res.get('to', 'N/A')}\n"
        f"Subject: {res.get('subject', 'N/A')}\n"
        f"Date: {_format_email_date(res.get('date'))}"
        for res in results_preview
    ])

    print(formatted_results, "formatted_results from metadata_filtering_tool")
    return f"Found a total of {total_matches} emails matching the criteria. Here are the {min(limit, total_matches)} most relevant:\n\n{formatted_results}"