# HELPMATE AI || E-MAIL SEARCH SYSTEM

In [6]:
## Importing necessary libraries
import pandas as pd
import re
import ast
import chromadb
import openai
import numpy as np

from chromadb.utils import embedding_functions
import os

from sentence_transformers import CrossEncoder, util

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
## Importing data from location
df_email = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/helpmateAI/email_thread_details.csv')
df_summary = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/helpmateAI/email_thread_summaries.csv')

#### CHECKING DATA STRUCTURE AND IT'S INFO

In [9]:
df_email.head()

Unnamed: 0,thread_id,subject,timestamp,from,to,body
0,1,FW: Master Termination Log,2002-01-29 11:23:42,"Gossett, Jeffrey C. JGOSSET","['Giron', 'Darron C. Dgiron', 'Love', 'Phillip...",\n\n -----Original Message-----\nFrom: =09Ther...
1,1,FW: Master Termination Log,2002-01-31 12:50:00,"Theriot, Kim S. KTHERIO","['Murphy', 'Melissa Mmurphy', 'Gossett', 'Jeff...",\n\n -----Original Message-----\nFrom: =09Panu...
2,1,FW: Master Termination Log,2002-02-05 15:03:35,"Theriot, Kim S. KTHERIO","['Murphy', 'Melissa Mmurphy', 'Anderson', 'Dia...",Note to Stephanie Panus....\n\nStephanie...ple...
3,1,FW: Master Termination Log,2002-02-05 15:06:25,"Theriot, Kim S. KTHERIO","['Hall', 'D. Todd Thall', 'Sweeney', 'Kevin Ks...",\n\n -----Original Message-----\nFrom: =09Panu...
4,1,FW: Master Termination Log,2002-05-28 07:20:35,"Kelly, Katherine L. KKELLY","['Germany', 'Chris Cgerman']",\n\n -----Original Message-----\nFrom: =09McMi...


In [10]:
df_summary.head()

Unnamed: 0,thread_id,summary
0,1,The email thread discusses the Master Terminat...
1,2,A lunch meeting has been scheduled for May 5th...
2,3,Ben is updating a friend on his progress with ...
3,4,The recipient of the email thread initially ex...
4,5,The email thread discusses the long form confi...


In [11]:
df_email.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21684 entries, 0 to 21683
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   thread_id  21684 non-null  int64 
 1   subject    21684 non-null  object
 2   timestamp  21684 non-null  object
 3   from       21684 non-null  object
 4   to         21684 non-null  object
 5   body       21684 non-null  object
dtypes: int64(1), object(5)
memory usage: 1016.6+ KB


In [12]:
df_summary.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4167 entries, 0 to 4166
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   thread_id  4167 non-null   int64 
 1   summary    4167 non-null   object
dtypes: int64(1), object(1)
memory usage: 65.2+ KB


We have two data frames one have complete mail and another one has their summary.
<br>
We can use summary to get more relevant results, hence merging both the dataframes.

In [13]:
## Merging dataframe to add summary.
df = pd.merge(df_email,df_summary,on='thread_id',how='inner')

In [14]:
df.head()

Unnamed: 0,thread_id,subject,timestamp,from,to,body,summary
0,1,FW: Master Termination Log,2002-01-29 11:23:42,"Gossett, Jeffrey C. JGOSSET","['Giron', 'Darron C. Dgiron', 'Love', 'Phillip...",\n\n -----Original Message-----\nFrom: =09Ther...,The email thread discusses the Master Terminat...
1,1,FW: Master Termination Log,2002-01-31 12:50:00,"Theriot, Kim S. KTHERIO","['Murphy', 'Melissa Mmurphy', 'Gossett', 'Jeff...",\n\n -----Original Message-----\nFrom: =09Panu...,The email thread discusses the Master Terminat...
2,1,FW: Master Termination Log,2002-02-05 15:03:35,"Theriot, Kim S. KTHERIO","['Murphy', 'Melissa Mmurphy', 'Anderson', 'Dia...",Note to Stephanie Panus....\n\nStephanie...ple...,The email thread discusses the Master Terminat...
3,1,FW: Master Termination Log,2002-02-05 15:06:25,"Theriot, Kim S. KTHERIO","['Hall', 'D. Todd Thall', 'Sweeney', 'Kevin Ks...",\n\n -----Original Message-----\nFrom: =09Panu...,The email thread discusses the Master Terminat...
4,1,FW: Master Termination Log,2002-05-28 07:20:35,"Kelly, Katherine L. KKELLY","['Germany', 'Chris Cgerman']",\n\n -----Original Message-----\nFrom: =09McMi...,The email thread discusses the Master Terminat...


In [15]:
df.shape

(21684, 7)

## PREPROCESSING OF DATA

Checking few instances of mails.

In [16]:
df.body[0]

'\n\n -----Original Message-----\nFrom: =09Theriot, Kim S. =20\nSent:=09Tuesday, January 29, 2002 1:23 PM\nTo:=09Richardson, Stacey; Anderson, Diane; Gossett, Jeffrey C.; White, Stac=\ney W.; Murphy, Melissa; Hall, D. Todd; Sweeney, Kevin\nCc:=09Aucoin, Evelyn; Baxter, Bryce; Wynne, Rita\nSubject:=09FW: Master Termination Log\n\n\n\n -----Original Message-----\nFrom: =09Panus, Stephanie =20\nSent:=09Tuesday, January 29, 2002 11:39 AM\nTo:=09Adams, Laurel; Alonso, Tom; Aronowitz, Alan; Bailey, Susan; Balfour-F=\nlanagan, Cyndie; Baughman, Edward; Belden, Tim; Bishop, Serena; Brackett, D=\nebbie R.; Bradford, William S.; Browning, Mary Nell; Bruce, James; Bruce, M=\nichelle; Bruce, Robert; Buerkle, Jim; Calger, Christopher F.; Carrington, C=\nlara; Considine, Keith; Cordova, Karen A.; Crandall, Sean; Cutsforth, Diane=\n; Diamond, Russell; Dunton, Heather; Edison, Susan; Elafandi, Mo; Fischer, =\nMark; Flores, Nony; Fondren, Mark; Gorny, Vladimir; Gorte, David; Gresham, =\nWayne; Hagelman

In [17]:
df.body[1]

'\n\n -----Original Message-----\nFrom: =09Panus, Stephanie =20\nSent:=09Thursday, January 31, 2002 12:08 PM\nTo:=09Adams, Laurel; Albrecht, Kristin; Alonso, Tom; Aronowitz, Alan; Baile=\ny, Susan; Balfour-Flanagan, Cyndie; Baughman, Edward; Belden, Tim; Bishop, =\nSerena; Boyd, Samantha; Brackett, Debbie R.; Bradford, William S.; Browning=\n, Mary Nell; Bruce, James; Bruce, Michelle; Bruce, Robert; Buerkle, Jim; Ca=\nlger, Christopher F.; Carrington, Clara; Considine, Keith; Cordova, Karen A=\n.; Crandall, Sean; Cutsforth, Diane; Diamond, Russell; Dunton, Heather; Edi=\nson, Susan; Elafandi, Mo; Fischer, Mark; Flores, Nony; Fondren, Mark; Gorny=\n, Vladimir; Gorte, David; Gresham, Wayne; Hagelmann, Bjorn; Hall, Steve C. =\n(Legal); Harkness, Cynthia; Hendry, Brent; Johnston, Greg; Keohane, Peter; =\nLindeman, Cheryl; Mallory, Chris; Mann, Kay; Mcginnis, Stephanie; McGrory, =\nRobert; McMichael Jr., Ed; Miller, Don (Asset Mktg); Moore, Janet H.; Moran=\n, Tom; Murphy, Harlan; Murray, J

We can see there are lots of noise in email body, from

In [18]:
df.body[3]

"\n\n -----Original Message-----\nFrom: =09Panus, Stephanie =20\nSent:=09Tuesday, February 05, 2002 4:59 PM\nTo:=09Adams, Laurel; Albrecht, Kristin; Alonso, Tom; Aronowitz, Alan; Baile=\ny, Susan; Balfour-Flanagan, Cyndie; Baughman, Edward; Belden, Tim; Bishop, =\nSerena; Boyd, Samantha; Brackett, Debbie R.; Bradford, William S.; Browning=\n, Mary Nell; Bruce, James; Bruce, Michelle; Bruce, Robert; Buerkle, Jim; Ca=\nlger, Christopher F.; Carrington, Clara; Chilkina, Elena; Considine, Keith;=\n Cordova, Karen A.; Crandall, Sean; Cutsforth, Diane; Diamond, Russell; Dun=\nton, Heather; Edison, Susan; Elafandi, Mo; Fischer, Mark; Flores, Nony; Fon=\ndren, Mark; Glover, Sheila; Gorny, Vladimir; Gorte, David; Gresham, Wayne; =\nHagelmann, Bjorn; Hall, Steve C. (Legal); Harkness, Cynthia; Hendry, Brent;=\n Johnston, Greg; Keohane, Peter; Lindeman, Cheryl; Mallory, Chris; Mann, Ka=\ny; Mcginnis, Stephanie; McGrory, Robert; McMichael Jr., Ed; Miller, Don (As=\nset Mktg); Moore, Janet H.; Moran

In [19]:
## Creating a function to clean the email to remove noise using regular expression.
def prepare_email_for_search(row):
    """
    Prepares a cleaned version of an email for semantic search.

    Takes a row (Series) with at least: 'from', 'to', 'timestamp', 'subject', 'body', 'summary' columns.
    Returns a cleaned string combining important metadata + cleaned body.
    """

    #Clean the email body
    body = row['body']

    # Fix soft line breaks and encoded spaces
    body = body.replace("=\n", "").replace("=09", " ").replace("=20", " ")

    # Remove internal "From:", "Sent:", "To:", "Subject:", "Date:", "Cc:" lines
    body = re.sub(r"(?i)(^|\n)(From|Sent|To|Subject|Date|Cc):.*(\n|$)", "\n", body)

    # Remove "-----Original Message-----" lines
    body = re.sub(r"(?i)-----Original Message-----", "", body)

    # Remove "Inline attachment" lines
    body = re.sub(r"(?i)--------- Inline attachment follows ---------", "", body)

    # Remove excessive empty lines
    body = re.sub(r"\n\s*\n", "\n\n", body)
    body = body.strip()

    # Create the final searchable text
    searchable_text = f"""\
From: {row['from']}
To: {row['to']}
Timestamp: {row['timestamp']}
Subject: {row['subject']}
Summary: {row['summary']}

{body}
"""
    return searchable_text

In [20]:
## Applying function to data frame
df['cleaned_text'] = df.apply(prepare_email_for_search, axis=1)

In [21]:
df['cleaned_text'][0]

'From: Gossett, Jeffrey C. JGOSSET\nTo: [\'Giron\', \'Darron C. Dgiron\', \'Love\', \'Phillip M. Plove\']\nTimestamp: 2002-01-29 11:23:42\nSubject: FW: Master Termination Log\nSummary: The email thread discusses the Master Termination Log and the need to investigate a CNG LDC (Hope Gas) termination and a $66 million settlement offer. Stephanie Panus sends out the Daily List and Master Termination Log for various dates. Kim Theriot requests her name and Melissa Murphy\'s name to be removed from the distribution list and adds several names to it. The thread also includes updates on terminations and valid terminations for various companies.\n\nSent: Tuesday, January 29, 2002 1:23 PM\nCc: Aucoin, Evelyn; Baxter, Bryce; Wynne, Rita\n\nSent: Tuesday, January 29, 2002 11:39 AM\nSubject: Master Termination Log\n\nAttached is the Daily Termination List for January 25 as well as the Master Termination Log, which incorporates all terminations received through January 25.\n\nThe following were pre

In [22]:
df['cleaned_text'][100]

"From: Exchange System Administrator <.\nTo: ['jls@fullenweider.com']\nTimestamp: 2001-10-29 12:22:06\nSubject: Undeliverable: Message relayed (jls@fullenweider.com)\nSummary: Joe Stepenovitch sent multiple emails to himself, confirming that his messages were successfully relayed to the recipient jls@fullenweider.com. The emails contain no additional information or content.\n\nTo: Joe.Stepenovitch@enron.com\nSubject: \n\nYour message has been relayed\nto the following recipient(s):\n\njls@fullenweider.com\nMessage relayed\n\nThe message was successfully delivered to 1 recipient(s)\nat domain fullenweider.com.\n\n - C.DTF \n - C.DTF\n"

In [23]:
df['cleaned_text'][22]

"From: Kay Mann\nTo: ['Reagan Rorschach']\nTimestamp: 2001-04-27 09:20:00\nSubject: RE: long form confirm/MDEA\nSummary: The email thread discusses the long form confirm/MDEA agreement. Kay suggests distributing the agreement internally and to the cities, with an explanation that they are still considering it. Edward suggests looking into whether structuring the agreement as MDEA, agent for..., or as a covenant of financial support would be better for the longer term deal. Reagan asks if the issues have been incorporated into the agreement and if it can be sent to Marvin. Kay mentions that there are formatting challenges and lots that need to be fixed. Reagan lists some questions that may arise regarding liquidated damages, events of default and remedies, billing and payment netting, and miscellaneous terms.\n\nI think you can send it just so he has the form, and we can figure out what \nwe need to do with it.\n\nKay\n\nTo: Kay Mann/Corp/Enron@Enron\n\ndo we need to mess with this or c

In [24]:
df.head()

Unnamed: 0,thread_id,subject,timestamp,from,to,body,summary,cleaned_text
0,1,FW: Master Termination Log,2002-01-29 11:23:42,"Gossett, Jeffrey C. JGOSSET","['Giron', 'Darron C. Dgiron', 'Love', 'Phillip...",\n\n -----Original Message-----\nFrom: =09Ther...,The email thread discusses the Master Terminat...,"From: Gossett, Jeffrey C. JGOSSET\nTo: ['Giron..."
1,1,FW: Master Termination Log,2002-01-31 12:50:00,"Theriot, Kim S. KTHERIO","['Murphy', 'Melissa Mmurphy', 'Gossett', 'Jeff...",\n\n -----Original Message-----\nFrom: =09Panu...,The email thread discusses the Master Terminat...,"From: Theriot, Kim S. KTHERIO\nTo: ['Murphy', ..."
2,1,FW: Master Termination Log,2002-02-05 15:03:35,"Theriot, Kim S. KTHERIO","['Murphy', 'Melissa Mmurphy', 'Anderson', 'Dia...",Note to Stephanie Panus....\n\nStephanie...ple...,The email thread discusses the Master Terminat...,"From: Theriot, Kim S. KTHERIO\nTo: ['Murphy', ..."
3,1,FW: Master Termination Log,2002-02-05 15:06:25,"Theriot, Kim S. KTHERIO","['Hall', 'D. Todd Thall', 'Sweeney', 'Kevin Ks...",\n\n -----Original Message-----\nFrom: =09Panu...,The email thread discusses the Master Terminat...,"From: Theriot, Kim S. KTHERIO\nTo: ['Hall', 'D..."
4,1,FW: Master Termination Log,2002-05-28 07:20:35,"Kelly, Katherine L. KKELLY","['Germany', 'Chris Cgerman']",\n\n -----Original Message-----\nFrom: =09McMi...,The email thread discusses the Master Terminat...,"From: Kelly, Katherine L. KKELLY\nTo: ['German..."


In [25]:
df['subject'][1]

'FW: Master Termination Log'

Subject column can be cleaned by removing FW, RE, FWD at the begenning.

In [26]:
## Using Regular expression to clean subject column.
def clean_subject(subject: str) -> str:
    if not isinstance(subject, str):
        return ""

    # Remove multiple "RE:", "FW:", "FWD:", etc. from start
    subject = re.sub(r'^(?:\s*(RE|FW|FWD)\s*[:：])+', '', subject, flags=re.IGNORECASE)

    # Remove extra spaces
    subject = subject.strip()

    return subject

In [27]:
## Applying function to subject column
df['subject'] = df['subject'].apply(clean_subject)

In [28]:
df['subject'][0]

'Master Termination Log'

In [29]:
## Creating dictionary of meta data for reach row.
df['metadata'] = df.apply(lambda x: {'thread_id':x['thread_id'],'Subject':x['subject'],'TimeStamp':x['timestamp'],'Sender':x['from'],'Receiver':x['to'],'summary':x['summary']}, axis=1)

## GENERATING EMBEDDINGS USING CHROMADB USING OPENAI EMBEDDINGS

In [31]:
## Importing API key
from google.colab import userdata
api_key = userdata.get('API_Key')

In [32]:
path = '/content/drive/MyDrive/Colab Notebooks/helpmateAI'

In [33]:
## Creating an persistant client
client = chromadb.PersistentClient(path=path)

In [34]:
## assigning embedding modela and creating a function
model = "text-embedding-ada-002"
embedding_function = embedding_functions.OpenAIEmbeddingFunction(api_key=api_key, model_name=model)

In [35]:
## Creating a collection to store email data
email_search_collection = client.get_or_create_collection(name='Email_data_for_search',embedding_function=embedding_function)

In [36]:
## used this code to add data in collection in batches.
## Code is not required not as we have created a persistant client.
"""batch_size = 100  # Adjust this based on the size of your emails and the rate limit

for i in range(0, len(df_filtered), batch_size):
    batch_df = df_filtered[i:i + batch_size]
    documents = batch_df['cleaned_text'].to_list()
    ids = [str(j) for j in range(i, i + len(batch_df))]
    metadatas = batch_df['metadata'].to_list()

    try:
        email_search_collection.add(
            documents=documents,
            ids=ids,
            metadatas=metadatas
        )
        print(f"Added batch {i // batch_size + 1}/{len(df_filtered) // batch_size + 1}")
    except Exception as e:
        print(f"Error adding batch {i // batch_size + 1}: {e}")
        # You might want to add more sophisticated error handling here,
        # like retrying or logging the failed batch.

print("Finished adding emails in batches.")"""

'batch_size = 100  # Adjust this based on the size of your emails and the rate limit\n\nfor i in range(0, len(df_filtered), batch_size):\n    batch_df = df_filtered[i:i + batch_size]\n    documents = batch_df[\'cleaned_text\'].to_list()\n    ids = [str(j) for j in range(i, i + len(batch_df))]\n    metadatas = batch_df[\'metadata\'].to_list()\n\n    try:\n        email_search_collection.add(\n            documents=documents,\n            ids=ids,\n            metadatas=metadatas\n        )\n        print(f"Added batch {i // batch_size + 1}/{len(df_filtered) // batch_size + 1}")\n    except Exception as e:\n        print(f"Error adding batch {i // batch_size + 1}: {e}")\n        # You might want to add more sophisticated error handling here,\n        # like retrying or logging the failed batch.\n\nprint("Finished adding emails in batches.")'

In [37]:
## checking embeddings.
email_search_collection.get(
    ids = ['0','1','2'],
    include = ['embeddings', 'documents', 'metadatas']
)

{'ids': ['0', '1', '2'],
 'embeddings': array([[-0.01325212,  0.00536308, -0.00442958, ..., -0.0175133 ,
         -0.0165615 , -0.01669328],
        [-0.01324027,  0.00833566, -0.0155156 , ..., -0.01566007,
         -0.02415464, -0.01807264],
        [-0.01340328,  0.00472112, -0.01744282, ..., -0.01010599,
         -0.03026087, -0.01651501]]),
 'documents': ['From: Gossett, Jeffrey C. JGOSSET\nTo: [\'Giron\', \'Darron C. Dgiron\', \'Love\', \'Phillip M. Plove\']\nTimestamp: 2002-01-29 11:23:42\nSubject: FW: Master Termination Log\nSummary: The email thread discusses the Master Termination Log and the need to investigate a CNG LDC (Hope Gas) termination and a $66 million settlement offer. Stephanie Panus sends out the Daily List and Master Termination Log for various dates. Kim Theriot requests her name and Melissa Murphy\'s name to be removed from the distribution list and adds several names to it. The thread also includes updates on terminations and valid terminations for various com

In [38]:
## Running a test query
user_query = input()
result = email_search_collection.query(query_texts=user_query,n_results=3)
result

last mail from Lindy Donoho


{'ids': [['741', '18890', '18894']],
 'embeddings': None,
 'documents': [["From: Whatley, Nan NOTESADDR/CN=5114A3F6-3B8FA1BF-862564C2-5420CA\nTo: ['Donoho', 'Lindy LDONOHO']\nTimestamp: 2001-10-03 07:30:24\nSubject: Web Site\nSummary: The email thread includes various unrelated messages. Lindy mistakenly provides the wrong website address. Winston announces his departure from Enron and shares a website where he will document his ski trip. Mark Taylor forwards an email regarding a legal seminar and asks for an update on the dates. Michael Etringer responds, mentioning that Christian will provide a definitive date for the seminar and shares a website that is currently a template.\n\nLindy\n\nThat website is www.cowparadehouston.com\n\nYour pal\nNan\n",
   'From: Lindberg, Lorraine LLINDBE\nTo: [\'Donoho\', \'Lindy Ldonoho\']\nTimestamp: 2001-10-25 07:57:54\nSubject: FW: Osama Movie\nSummary: The email thread consists of multiple forwards of a link to a humorous Osama movie. TK Lohman fin

## CREATING CACHE COLLECTION AND SEARCH FUNCTION

In [39]:
## Creating a cache collection.
cache_collection = client.get_or_create_collection(name='Email_Cache', embedding_function=embedding_function)

In [41]:
#taking input from user.
query = input()

last mail from Lindy Donoho


In [42]:
##Function to search in cache, check similarity based on threshold.
## If not found in cache, searc in main collection.
## and store the new response in cache for future use.
def search_with_cache(query, threshold=0.2):
    cache_results = cache_collection.query(
    query_texts=query,
    n_results=1
    )
    ids = []
    documents = []
    distances = []
    metadatas = []
    results_df = pd.DataFrame()

    # If the distance is greater than the threshold, fetch from main collection
    if cache_results['distances'][0] == [] or cache_results['distances'][0][0] > threshold:
        results = email_search_collection.query(
            query_texts=query,
            n_results=10
        )

        # Store the query and results into the cache
        Keys = []
        Values = []
        for key, val in results.items():
            if val is None:
                continue
            for i in range(min(10, len(val[0]) if isinstance(val[0], list) else len(val))):
              Keys.append(str(key) + str(i))
              if isinstance(val[0], list):
                Values.append(str(val[0][i]))
              else:
                Values.append(str(val[i]))

        cache_collection.add(
            documents=[query],
            ids=[query],
            metadatas=dict(zip(Keys, Values))
        )

        print("Not found in cache. Found in main collection.")

        result_dict = {
            'Metadatas': results['metadatas'][0],
            'Documents': results['documents'][0],
            'Distances': results['distances'][0],
            "IDs": results["ids"][0]
        }
        results_df = pd.DataFrame.from_dict(result_dict)

    # If distance is within threshold, fetch from cache
    elif cache_results['distances'][0][0] <= threshold:
        cache_result_dict = cache_results['metadatas'][0][0]

        for key, value in cache_result_dict.items():
            if 'ids' in key:
                ids.append(value)
            elif 'documents' in key:
                documents.append(value)
            elif 'distances' in key:
                distances.append(value)
            elif 'metadatas' in key:
                metadatas.append(value)

        print("Found in cache!")

        results_df = pd.DataFrame({
            'IDs': ids,
            'Documents': documents,
            'Distances': distances,
            'Metadatas': metadatas
        })

    return results_df

In [43]:
results_df = search_with_cache(query)

Not found in cache. Found in main collection.


In [44]:
## Looking into cache collection
cache_collection.peek()

{'ids': ['Give me data on termination log',
  'Most latest mail sent by Jane M Tholt',
  'Can you summarize Southwest Gas-Penalty',
  'last mail from Lindy Donoho'],
 'embeddings': array([[-0.01687457, -0.01358091,  0.00221452, ...,  0.00981569,
         -0.00457775, -0.00909747],
        [-0.03925658, -0.00956845, -0.00350798, ..., -0.0059854 ,
          0.00274359, -0.03035697],
        [-0.0112499 ,  0.0066032 ,  0.01903145, ...,  0.00890061,
         -0.00307371, -0.02967365],
        [-0.0371049 , -0.01226112,  0.00347072, ..., -0.02064956,
          0.00248857, -0.03802763]]),
 'documents': ['Give me data on termination log',
  'Most latest mail sent by Jane M Tholt',
  'Can you summarize Southwest Gas-Penalty',
  'last mail from Lindy Donoho'],
 'uris': None,
 'included': ['metadatas', 'documents', 'embeddings'],
 'data': None,
 'metadatas': [{'documents2': 'From: Theriot, Kim S. KTHERIO\nTo: [\'Murphy\', \'Melissa Mmurphy\', \'Gossett\', \'Jeffrey C. Jgosset\', \'White\', \'Sta

In [45]:
results_df

Unnamed: 0,Metadatas,Documents,Distances,IDs
0,"{'Receiver': '['Donoho', 'Lindy LDONOHO']', 's...","From: Whatley, Nan NOTESADDR/CN=5114A3F6-3B8FA...",0.282467,741
1,"{'Receiver': '['Donoho', 'Lindy Ldonoho']', 'S...","From: Lindberg, Lorraine LLINDBE\nTo: ['Donoho...",0.301211,18890
2,{'summary': 'The email thread consists of mult...,"From: Donoho, Lindy LDONOHO\nTo: [""'adonoho@ho...",0.317297,18894
3,{'summary': 'The headhunter wants to speak wit...,"From: Fawcett, Lisa LFAWCETT\nTo: ['Donoho', '...",0.329655,1381
4,"{'TimeStamp': '2002-01-28 11:19:11', 'Subject'...","From: Donoho, Lindy LDONOHO\nTo: [""'adonoho@ho...",0.34639,20446
5,"{'Subject': 'Hey!', 'Sender': 'Dana Davis', 'R...",From: Dana Davis\nTo: ['bjackson@sswpc.com']\n...,0.348317,6137
6,"{'Subject': 'CA Capacity Report', 'Receiver': ...","From: Watson, Kimberly KWATSON\nTo: ['Donoho',...",0.353978,2639
7,"{'Sender': 'Jeff Dasovich', 'thread_id': 1859,...",From: Jeff Dasovich\nTo: ['rcarroll@bracepatt....,0.356339,9749
8,"{'Subject': 'SoCal Gas CPUC Proceeding', 'Send...","From: Donoho, Lindy LDONOHO\nTo: ['Rapp', 'Bil...",0.356449,13493
9,"{'thread_id': 1175, 'Receiver': '['mbeckworth@...",From: Gerald Nemec\nTo: ['mbeckworth@velaw.com...,0.356967,6136


In [46]:
results_df['Documents'][0]

"From: Whatley, Nan NOTESADDR/CN=5114A3F6-3B8FA1BF-862564C2-5420CA\nTo: ['Donoho', 'Lindy LDONOHO']\nTimestamp: 2001-10-03 07:30:24\nSubject: Web Site\nSummary: The email thread includes various unrelated messages. Lindy mistakenly provides the wrong website address. Winston announces his departure from Enron and shares a website where he will document his ski trip. Mark Taylor forwards an email regarding a legal seminar and asks for an update on the dates. Michael Etringer responds, mentioning that Christian will provide a definitive date for the seminar and shares a website that is currently a template.\n\nLindy\n\nThat website is www.cowparadehouston.com\n\nYour pal\nNan\n"

## ENABELING CROSS ENCODER FUNCTIONALITY

In [47]:
## Creatign cross encoder object
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [48]:
## Function to return top 3 most relevant results.
def top_3_results(query,result_df):
  cross_input = [[query,response] for response in result_df['Documents']]
  result_df['ranked_score'] = cross_encoder.predict(cross_input)
  new_df = result_df.sort_values(by='ranked_score',  ascending=False)
  return new_df[["Documents", "Metadatas"]][:3]

In [49]:
new_result_df = top_3_results(query,results_df)

In [50]:
new_result_df

Unnamed: 0,Documents,Metadatas
0,"From: Whatley, Nan NOTESADDR/CN=5114A3F6-3B8FA...","{'Receiver': '['Donoho', 'Lindy LDONOHO']', 's..."
6,"From: Watson, Kimberly KWATSON\nTo: ['Donoho',...","{'Subject': 'CA Capacity Report', 'Receiver': ..."
1,"From: Lindberg, Lorraine LLINDBE\nTo: ['Donoho...","{'Receiver': '['Donoho', 'Lindy Ldonoho']', 'S..."


## GENERATIVE SEARCH USING RAG

In [51]:
from google.colab import userdata
openai.api_key = userdata.get('API_Key')

In [52]:
## Using completions API and prompt engineering for generating response based in query and retreived data.
def generate_response(query, results_df):
    system_message = f"""
    You are an expert assistant specialized in extracting the most relevant information from a structured dataset in response to a user query: '{query}'.

    Your job is to:
    1. Fully understand the user's intent and context.
    2. Search carefully within the dataset provided below and extract the most relevant and direct information that satisfies the user's query.
    3. Respond as if you are providing your findings for a colleague — clear and authoritative.

    Your answer must include:
    - A  exact **Subject Line** present in respective metadata.
    - A single, clear **Answer** that best fulfills the user's query, based strictly on the dataset provided.
    - If necessary, include supporting **metadata** (e.g., source, date, location) only if it helps clarify or complete the answer.
    - Only provide additional metadata if the user explicitly asks for it.

    If the dataset does **not contain any relevant information**, respond with:
    "Unable to find your data here, anything else I can help you with?"

    Here is the dataset to search from:
    {results_df.to_string(index=False)}
    """

    response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{'role':'system','content':system_message},
              {'role':'user','content':query}]
    )

    return response.choices[0].message.content.split('\n')

In [54]:
generate_response(query,new_result_df)

['**Subject Line:** FW: Osama Movie',
 '',
 '**Answer:** The last email from Lindy Donoho was a forwarded message related to a humorous Osama movie. The email thread included multiple forwards of a link to the movie, with no detailed description of its content but suggesting it was funny. TK Lohman initiated the sharing by finding the movie amusing and forwarding it to colleagues, including Lindy Donoho, who then shared the email with others in the company.',
 '',
 'Let me know if you need more information or assistance!']

## TEST CASES

In [55]:
## Function to provide top_3_results from chromadb and generated output.
def query_pipeline(query):
    results_df = search_with_cache(query)
    if results_df.empty:
        return None, "Unable to find your data here, anything else I can help you with?"

    top_3_df = top_3_results(query, results_df)
    final_answer = generate_response(query, top_3_df)

    return top_3_df, final_answer

In [67]:
test_query_1 = "Can you tell us Southwest Gas-Penalty"
top_3_1, answer_1 = query_pipeline(test_query_1)

Found in cache!


In [68]:
top_3_1

Unnamed: 0,Documents,Metadatas
9,"From: Tori Kuykendall\nTo: ['Genaro Mendoza']\nTimestamp: 2001-04-23 03:13:00\nSubject: Penalty Charges\nSummary: The email thread discusses penalty charges related to gas deals. Mike asks Patti to review the deals and determine if Southwest Gas penalized multiple deals or just one. Lisa provides details of the penalties charged by different counterparties. Laurie mentions that all penalties were netted in payments received from Southwest Gas. Darron forwards an email from Bryce, the Manager of physical gas settlements, with a summary of penalties. Finally, someone states that the charges are not valid and that they will not pay penalties on a swing deal at a constrained point. The trader at Agave has been notified of this decision.\n\nthe trader over there is JB Smith. if you would like - you can send me a \ncopy of the invoice, and we will send them a letter with the invoice attached \n- and tell them to stop sending invoices to you because we consider the \nissure to be resolved.\n","{'Subject': 'Penalty Charges', 'thread_id': 48, 'Receiver': ""['Genaro Mendoza']"", 'TimeStamp': '2001-01-10 02:33:00', 'Sender': 'Tori Kuykendall', 'summary': 'The email thread discusses penalty charges related to gas deals. Mike asks Patti to review the deals and determine if Southwest Gas penalized multiple deals or just one. Lisa provides details of the penalties charged by different counterparties. Laurie mentions that all penalties were netted in payments received from Southwest Gas. Darron forwards an email from Bryce, the Manager of physical gas settlements, with a summary of penalties. Finally, someone states that the charges are not valid and that they will not pay penalties on a swing deal at a constrained point. The trader at Agave has been notified of this decision.'}"
6,"From: Tori Kuykendall\nTo: ['Genaro Mendoza']\nTimestamp: 2001-01-10 02:33:00\nSubject: Penalty Charges\nSummary: The email thread discusses penalty charges related to gas deals. Mike asks Patti to review the deals and determine if Southwest Gas penalized multiple deals or just one. Lisa provides details of the penalties charged by different counterparties. Laurie mentions that all penalties were netted in payments received from Southwest Gas. Darron forwards an email from Bryce, the Manager of physical gas settlements, with a summary of penalties. Finally, someone states that the charges are not valid and that they will not pay penalties on a swing deal at a constrained point. The trader at Agave has been notified of this decision.\n\nthese charges are not valid - we will not pay penalties on a swing deal at a \nconstrained point. several people on the desk discussed this potential \nproblem, and the trader at agave has already been notified that will do not \nintend on paying any penalty charges. this deal was done on a gtc - spot \ncontract, not on their firm agreement.\n","{'Subject': 'Southwest Gas-Penalty Invoice', 'Receiver': ""['Laurie Ellis']"", 'thread_id': 26, 'summary': 'Debra Perlingiere from Enron North America Corp. thanks Laurie Ellis for her efforts and attaches a manual invoice to SWG. Jane M Tholt wants the penalty billed to Southwest Gas and is open to them challenging it. Debra Perlingiere suggests billing the penalty and mentions that the contract states that if the buyer does not schedule, they will pay the replacement price differential plus $0.15. The replacement price differential is estimated to be around $16.80.', 'TimeStamp': '2001-02-01 04:14:00', 'Sender': 'Jane M Tholt'}"
2,"From: Darron C Giron\nTo: ['Lisa Valderrama']\nTimestamp: 2001-02-28 08:25:00\nSubject: Penalty Charges\nSummary: The email thread discusses penalty charges related to gas deals. Mike asks Patti to review the deals and determine if Southwest Gas penalized multiple deals or just one. Lisa provides details of the penalties charged by different counterparties. Laurie mentions that all penalties were netted in payments received from Southwest Gas. Darron forwards an email from Bryce, the Manager of physical gas settlements, with a summary of penalties. Finally, someone states that the charges are not valid and that they will not pay penalties on a swing deal at a constrained point. The trader at Agave has been notified of this decision.\n\nI do\n\nDG\n","{'TimeStamp': '2001-04-23 03:13:00', 'thread_id': 48, 'summary': 'The email thread discusses penalty charges related to gas deals. Mike asks Patti to review the deals and determine if Southwest Gas penalized multiple deals or just one. Lisa provides details of the penalties charged by different counterparties. Laurie mentions that all penalties were netted in payments received from Southwest Gas. Darron forwards an email from Bryce, the Manager of physical gas settlements, with a summary of penalties. Finally, someone states that the charges are not valid and that they will not pay penalties on a swing deal at a constrained point. The trader at Agave has been notified of this decision.', 'Subject': 'Penalty Charges', 'Receiver': ""['Genaro Mendoza']"", 'Sender': 'Tori Kuykendall'}"


In [69]:
answer_1

['**Subject Line:** Penalty Charges',
 '',
 '**Answer:** The dataset includes information related to penalty charges involving Southwest Gas. The email threads discuss penalty charges related to gas deals, specifically mentioning instances where penalties were charged by different counterparties and how all penalties were netted in payments received from Southwest Gas. Additionally, there is a mention of disputing the validity of certain charges and a decision not to pay penalties on a swing deal at a constrained point. The dataset does not provide details of the specific penalties charged by Southwest Gas, but it outlines the context in which penalty charges were discussed involving Southwest Gas.',
 '',
 '---',
 '*Metadata*: ',
 '- **Source:** Internal email communication',
 '- **Date:** Various dates in 2001',
 '- **Location:** Southwest Gas-related penalty charges']

In [76]:
test_query_2 = "What is first mail from Tana Jones and at what date"
top_3_2, answer_2 = query_pipeline(test_query_2)

Not found in cache. Found in main collection.


In [77]:
top_3_2

Unnamed: 0,Documents,Metadatas
3,"From: Jones, Tana TJONES\nTo: ['Panus', 'Stephanie SPANUS']\nTimestamp: 2001-11-21 10:27:34\nSubject: Out of Office AutoReply: Encore Energy Solutions, L.P.\nSummary: Several individuals in the email thread are out of the office and provide alternative contacts for immediate assistance. The first person will return on Monday, November 26, and provides a list of individuals to contact for power transactions. The second person is on vacation until the same date and provides a contact for GCP - EES assistance. The third person will be out from Tuesday, November 20, until November 26 and provides a contact for margin issues. The last person will return on Thursday, November 29.\n\nTana Jones is out of the office and will return Thursday, November 29.\n","{'summary': 'Several individuals in the email thread are out of the office and provide alternative contacts for immediate assistance. The first person will return on Monday, November 26, and provides a list of individuals to contact for power transactions. The second person is on vacation until the same date and provides a contact for GCP - EES assistance. The third person will be out from Tuesday, November 20, until November 26 and provides a contact for margin issues. The last person will return on Thursday, November 29.', 'thread_id': 2741, 'TimeStamp': '2001-11-21 10:27:34', 'Receiver': '['Panus', 'Stephanie SPANUS']', 'Subject': 'Out of Office AutoReply: Encore Energy Solutions, L.P.', 'Sender': 'Jones, Tana TJONES'}"
2,"From: Tana Jones\nTo: ['jpeters@andrews-kurth.com']\nTimestamp: 2000-09-08 07:31:00\nSubject: ETA & PA\nSummary: Bob received an email from Tana Jones, who forwarded an email from Sara Shackleton. Tana attached the ETA and PA documents as requested by Sara. Bob mentions that the Word versions he has may have been slightly modified from the online versions.\n\n----- Forwarded by Tana Jones/HOU/ECT on 09/08/2000 02:31 PM -----\n\n\tTana Jones\n\t09/08/2000 01:22 PM\n\n\t\t To: jpeter@andrews-kurth.com\n\t\t cc: Sara Shackleton/HOU/ECT@ECT\n\t\t Subject: ETA & PA\n\nAt the request of Sara, I am attaching the ETA. I am also sending the PA, \njust in case...\n","{'Subject': 'ETA & PA', 'thread_id': 1501, 'TimeStamp': '2000-09-08 07:31:00', 'Receiver': '['jpeters@andrews-kurth.com']', 'Sender': 'Tana Jones', 'summary': 'Bob received an email from Tana Jones, who forwarded an email from Sara Shackleton. Tana attached the ETA and PA documents as requested by Sara. Bob mentions that the Word versions he has may have been slightly modified from the online versions.'}"
7,"From: Tana Jones\nTo: ['Jane McBride']\nTimestamp: 2001-05-11 02:52:00\nSubject: Click Paper Approvals, 05-08-2001\nSummary: Aparna Rajaram sent an email with an attachment titled ""Click Paper Approvals"" to various recipients, including Tana Jones and Amber Ebow. Tana forwarded the email to Jane McBride, asking if there are any issues with a Japanese counterparty wanting to trade physical and financial pulp and paper. Jane responded that there are no problems, as the Japanese counterparty is listed on the Tokyo Stock Exchange and is the second largest paper company in Japan. Amber Ebow also replied to Aparna's email, stating that any adjustments will be highlighted in blue and offering assistance with any questions.\n\nThanks for the info!\n\n\tJane McBride@ENRON\n\t05/11/2001 06:00 AM\n\n\t\t To: Tana Jones/HOU/ECT@ECT\n\t\t cc: \n\t\t Subject: Re: Click Paper Approvals, 05-08-2001\n\nTana,\n\nNo probs. They are listed on the first section of the Tokyo Stock Exchange \nand are the second largest paper company here. \n\nJane McBride\nSenior Legal Counsel\nEnron Japan Corp.\n\nOtemachi 1st Square Bldg.\nWest 11th Floor\n1-5-1 Otemachi, Chiyoda-ku\nTokyo 100-0004\nJapan\n\nURL: www.enron.co.jp\n\nTel.: 81-3-5219-4500 (Main)\n 81-3-5219-4553 (Direct)\nFax: 81-3-5219-4510\n\nAssistant (Maggy Yamanishi)\nTel.: 81-3-5219-4554 \nEmail: Yo.Yamanishi@enron.com\n\n\tTana Jones@ECT\n\t2001/05/11 07:08\n\n\t\t To: Jane McBride/AP/Enron@Enron\n\t\t cc: \n\t\t Subject: Re: Click Paper Approvals, 05-08-2001\n\nJane,\n\nWe have a Japanese counterparty on this list that wants to trade physical & \nfinancial pulp & paper. Any problems?\n----- Forwarded by Tana Jones/HOU/ECT on 05/10/2001 05:07 PM -----\n\n\tAmber Ebow\n\t05/10/2001 02:48 PM\n\n\t\t To: Aparna Rajaram/ENRON@enronXgate @ ENRON\n\t\t cc: Kelly Lombardi/NA/Enron@Enron, Samuel Schott/HOU/ECT, Bernice \nRodriguez/HOU/ECT@ECT, Bill D Hare/HOU/ECT@ect, Steve Venturatos/HOU/ECT@ECT, \nTanya Rohauer/ENRON@enronXgate@ENRON, Tom Moran/ENRON@enronXgate@ENRON, Tana \nJones/HOU/ECT@ECT, Frank L Davis/ENRON@enronXgate@ENRON, Ken \nCurry/ENRON@enronXgate@ENRON, Nicole Hunter/ENRON@enronXgate@ENRON, Lesli \nCampbell/ENRON@enronXgate@ENRON, Angela Y Brown/ENRON@enronXgate@ENRON, Karen \nLambert/HOU/ECT@ECT, Fraisy George/ENRON@enronXgate@ENRON\n\t\t Subject: Re: Click Paper Approvals, 05-08-2001\n\nAny GCP Adjustments will be highlighted in blue. Please feel free to give me \na call with any questions.\n\nAmber N Ebow x5-8331\nGlobal Counterparty_Enron Net Works\n\nTo: Kelly Lombardi/NA/Enron@Enron, Samuel Schott/HOU/ECT@ECT, Bernice \nRodriguez/HOU/ECT@ECT, Bill D Hare/HOU/ECT@ect, Steve Venturatos/HOU/ECT@ECT, \nSamuel Schott/HOU/ECT@ECT, Tanya Rohauer/ENRON@enronXgate, Tom \nMoran/ENRON@enronXgate, Tana Jones/HOU/ECT@ECT, Frank L \nDavis/ENRON@enronXgate, Ken Curry/ENRON@enronXgate, Nicole \nHunter/ENRON@enronXgate, Lesli Campbell/ENRON@enronXgate, Angela Y \nBrown/ENRON@enronXgate, Amber Ebow/HOU/ECT@ECT, Karen Lambert/HOU/ECT@ECT, \nFraisy George/ENRON@enronXgate\nSubject: Click Paper Approvals, 05-08-2001\n\nPlease see attached.\n\nAparna Rajaram\nPh: (713) 345-4563\nFax: (713) 853-9476\nAparna.Rajaram@enron.com\n","{'summary': 'Aparna Rajaram sent an email with an attachment titled ""Click Paper Approvals"" to various recipients, including Tana Jones and Amber Ebow. Tana forwarded the email to Jane McBride, asking if there are any issues with a Japanese counterparty wanting to trade physical and financial pulp and paper. Jane responded that there are no problems, as the Japanese counterparty is listed on the Tokyo Stock Exchange and is the second largest paper company in Japan. Amber Ebow also replied to Aparna's email, stating that any adjustments will be highlighted in blue and offering assistance with any questions.', 'Receiver': '['Jane McBride']', 'Sender': 'Tana Jones', 'TimeStamp': '2001-05-11 02:52:00', 'thread_id': 3015, 'Subject': 'Click Paper Approvals, 05-08-2001'}"


In [78]:
answer_2

['**Subject Line:** ETA & PA',
 '',
 '**Answer:** The first email from Tana Jones was with the subject "ETA & PA" sent on September 8, 2000, to jpeters@andrews-kurth.com.',
 '',
 'Would you like to know more details or need assistance with anything else?']

In [79]:
test_query_3 = "Can you tell me all on the christmas list"
top_3_3, answer_3 = query_pipeline(test_query_3)

Not found in cache. Found in main collection.


In [80]:
top_3_3

Unnamed: 0,Documents,Metadatas
3,"From: Kevin G Moore\nTo: ['Vince J Kaminski', 'Mike A Roberts']\nTimestamp: 2000-11-07 02:57:00\nSubject: Christmas List\nSummary: The email thread includes various individuals sharing their Christmas wish lists and discussing a revised Christmas card list. Mark suggests a flashlight with a magnet for the car trunk, while Michelle shares her specific gift preferences. Audrey sends a revised Christmas card list for review, and Kevin informs Vince and Mike about plans for gift baskets, suggesting alternative tokens of appreciation for certain individuals. Kevin Moore seeks approval and input on the gift baskets.\n\nHello Vince and Mike\n\nI want to keep you informed.\n\nThis year all baskets will be done in a timely manner.\nOn last year we were going through a major move therefore \nmany people played key roles in keeping us together.\nThis year however , is a little different , as it is always nice\nto give unfortunately we can not give to everyone.\nI am sending a lists of who we have so far.\n\nThere are a few names on the list that I feel we should do something else \nfor this year .\nunder Shirley's list of names.\n\n(not so expensive)\n\nThey are : Move Team who?\n Mail Room who? \n Facilities Help Desk who?\n\nThere are other tokens of appreciation that we can get for them.\n\nPlease note that you two are the only ones that have seen this e-mail so far\nI will need your approval for all baskets, however your input on the matter \nwill be \ngreatly appreciated.The list is not completed I am still waiting for \nadditions.\n\n Thanks\n Kevin Moore\n","{'Receiver': '['Vince J Kaminski', 'Mike A Roberts']', 'Subject': 'Christmas List', 'thread_id': 43, 'summary': 'The email thread includes various individuals sharing their Christmas wish lists and discussing a revised Christmas card list. Mark suggests a flashlight with a magnet for the car trunk, while Michelle shares her specific gift preferences. Audrey sends a revised Christmas card list for review, and Kevin informs Vince and Mike about plans for gift baskets, suggesting alternative tokens of appreciation for certain individuals. Kevin Moore seeks approval and input on the gift baskets.', 'Sender': 'Kevin G Moore', 'TimeStamp': '2000-11-07 02:57:00'}"
0,"From: Michelle Cash\nTo: ['Trey Cash']\nTimestamp: 2000-11-18 08:15:00\nSubject: Christmas List\nSummary: The email thread includes various individuals sharing their Christmas wish lists and discussing a revised Christmas card list. Mark suggests a flashlight with a magnet for the car trunk, while Michelle shares her specific gift preferences. Audrey sends a revised Christmas card list for review, and Kevin informs Vince and Mike about plans for gift baskets, suggesting alternative tokens of appreciation for certain individuals. Kevin Moore seeks approval and input on the gift baskets.\n\nTrey,\n\nThe following items are on my Christmas list:\n\n1. Boggle game\n2. Joe Jackson CD: Night & Day II\n3. Pottery Barn Large Organizer: Catalogue number 48-2806305 ($69)\n4. Monnogrammed Travel Alarm Clock (Pottery Barn) Catalogue # 10-2811784 \n($30)\n5. Monogrammed Business Card Holder (Pottery Barn) Item # 12-33756186 ($30)\n6. Hermes or Hermes-type scarf\n7. Wood picture frames -- painted black is ok -- for 5x7 prints\n8. Drink coasters \n\nLet me know if you have any questions or want more specific information.\n\nMichelle\n","{'Receiver': '['Trey Cash']', 'summary': 'The email thread includes various individuals sharing their Christmas wish lists and discussing a revised Christmas card list. Mark suggests a flashlight with a magnet for the car trunk, while Michelle shares her specific gift preferences. Audrey sends a revised Christmas card list for review, and Kevin informs Vince and Mike about plans for gift baskets, suggesting alternative tokens of appreciation for certain individuals. Kevin Moore seeks approval and input on the gift baskets.', 'thread_id': 43, 'Sender': 'Michelle Cash', 'Subject': 'Christmas List', 'TimeStamp': '2000-11-18 08:15:00'}"
6,"From: Jeffery Fawcett\nTo: ['Kevin Hyatt']\nTimestamp: 2000-11-13 06:54:00\nSubject: Christmas List\nSummary: The email thread includes various individuals sharing their Christmas wish lists and discussing a revised Christmas card list. Mark suggests a flashlight with a magnet for the car trunk, while Michelle shares her specific gift preferences. Audrey sends a revised Christmas card list for review, and Kevin informs Vince and Mike about plans for gift baskets, suggesting alternative tokens of appreciation for certain individuals. Kevin Moore seeks approval and input on the gift baskets.\n\nI've taken the oars at trying to revise this list. Please look over and make \nchanges as appropriate. I've double struck the ones I thought we should \ndelete, and have shaded in blue those cells which need to be corrected or \nfilled-in. Please let Kevin know when you've had a chance to review, correct \nand approve. He can then pass it on to Audrey for final.\n\nI've saved it in the common drive so everyone has a chance to work on it. \nIt's at the following address:\n\nn:\homedept\tw_nng\TW Longterm\Kevin\XMASlistTWcustomers.doc\n\n---------------------- Forwarded by Jeffery Fawcett/ET&S/Enron on 11/13/2000 \n02:43 PM ---------------------------\n\nAudrey Robertson\n11/07/2000 04:06 PM\nFawcett/ET&S/Enron, Lorraine Lindberg/ET&S/Enron@ENRON, TK \nLohman/ET&S/Enron@ENRON, Christine Stokes/ET&S/Enron@ENRON\n\nPlease find attached the revised copy of the Christmast Card List. I will be \nworking on this list tomorrow and ask that you review. \n\nI have combined all of the lists and deleted duplicates. Make sure that the \npersons and companies listed are updated.\n\nThanks in advance,\n\nadr\n","{'Subject': 'Christmas List', 'thread_id': 43, 'Receiver': '['Kevin Hyatt']', 'Sender': 'Jeffery Fawcett', 'TimeStamp': '2000-11-13 06:54:00', 'summary': 'The email thread includes various individuals sharing their Christmas wish lists and discussing a revised Christmas card list. Mark suggests a flashlight with a magnet for the car trunk, while Michelle shares her specific gift preferences. Audrey sends a revised Christmas card list for review, and Kevin informs Vince and Mike about plans for gift baskets, suggesting alternative tokens of appreciation for certain individuals. Kevin Moore seeks approval and input on the gift baskets.'}"


In [81]:
answer_3

['**Subject Line:** Christmas List',
 '',
 "**Answer:** The Christmas list includes various items requested by individuals as part of their wish lists. Michelle Cash's list, for example, includes items such as a Boggle game, a Joe Jackson CD, Pottery Barn organizers and accessories, a Hermes-type scarf, wood picture frames, and drink coasters. The list also involves discussions on a revised Christmas card list and plans for gift baskets.",
 '',
 "[Additional metadata for context: Michelle Cash's email includes specific items listed on her Christmas list, indicating her gift preferences.]",
 '',
 'Is there anything else I can assist you with?']