In [4]:
# Import necessary libraries
from pymongo import MongoClient
from bson.objectid import ObjectId
import json
import re

In [6]:
# Connect to the MongoDB database
client = MongoClient('mongodb://localhost:27017/')
db = client['emails']
collection = db['complete_details']
api_response_collection = db['api_response']

In [8]:
# Specify which document to retrieve
n = 9  # Change this number to retrieve different documents

# Retrieve the document
document = collection.find().skip(n - 1).limit(1)

In [10]:
# Initialize a variable to hold document details
document_detail = ""

In [12]:
# Iterate through the document cursor
for docs in document:
    # Convert the document to a JSON string
    document_detail = json.dumps(docs, default=str)

In [14]:
print(document_detail)

{"_id": "6708a9fc2a972ccfae483e7d", "file": "allen-p/_sent_mail/101.", "message": "Message-ID: <20641191.1075855687472.JavaMail.evans@thyme>\nDate: Tue, 17 Oct 2000 02:26:00 -0700 (PDT)\nFrom: phillip.allen@enron.com\nTo: mark.scott@enron.com\nSubject: Re: High Speed Internet Access\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nX-From: Phillip K Allen\nX-To: Mark Scott\nX-cc: \nX-bcc: \nX-Folder: \\Phillip_Allen_Dec2000\\Notes Folders\\'sent mail\nX-Origin: Allen-P\nX-FileName: pallen.nsf\n\n1. login:  pallen pw: ke9davis\n\n I don't think these are required by the ISP \n\n  2.  static IP address\n\n IP: 64.216.90.105\n Sub: 255.255.255.248\n gate: 64.216.90.110\n DNS: 151.164.1.8\n\n  3.  Company: 0413\n        RC:  105891"}


In [16]:
# Load JSON
data = json.loads(document_detail)

# Print the content in plain text format
print(f"ID: {data['_id']}")
print(f"File: {data['file']}")
print("Message:")
print(data['message'].replace('\\n', '\n'))

ID: 6708a9fc2a972ccfae483e7d
File: allen-p/_sent_mail/101.
Message:
Message-ID: <20641191.1075855687472.JavaMail.evans@thyme>
Date: Tue, 17 Oct 2000 02:26:00 -0700 (PDT)
From: phillip.allen@enron.com
To: mark.scott@enron.com
Subject: Re: High Speed Internet Access
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: Mark Scott
X-cc: 
X-bcc: 
X-Folder: \Phillip_Allen_Dec2000\Notes Folders\'sent mail
X-Origin: Allen-P
X-FileName: pallen.nsf

1. login:  pallen pw: ke9davis

 I don't think these are required by the ISP 

  2.  static IP address

 IP: 64.216.90.105
 Sub: 255.255.255.248
 gate: 64.216.90.110
 DNS: 151.164.1.8

  3.  Company: 0413
        RC:  105891


In [153]:
# Convert the JSON string to a dictionary
data = json.loads(document_detail)

# Extract the 'message' field
message = data.get("message", "")


In [155]:
# Initialize a dictionary to hold the parsed fields
parsed_data = {}

# Define a regex pattern to find key-value pairs in the message
pattern = re.compile(r'^([A-Za-z\-]+):\s*(.+)', re.MULTILINE)


In [157]:
# Find all matches and add them to the parsed_data dictionary
for match in pattern.finditer(message):
    key = match.group(1).strip()  # Extract the key
    value = match.group(2).strip()  # Extract the value

    # Add the key-value pair to the dictionary
    if key in parsed_data:
        # If the key already exists, convert it to a list if not already
        if not isinstance(parsed_data[key], list):
            parsed_data[key] = [parsed_data[key]]
        parsed_data[key].append(value)  # Append the new value
    else:
        parsed_data[key] = value  # Add new key-value pair


In [159]:
# Include other fields from the main document if necessary
parsed_data['_id'] = data.get('_id', "")
parsed_data['file'] = data.get('file', "")

# Find the index of the first blank line to determine where the body starts
body_start_index = message.find('\n\n')  # Look for the first occurrence of double newline

if body_start_index != -1:
    # Extract the body of the email, starting after the blank line
    body = message[body_start_index + 2:].strip()  # Skip the blank line
    parsed_data['Unstructured-Text'] = body  # Add the body as unstructured text
else:
    # If no blank line is found, set unstructured text as empty
    parsed_data['Unstructured-Text'] = ""


In [161]:
# Print the parsed data as a dictionary
print(parsed_data)


{'Message-ID': '<33307764.1075855696139.JavaMail.evans@thyme>', 'Date': 'Fri, 16 Mar 2001 04:28:00 -0800 (PST)', 'From': 'phillip.allen@enron.com', 'To': 'jacquestc@aol.com', 'Subject': 'Mime-Version: 1.0', 'Content-Type': 'text/plain; charset=us-ascii', 'Content-Transfer-Encoding': '7bit', 'X-From': 'Phillip K Allen', 'X-To': 'jacquestc@aol.com', 'X-cc': 'X-bcc:', 'X-Folder': '\\Phillip_Allen_June2001\\Notes Folders\\All documents', 'X-Origin': 'Allen-P', 'X-FileName': 'pallen.nsf', '_id': '6708a9fc2a972ccfae48425b', 'file': 'allen-p/all_documents/460.', 'Unstructured-Text': "Jacques,\n\nI think we reached an agreement with George and Larry to pick up the items of \nvalue and not pay any fees for their time.  It looks as if we will be able to \nuse everything they have done (engineering, architecture, survey, \nappraisal).  One point that is unclear is they claim that the $15,000 in \nextensions that they paid was applied to the purchase price of the land like \nearnest money would be

In [163]:
result_string = (
    f"\nMessage-ID: \t {parsed_data['Message-ID']}"
    f"\nFrom: \t {parsed_data['From']}"
    f"\nSubject: \t {parsed_data['Subject']}"
    f"\nDate: \t {parsed_data['Date']}"
    f"\nBody: \t {parsed_data['Unstructured-Text']}"
)

In [165]:
print(result_string)


Message-ID: 	 <33307764.1075855696139.JavaMail.evans@thyme>
From: 	 phillip.allen@enron.com
Subject: 	 Mime-Version: 1.0
Date: 	 Fri, 16 Mar 2001 04:28:00 -0800 (PST)
Body: 	 Jacques,

I think we reached an agreement with George and Larry to pick up the items of 
value and not pay any fees for their time.  It looks as if we will be able to 
use everything they have done (engineering, architecture, survey, 
appraisal).  One point that is unclear is they claim that the $15,000 in 
extensions that they paid was applied to the purchase price of the land like 
earnest money would be applied.  I looked at the closing statements and I 
didn't see $15,000 applied against the purchase price.  Can you help clear 
this up.

Assuming we clear up the $15,000, we need to get the property released.  
Keith and I are concerned about taking over the Bishop Corner partnership and 
the risk that there could be undisclosed liabilities.  On the other hand, 
conveyance of the partnership would be a time an

In [251]:
import os
from mistralai import Mistral

api_key = "IzwdiP0W04dwKFs1hRB3Mex6yhqHjPVV"
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

def summarize_text(input_text):
    prompt = f"Please summarize the following text into 20 words and provide a new heading, separate both them by new line do not give any sub-headings like Subject or Summary:\n\n{input_text}"
    
    chat_response = client.chat.complete(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ]
    )
    
    return chat_response.choices[0].message.content

In [252]:
# Get the summary of the message
summary_output = summarize_text(result_string)
print(summary_output)

**Enron Land Deal Query**

Phillip seeks Jacques' help to clarify $15,000 discrepancy and decide on Bishop Corner partnership takeover.


In [265]:
# Prepare the new document to be inserted into the api_response collection
new_response = {
    "Message-ID": parsed_data.get("Message-ID", ""),
    "From": parsed_data.get("From", ""),
    "To": parsed_data.get("To", ""),  # Ensure 'To' is included if it's available
    "Subject": summary_output.split('\n')[0],  # Use the first line of the summary as the subject
    "Summarized-Body": summary_output.split('\n')[2]  # The entire summary as the body
}

In [271]:
print(f"\nMessage-ID: \t {new_response['Message-ID']}"
    f"\nFrom: \t {new_response['From']}"
    f"\nTo: \t {new_response['To']}"
    f"\nSubject: \t {new_response['Subject']}"
    f"\nSummary: \t {new_response['Summarized-Body']}")


Message-ID: 	 <33307764.1075855696139.JavaMail.evans@thyme>
From: 	 phillip.allen@enron.com
To: 	 jacquestc@aol.com
Subject: 	 **Enron Land Deal Query**
Summary: 	 Phillip seeks Jacques' help to clarify $15,000 discrepancy and decide on Bishop Corner partnership takeover.


In [273]:
# Insert the new document into the api_response collection
api_response_collection.insert_one(new_response)

InsertOneResult(ObjectId('670cc6cd77e5fb11e9592719'), acknowledged=True)

In [275]:
# Print the new response document
print("New response added to 'api_response':", new_response)

New response added to 'api_response': {'Message-ID': '<33307764.1075855696139.JavaMail.evans@thyme>', 'From': 'phillip.allen@enron.com', 'To': 'jacquestc@aol.com', 'Subject': '**Enron Land Deal Query**', 'Summarized-Body': "Phillip seeks Jacques' help to clarify $15,000 discrepancy and decide on Bishop Corner partnership takeover.", '_id': ObjectId('670cc6cd77e5fb11e9592719')}
