In [5]:
import chromadb
chroma_client = chromadb.PersistentClient(path="vectordb")

In [70]:
collection = chroma_client.create_collection(name="email_data")

In [71]:
import re

def clean_text(text):
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\xa0', '', text)
    text = re.sub(r'\u200c', '', text)

    return text

In [72]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_text(text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=700,
        chunk_overlap=20
    )
    return text_splitter.split_text(text)

In [73]:
from server import service

def fetch_emails(service, user_id='me', max_results=100):
    try:
        # Fetch the list of messages
        results = service.users().messages().list(userId=user_id, maxResults=max_results).execute()
        messages = results.get('messages', [])
        
        email_data = []
        for message in messages:
            msg = service.users().messages().get(userId=user_id, id=message['id']).execute()
            email_data.append(msg)
        
        return email_data
    except Exception as e:
        print(f'An error occurred: {e}')
        return []

In [74]:
import base64

def add_emails_to_collection(collection, emails):
    for email in emails:
        # For example, extract subject, sender, etc.
        subject = next(header['value'] for header in email['payload']['headers'] if header['name'] == 'Subject')
        sender = next(header['value'] for header in email['payload']['headers'] if header['name'] == 'From')
        snippet = email.get('snippet', '')
        mssg_id = email['id']

        # Clean the extracted text
        clean_subject = clean_text(subject)
        clean_sender = clean_text(sender)
        clean_snippet = clean_text(snippet)

        # Extract image data if available
        image_data = []
        if 'parts' in email['payload']:
            for part in email['payload']['parts']:
                if part['filename'] and 'image' in part['mimeType']:
                    # Decode the image data
                    img_data = base64.urlsafe_b64decode(part['body']['data'])
                    image_data.append(img_data)

        # Split the snippet into chunks
        snippet_chunks = split_text(clean_snippet)

        # Add each chunk to the collection
        for i, chunk in enumerate(snippet_chunks):
            chunk_id = f"{mssg_id}_{i}"
            collection.add(
                ids=[chunk_id],
                metadatas=[{
                    'subject': clean_subject,
                    'sender': clean_sender,
                    'chunk_index': i
                }],
                documents=[chunk]
            )

In [75]:
emails = fetch_emails(service)
print(len(emails))
add_emails_to_collection(collection, emails)

100


In [76]:
results = collection.query(
    query_texts=["dropbox features"],
    n_results=4
)
print(results["documents"])

[['what are the new dropbox features', 'hello dropbox has recently introduced some exciting new features to help you better manage your digital content here are a few key updates 1 automated folders create folders that automatically', 'new features quality enhancements and much more', 'get creative cloud all apps one plan endless possibilities bring any idea to life with the creative cloud all apps plan get photoshop illustrator adobe express and the latest generative ai', 'create quickly and easily with templates from adobe express kick off the holiday spirit with a spectacular party invite making holidaythemed party invites is easy with adobe express browse from']]


# **Collection Exists!**

In [6]:
old_collection = chroma_client.get_collection(name="email_data")

In [7]:
results = old_collection.query(
    query_texts=["dropbox features"],
    n_results=5
)
print(results["documents"])

[['what are the new dropbox features', 'hello dropbox has recently introduced some exciting new features to help you better manage your digital content here are a few key updates 1 automated folders create folders that automatically', 'new features quality enhancements and much more', 'get creative cloud all apps one plan endless possibilities bring any idea to life with the creative cloud all apps plan get photoshop illustrator adobe express and the latest generative ai', 'create quickly and easily with templates from adobe express kick off the holiday spirit with a spectacular party invite making holidaythemed party invites is easy with adobe express browse from']]


# **Generate Reply**

In [1]:
from dotenv import dotenv_values

config = dotenv_values(".env")
sec_key = config["HF_TOKEN"]

In [3]:
from langchain_huggingface import HuggingFaceEndpoint

repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(repo_id=repo_id, max_length=128, temperature=0.7)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [2]:
new_email = {
    "Subject": "test mail",
    "From": "Proxylol Account <proxylola193@gmail.com>",
    "To": "\"shogunmasters54@gmail.com\" <shogunmasters54@gmail.com>",
    "Body": "Hello how are you? I was thinking to complete the project this sunday. Confirm if you are available."
}

In [17]:
from langchain import PromptTemplate, LLMChain

def draftEmail(email):
    # Perform a query search with the email body
    query_results = old_collection.query(
        query_texts=[email["Body"]],
        n_results=4
    )
    context_chromadb = query_results["documents"]
    print(context_chromadb)
    
    # reply for the mail
    reply_subject = f"Re: {email['Subject']}"

    # Prompt for the email body
    body_prompt = f"Email Body:\n{email['Body']}, Email Subject:\n{email['Subject']}\n\nRelevant Context:\n{context_chromadb}\n\nDraft a reply to this email. Include only the body of the email:"
    reply_draft = llm.invoke(body_prompt)

    # Combine subject and body drafts into the final email format
    final_email = {
        "Subject": reply_subject.strip(),
        "Body": reply_draft.strip(),
        "From": email["To"],
        "To": email["From"]
    }

    return final_email


In [18]:
print(draftEmail(new_email))

[['don39t miss out on your creative boost the creativity conference 1516 oct free online event adobe max two weeks until max  don39t miss out mark your calendars only two weeks to go until the', 'steam 1 game you39ve wished for is on sale the witness 75  849  212 week long deal offer ends 23 sep 1000pm ist you wake up alone on a strange island full of puzzles that will challenge and', 'create stunning designs in minutes for free dial up the diwali spirit with stunning designs this diwali illuminate your creativity create dazzling greetings social contents flyers and more with', 'stock up on everything you need to bring seasonal projects to life get in the spirit early it39s never too early to get a head start on your holiday projects with cozy festive content from adobe']]
{'Subject': 'Re: test mail', 'Body': "Hi there,\n\nYes, I am available this Sunday to complete the project. Let's make it happen!\n\nBest,\n[Your Name]", 'From': '"shogunmasters54@gmail.com" <shogunmasters54@gmail.co