In [3]:
print('hello world')

hello world


In [32]:
email = """Hey IT,

My name is Ryan and I am a Recruiter here at Kognitos -  nice to e-meet you! I saw that you applied for the Software Engineer - Intern (Summer 2025) and we'd like to move to the next step in the interview process! We'd like to schedule an intro call with you. Could you please book a time for a quick intro call with us. If you don't see anything that works for your schedule, feel free to provide me with a few dates and times and I will do my best to accommodate.
In the meantime we know that job postings only tell part of a companies story, so we encourage you to dive deeper through the various links below."""

user_query = 'write an email thanking and replying to the email in a professional manner mention that you are available anytime'
category = 'professional'

In [33]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import SystemMessage, HumanMessage
load_dotenv()

True

In [55]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [56]:
from pydantic import BaseModel, Field
class ResponseFormatter(BaseModel):
    """Structured output for rewriting a query for semantic search over email templates."""
    retrieval_query: str = Field(
        description="The optimized query to retrieve semantically similar email responses."
    )
    retrieval_keywords: str = Field(
        description="Essential keywords (space separated) that capture the core intent of the user query."
    )
    # purpose: str = Field(
    #     description="Whether the user is writing a new email or replying to an existing one. Must be one of: 'writing', 'replying'."
    # )
    # intent: str = Field(
    #     description=(
    #         "The functional intent of the email. Must be one of: "
    #         "'requesting', 'inquiring', 'thanking', 'informing', 'apology', 'scheduling'."
    #     )
    # )

In [57]:
structured_llm = llm.with_structured_output(ResponseFormatter)

# query rewriting for retrieval

In [58]:
def rewrite(email_content: str, user_query: str, purpose: str, intent: str, style: str) -> ResponseFormatter:
        messages = [
            SystemMessage(content=(
                "You are a professional email assistant. You will be given:\n"
                "- `email_content`: the original email text\n"
                "- `user_query`: what the user wants to write\n"
                "- `purpose`: whether the user is writing a new email or replying\n"
                "- `intent`: the goal of the email (e.g. requesting, thanking, informing, etc.)\n\n"
                f"Use this context to reformulate the user query into a **retrieval query** that captures the user's intent "
                f"and tone, optimized for semantic similarity search over email response templates.\n"
                f"The tone should match the style: {style}.\n\n"
                "Return:\n"
                "- `retrieval_query`: one concise natural language sentence\n"
                "- `retrieval_keywords`: a space-separated list of keywords"
            )),
            HumanMessage(content=f"Email Content:\n{email_content}"),
            HumanMessage(content=f"User Query:\n{user_query}"),
            HumanMessage(content=f"Purpose: {purpose}"),
            HumanMessage(content=f"Intent: {intent}")
        ]

        response = structured_llm.invoke(messages)
        return response

In [60]:
struct_llm_response = rewrite(email_content=email,user_query=user_query,style='professional',purpose='replying',intent='thanking')

In [61]:
struct_llm_response

ResponseFormatter(retrieval_query='Write a professional email thanking the recruiter and stating availability for a call anytime.', retrieval_keywords='thank you available anytime')

# rag

In [16]:
with open("emails.txt", "r", encoding="utf-8") as f:
    full_text = f.read()

# Split using delimiter
docs = full_text.split('---')
docs = [doc.strip() for doc in docs if doc.strip()]

In [54]:
docs

['Subject: Acknowledgment of Your Email\n\nDear [Recipient’s Name],\n\nThank you for your email. I have received your message and will get back to you shortly. Your inquiry is important to us, and I appreciate your patience as I review the details.\n\nPlease feel free to reach out if you have any additional information to share.\n\nBest regards,\n\n[Your Company Name]',
 'Subject: Acknowledgment of Your Inquiry on [Specific Topic]\n\nHi [Recipient’s Name],\n\nThank you for your email regarding [specific topic]. I am confirming that I have received your message. I will review the details and aim to respond by [specific date/time]. Your input is valuable, and I want to address your concerns thoroughly.\n\nIf you have any further information to share, please feel free to do so.\n\nBest regards,\n\n[Your Company Name]',
 'Subject: Acknowledgment of Your Request: [Request Title]\n\nDear [Recipient’s Name],\n\nI appreciate your request for [specific details] and am happy to assist with it. Y

In [18]:
from langchain.schema import Document

documents = [Document(page_content=doc) for doc in docs]

In [19]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

In [21]:
print(chunks)

[Document(metadata={}, page_content='Subject: Acknowledgment of Your Email\n\nDear [Recipient’s Name],\n\nThank you for your email. I have received your message and will get back to you shortly. Your inquiry is important to us, and I appreciate your patience as I review the details.'), Document(metadata={}, page_content='Please feel free to reach out if you have any additional information to share.\n\nBest regards,\n\n[Your Company Name]'), Document(metadata={}, page_content='Subject: Acknowledgment of Your Inquiry on [Specific Topic]\n\nHi [Recipient’s Name],'), Document(metadata={}, page_content='Hi [Recipient’s Name],\n\nThank you for your email regarding [specific topic]. I am confirming that I have received your message. I will review the details and aim to respond by [specific date/time]. Your input is valuable, and I want to address your concerns thoroughly.'), Document(metadata={}, page_content='If you have any further information to share, please feel free to do so.\n\nBest re

In [28]:
from langchain.schema import Document
import re

def parse_email(raw_text):
    """Extract subject and body from email string."""
    match = re.match(r"Subject:\s*(.*?)\n\n(.*)", raw_text, re.DOTALL)
    if match:
        subject = match.group(1).strip()
        body = match.group(2).strip()
    else:
        subject = "Unknown Subject"
        body = raw_text.strip()
    return subject, body

# Convert to list of LangChain Documents
email_documents = []
for raw_doc in docs:
    subject, body = parse_email(raw_doc)
    email_documents.append(
        Document(
            page_content=body,
            metadata={"subject": subject}
        )
    )

In [29]:
email_documents[0]

Document(metadata={'subject': 'Acknowledgment of Your Email'}, page_content='Dear [Recipient’s Name],\n\nThank you for your email. I have received your message and will get back to you shortly. Your inquiry is important to us, and I appreciate your patience as I review the details.\n\nPlease feel free to reach out if you have any additional information to share.\n\nBest regards,\n\n[Your Company Name]')

In [25]:
from langchain.embeddings import HuggingFaceEmbeddings

EMBEDDING_MODEL = "all-MiniLM-L6-v2"
embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

traditional weavite client setup

In [27]:
import weaviate

client = weaviate.connect_to_local()

print(client.is_ready())

client.close()

True


setting up a collection in the database

a collection is a set of objects that share the same data structure

'Before you import data, you should create a collection definition to define the data properties for the collection'

langchain integration with weaviate

In [63]:
import weaviate
# from langchain_community.vectorstores import Weaviate # deprecated 
from langchain_weaviate.vectorstores import WeaviateVectorStore

client = weaviate.connect_to_local()

vectorstore = WeaviateVectorStore.from_documents(
    documents=email_documents,
    client=client,
    index_name='EmailTemplates',
    text_key='content',
    embedding=embedder,
    by_text=False
)

searching

In [64]:
query = struct_llm_response.retrieval_query
similar_docs = vectorstore.similarity_search(query)

# for i, doc in enumerate(similar_docs):
#     print(f"\nDocument {i + 1}:")
#     print(doc.page_content[:100] + "...")

In [65]:
similar_docs[0]

Document(metadata={'text': None, 'subject': 'Acknowledgment of Your Email'}, page_content='Dear [Recipient’s Name],\n\nThank you for your email. I have received your message and will get back to you shortly. Your inquiry is important to us, and I appreciate your patience as I review the details.\n\nPlease feel free to reach out if you have any additional information to share.\n\nBest regards,\n\n[Your Company Name]')

In [53]:
similar_docs[2].page_content

'Hi [Recipient’s Name],\n\nThank you for your email regarding [specific topic]. I am confirming that I have received your message. I will review the details and aim to respond by [specific date/time]. Your input is valuable, and I want to address your concerns thoroughly.\n\nIf you have any further information to share, please feel free to do so.\n\nBest regards,\n\n[Your Company Name]'

# setting up rag

In [76]:
client = weaviate.connect_to_local()

vectorstore = WeaviateVectorStore(
    client=client,
    index_name='EmailTemplates',
    text_key='content',
    embedding=embedder
)

print(client.collections.list_all())

{'EmailTemplates': _CollectionConfigSimple(name='EmailTemplates', description=None, generative_config=None, properties=[_Property(name='text', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer='none', vectorizer_configs=None), _Property(name='subject', description="This property was generated by Weaviate's auto-schema feature on Fri Jul 25 03:54:54 2025", data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer='none', vectorizer_configs=None), _Property(name='content', description="This property was generated by Weaviate's auto-schema feature on Fri Jul 25 03:54:54 2025", data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_

  vectorstore = WeaviateVectorStore(


In [77]:
retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 5, "score_threshold": 0.7}
    )

In [78]:
retrieved_templates = retriever.invoke(struct_llm_response.retrieval_query)

In [89]:
retrieved_templates[0].metadata['subject']

'Acknowledgment of Your Email'

In [92]:
retrieved_context = "\n\n".join([f'SUBJECT: {template.metadata['subject']} {template.page_content}\n\n' for template in retrieved_templates])

In [93]:
retrieved_context

'SUBJECT: Acknowledgment of Your Email Dear [Recipient’s Name],\n\nThank you for your email. I have received your message and will get back to you shortly. Your inquiry is important to us, and I appreciate your patience as I review the details.\n\nPlease feel free to reach out if you have any additional information to share.\n\nBest regards,\n\n[Your Company Name]\n\n\n\nSUBJECT: Acknowledgment of Your Email Dear [Recipient’s Name],\n\nThank you for your email. I have received your message and will get back to you shortly. Your inquiry is important to us, and I appreciate your patience as I review the details.\n\nPlease feel free to reach out if you have any additional information to share.\n\nBest regards,\n\n[Your Company Name]\n\n'

In [94]:
from langchain.prompts import ChatPromptTemplate

rag_prompt = ChatPromptTemplate.from_template("""
You are a helpful email assistant. Use the retrieved email templates below to help the user craft a response.

- Purpose: {purpose}
- Intent: {intent}
- Style: {style}

Original Email:
{email_content}

User Request:
{user_query}

Relevant Examples:
{context}

Based on the above, write a clear, well-structured email. Do not copy the templates verbatim. Tailor the tone and content appropriately.
""")


In [98]:
purpose = 'replying'
intent = 'thanking'
style = 'professional'

final_messages = rag_prompt.invoke({
    "context": retrieved_context,
    "email_content": email,
    "user_query": user_query,
    "purpose": purpose,
    "intent": intent,
    "style": style
}).to_messages()

# final_output = llm.invoke(messages)

In [97]:
class EmailReponse(BaseModel):
    subject: str = Field(description="Subject line of the email")
    body: str = Field(description="Body of the email in plain text")

email_writer = llm.with_structured_output(EmailReponse)

In [99]:
final_output = email_writer.invoke(final_messages)

In [100]:
print("Subject:", final_output.subject)
print("Body:", final_output.body)

Subject: Re: Software Engineer - Intern (Summer 2025) - Intro Call
Body: Dear Ryan,

Thank you for your email and the opportunity to move forward in the interview process for the Software Engineer - Intern (Summer 2025) position. I am very interested in this role.

I am available for an introductory call at your earliest convenience. Please let me know what time works best for you, as my schedule is flexible.

I look forward to speaking with you soon.

Best regards,
[Your Name]


for deleting the db

In [110]:
# client.collections.delete('EmailTemplates')

adding the generated output back into the vector db

In [116]:
new_template = Document(
    page_content=final_output.body,
    metadata={
        "subject":final_output.subject
    }
)

vectorstore.add_documents([new_template])

['a76eab87-f62e-433d-9063-73adf2f62b35']

In [None]:
"next integration for enhanced retrival"
"""
retriever = vectorstore.as_retriever(
    search_kwargs={
        "k": 5,
        "filter": {
            "intent": "thanking",
            "purpose": "replying",
            "style": "professional"
        }
    }
)
"""

In [117]:
client.close()

# modularization

In [None]:
from pydantic import BaseModel, Field
class ResponseFormatter(BaseModel):
    """Structured output for rewriting a query for semantic search over email templates."""
    retrieval_query: str = Field(
        description="The optimized query to retrieve semantically similar email responses."
    )
    retrieval_keywords: str = Field(
        description="Essential keywords (space separated) that capture the core intent of the user query."
    )

class EmailReponse(BaseModel):
    subject: str = Field(description="Subject line of the email")
    body: str = Field(description="Body of the email in plain text")

In [124]:
class Rewriter:
    def __init__(self,model):
        self.formatter = model.with_structured_output(ResponseFormatter)

    def rewrite(self,email_content: str, user_query: str, purpose: str, intent: str, style: str = 'professional') -> ResponseFormatter:
        messages = [
            SystemMessage(content=(
                "You are a professional email assistant. You will be given:\n"
                "- `email_content`: the original email text\n"
                "- `user_query`: what the user wants to write\n"
                "- `purpose`: whether the user is writing a new email or replying\n"
                "- `intent`: the goal of the email (e.g. requesting, thanking, informing, etc.)\n\n"
                f"Use this context to reformulate the user query into a **retrieval query** that captures the user's intent "
                f"and tone, optimized for semantic similarity search over email response templates.\n"
                f"The tone should match the style: {style}.\n\n"
                "Return:\n"
                "- `retrieval_query`: one concise natural language sentence\n"
                "- `retrieval_keywords`: a space-separated list of keywords"
            )),
            HumanMessage(content=f"Email Content:\n{email_content}"),
            HumanMessage(content=f"User Query:\n{user_query}"),
            HumanMessage(content=f"Purpose: {purpose}"),
            HumanMessage(content=f"Intent: {intent}")
        ]

        response = structured_llm.invoke(messages)
        return response

In [120]:
class RAG:
    def __init__(self,model):
        self.writer = model.with_structured_output(EmailReponse)
        _client = weaviate.connect_to_local()
        # for fist time creation use:
        """
        vectorstore = WeaviateVectorStore.from_documents(
            documents=email_documents,  # email_documents is a list of templates in Document objects
            client=client,
            index_name='EmailTemplates',
            text_key='content',
            embedding=embedder,
            by_text=False
        )
        """
        self.vectorstore = WeaviateVectorStore(
            client=_client,
            index_name='EmailTemplates',
            text_key='content',
            embedding=embedder
        )
        self.rag_prompt = ChatPromptTemplate.from_template("""
            You are a helpful email assistant. Use the retrieved email templates below to help the user craft a response.

            - Purpose: {purpose}
            - Intent: {intent}
            - Style: {style}

            Original Email:
            {email_content}

            User Request:
            {user_query}

            Relevant Examples:
            {context}

            Based on the above, write a clear, well-structured email. Do not copy the templates verbatim. Tailor the tone and content appropriately.
            """)
        
    def retrieve(self,rewritten_user_query, num_docs, threshold=0.7):
        retriever = self.vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": num_docs, "score_threshold": threshold}
        )
        retrieved_templates = retriever.invoke(rewritten_user_query.retrieval_query)
        retrieved_context = "\n\n".join([f'SUBJECT: {template.metadata['subject']} {template.page_content}\n\n' for template in retrieved_templates])
        return retrieved_context

    def write(self, retrieved_context,email,user_query,purpose,intent,style='professional'):
        messages = self.rag_prompt.invoke({
            "context": retrieved_context,
            "email_content": email,
            "user_query": user_query,
            "purpose": purpose,
            "intent": intent,
            "style": style
        }).to_messages()
        
        llm_final_output = self.writer.invoke(messages)
        self.updatedb(llm_final_output)
        return llm_final_output

    def updatedb(self,llm_output):
        # add a similarity check to avoid insertion of duplicates
        # add PII removal feature before db update
        new_template = Document(
            page_content=llm_output.body,
            metadata={
                "subject":llm_output.subject
            }
        )

        self.vectorstore.add_documents([new_template])

# testing flow

In [125]:
# Example email content and user query for writing a new email
email_content = ""  # Empty since we're writing a new email, not replying
user_query = "write an email to thank my manager for the opportunity to work on the new project"
purpose = "writing"
intent = "thanking"
style = "professional"

# Initialize Rewriter and RAG
rewriter = Rewriter(llm)
rag = RAG(llm)

# Step 1: Rewrite the user query for retrieval
rewritten_query = rewriter.rewrite(email_content, user_query, purpose, intent, style)
print("Rewritten Query:", rewritten_query)

            Please make sure to close the connection using `client.close()`.
  rag = RAG(llm)


Rewritten Query: retrieval_query='Write a professional email to thank my manager for the opportunity to work on the new project.' retrieval_keywords='thank manager opportunity new project'


In [128]:
type(rewritten_query.retrieval_query)

str

In [130]:
# Step 2: Retrieve relevant templates from the vectorstore
retrieved_context = rag.retrieve(rewritten_query, num_docs=3)
print("Retrieved Context:", retrieved_context)

# Step 3: Generate the final email using RAG
final_output = rag.write(retrieved_context, email_content, user_query, purpose, intent, style)
print("Subject:", final_output.subject)
print("Body:", final_output.body)

Retrieved Context: SUBJECT: Thank You for Everything! Hi [Recipient’s Name],

As we conclude our work on [specific project], I wanted to express my gratitude for your support and partnership. It’s been a pleasure working with you.

Wishing you all the best in your future endeavors!

Sincerely,

[Your Name]


Subject: Thank You for the Opportunity
Body: Dear [Manager's Name],

I am writing to express my sincere gratitude for the opportunity to work on the new project. I am excited about this opportunity and I am confident that I can make a significant contribution.

Thank you again for your trust and support. I am committed to making this project a success.

Sincerely,
[Your Name]


In [131]:
client.close()