In [2]:
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

class WhatsAppChatLoader(BaseLoader):
    """Load a WhatsApp chat export (plain text) as one ``Document`` per line.

    A line is treated as a message when it has the shape
    ``<datetime> - <sender>: <message>``: the text before the first ``" - "``
    is the datetime, and the remainder is split on its first ``": "`` into
    sender and message. Lines that do not have both separators in that order
    are skipped.

    NOTE(review): because parsing is strictly line-based, a line that merely
    continues a multi-line message is never appended to the preceding
    message; it is skipped, or parsed as its own message if it happens to
    contain both separators.
    """

    def __init__(self, file_path: str):
        """Remember the path of the exported chat file.

        Args:
            file_path: Path to the chat export. It is also recorded as the
                ``source`` metadata value of every produced document.
        """
        self.file_path = file_path

    def lazy_load(self):
        """Yield one ``Document`` per parseable line, streaming the file.

        Yields:
            Document: ``page_content`` is the stripped message text;
            ``metadata`` holds ``datetime`` and ``sender`` (both stripped
            strings) and ``source`` (the file path).

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading
        # byte-order mark. With plain "utf-8" the BOM would survive into the
        # first message's datetime, since str.strip() does not remove U+FEFF.
        with open(self.file_path, "r", encoding="utf-8-sig") as file:
            # Iterate the handle directly rather than readlines() so the
            # whole export never has to be held in memory at once.
            for line in file:
                datetime_part, dash, message_part = line.partition(" - ")
                if not dash:
                    continue  # no datetime separator on this line

                sender, colon, message = message_part.partition(": ")
                if not colon:
                    continue  # nothing after the datetime names a sender

                yield Document(
                    page_content=message.strip(),
                    metadata={
                        "datetime": datetime_part.strip(),
                        "sender": sender.strip(),
                        "source": self.file_path,
                    },
                )

    def load(self):
        """Parse the whole file eagerly.

        Returns:
            list: Every document produced by ``lazy_load``, in file order.
        """
        return list(self.lazy_load())


In [5]:
# Export to parse; the path is resolved against the kernel's working directory.
chat_path = "sample_whatsapp_chat.txt"

# `loader` and `docs` are reused by the cells further down.
loader = WhatsAppChatLoader(chat_path)
docs = loader.load()

# Report how many messages were parsed.
print(f"✅ Loaded {len(docs)} messages.\n")


✅ Loaded 1050 messages.



In [6]:
# Show the first five parsed messages as "<datetime> - <sender>: <message>".
preview_docs = docs[:5]
for doc in preview_docs:
    print(f"{doc.metadata['datetime']} - {doc.metadata['sender']}: {doc.page_content}")


01/01/2024, 09:02 AM - John: I'll be late today.
01/01/2024, 09:10 AM - John: Did you finish the assignment?
01/01/2024, 09:12 AM - You: I'll call you later.
01/01/2024, 09:15 AM - Rahul: What's your plan for the weekend?
01/01/2024, 09:21 AM - You: What's your plan for the weekend?


In [11]:
# Case-insensitive substring search over the message text.
keyword = "exam"

# Lower-case the keyword once instead of on every loop iteration.
keyword_lower = keyword.lower()

# Prints every matching message, so a common keyword produces long output.
for doc in docs:
    if keyword_lower in doc.page_content.lower():
        print(f"{doc.metadata['datetime']} - {doc.metadata['sender']}: {doc.page_content}")



01/01/2024, 09:25 AM - Alice: Don't forget the exam is tomorrow.
01/01/2024, 04:42 PM - Rahul: Don't forget the exam is tomorrow.
01/01/2024, 05:10 PM - Alice: Don't forget the exam is tomorrow.
01/01/2024, 05:23 PM - You: Don't forget the exam is tomorrow.
01/01/2024, 07:01 PM - Alex: Don't forget the exam is tomorrow.
01/01/2024, 07:30 PM - Alice: Don't forget the exam is tomorrow.
01/01/2024, 08:14 PM - Priya: Don't forget the exam is tomorrow.
01/01/2024, 10:17 PM - Alex: Don't forget the exam is tomorrow.
01/01/2024, 11:24 PM - Rahul: Don't forget the exam is tomorrow.
01/01/2024, 11:27 PM - Alice: Don't forget the exam is tomorrow.
01/01/2024, 11:49 PM - Priya: Don't forget the exam is tomorrow.
02/01/2024, 01:33 AM - Rahul: Don't forget the exam is tomorrow.
02/01/2024, 03:43 AM - Alice: Don't forget the exam is tomorrow.
02/01/2024, 03:51 AM - You: Don't forget the exam is tomorrow.
02/01/2024, 04:22 AM - John: Don't forget the exam is tomorrow.
02/01/2024, 05:23 AM - You: Don'

In [8]:
# Print the earliest message in `docs` whose sender matches exactly
# (the comparison is case-sensitive).
sender = "Alex"
for doc in docs:
    if doc.metadata["sender"] != sender:
        continue  # written by someone else; keep scanning
    print(f"{doc.metadata['datetime']} - {sender}: {doc.page_content}")
    break  # stop after the first hit; drop this line to list every message from the sender


01/01/2024, 09:41 AM - Alex: Check the email I sent you.
