In [4]:
%pip install -qU langgraph langchain langchain_openai langchain_experimental pymupdf tiktoken python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
# from helper.file_loader import FileLoader

# Load variables from a .env file into the process environment before reading keys.
load_dotenv()

# Look up credentials by name; each value is None when the variable is not set.
open_ai_key = os.environ.get("OPENAI_API_KEY")
langchain_key = os.environ.get("LANGCHAIN_API_KEY")

# Chat model that generates the final answers in the RAG chain.
llm = ChatOpenAI(model="gpt-4o", api_key=open_ai_key)

In [4]:
# PyMuPDFLoader is provided by langchain_community; the legacy
# `langchain.document_loaders` path is deprecated and not available in newer
# langchain releases.
from langchain_community.document_loaders import PyMuPDFLoader

# Parse the source PDF into LangChain documents for chunking.
document = PyMuPDFLoader("assets/real_estate_book_practice.pdf").load()

In [5]:
import tiktoken
# RecursiveCharacterTextSplitter lives in langchain_text_splitters; the legacy
# `langchain.text_splitter` path is deprecated and not available in newer langchain releases.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Resolve the tokenizer once instead of looking it up on every length computation;
# the splitter calls the length function many times while chunking.
_TOKEN_ENCODING = tiktoken.encoding_for_model("gpt-4o-mini")


def tiktoken_len(text: str) -> int:
    """Return the number of tokens in ``text`` under the gpt-4o-mini tokenizer.

    Args:
        text: The string to measure.

    Returns:
        The length of the token sequence produced by encoding ``text``.
    """
    return len(_TOKEN_ENCODING.encode(text))


# Chunk size and overlap are measured in tokens (via tiktoken_len), not characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=0,
    length_function=tiktoken_len,
)

split_chunks = text_splitter.split_documents(document)

In [8]:
# Index of the chunk to preview. The label is built from this value so the printed
# description always matches the chunk that is actually shown (the previous label
# said "First document" while printing index 100).
sample_index = 100

print(f"total split into {len(split_chunks)} documents")
print(f"Document {sample_index}: {split_chunks[sample_index]}")

total split into 1604 documents
First document: page_content='agent, while the sales agent is the “agent of the (buyer’s) agent.”1 [See RPI 
Form 305 §3]
Historically, and incorrectly, the broker and their agent who represented 
a buyer in a sales transaction were — but no longer are — referred to 
as subagents within the residential multiple listing service (MLS) 
brokerage community until the late 1980s. The misnomer was a product of 
the pre-1980s MLS environment. As the genesis, it was said all brokers (and 
their agents) who were members of a trade union’s MLS were automatically 
“seller’s agents.” Thus, MLS subscribers working directly with a buyer were 
merely subagents employed by the seller to sell a property listed in the MLS 
through the seller’s primary broker. 
1	  Calif. Civil Code §2079.13(n)
An industry-
wide 
membership 
misconception 
 
subagent 
An individual who 
has been delegated 
agency duties by a 
broker employed by 
a client, not the client 
themselves.
Agency

In [10]:
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the vector store when indexing the chunks.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

In [11]:
%pip install -qU --disable-pip-version-check qdrant-client

Note: you may need to restart the kernel to use updated packages.


In [12]:
from langchain_community.vectorstores import Qdrant

# Embed every chunk and index it in a Qdrant collection created with the
# ":memory:" location (nothing is written to a configured server or path here).
qdrant_vectorstore = Qdrant.from_documents(
    documents=split_chunks,
    embedding=embedding_model,
    location=":memory:",
    collection_name="extending_context_window_llama_3",
)

In [13]:
# Expose the vector store through the retriever interface so it can be piped into
# the RAG chain. No search options are passed, so the library defaults apply.
qdrant_retriever = qdrant_vectorstore.as_retriever()

In [20]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template for the RAG chain. It has two placeholders: {context}, filled with
# the retrieved material, and {question}, filled with the user's query. The persona
# and the "say you don't know" fallback instruction come after both placeholders.
RAG_PROMPT = """
CONTEXT: 
{context}

QUERY: 
{question}

You are a Real Estate Agent with 20 years of experience that enjoys mentoring your clients. Use the available context to answer the question. If you can't answer the question say you don't know.
"""

# Build the chat prompt object from the template string above.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [21]:
from operator import itemgetter
# StrOutputParser is provided by langchain_core; the legacy
# `langchain.schema.output_parser` path is deprecated and not available in newer langchain releases.
from langchain_core.output_parsers import StrOutputParser

# RAG pipeline. Input is a dict with a "question" key; output is the model's reply
# as a plain string.
rag_chain = (
    {
        # Send the question text to the retriever to fetch supporting chunks.
        "context": itemgetter("question") | qdrant_retriever,
        # Forward the question text unchanged to the prompt.
        "question": itemgetter("question"),
    }
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [22]:
# Smoke test: run the chain end to end on a sample question and display the answer.
rag_chain.invoke({"question": "who does a real estate agent work for?"})

"As a real estate agent with 20 years of experience, I can tell you that a real estate agent works for their employing broker. According to the provided context, an agent is strictly an agent of the employing broker and cannot contract in their own name or on behalf of anyone other than their employing broker. This means the agent performs services in real estate transactions as a representative leashed to their broker for supervision, all mandated by state codes and regulations. Any fees for services the agent performs are always paid to their employing broker. Thus, an agent's primary allegiance and employment are tied to the broker they work under."