# STEP 1: SETUP THE OPENAI API KEY AND BASE

In [1]:
# All Import Statements used in this step
import os
import openai

In [2]:
os.environ["OPENAI_API_KEY"] = "voc-1682785691266774337744690b15dbca9a82.58870878"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [3]:
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("OpenAI API Key not provided.")

In [4]:
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")

In [5]:
MODEL = "gpt-3.5-turbo"

# STEP 2: GENERATE REAL ESTATE CSV USING LLM

In [6]:
# All Import statements used in this step
import openai

In [7]:
CSV_GENERATOR_SYSTEM_PROMPT = """
You are a CSV Data Generator.

RULES YOU MUST ADHERE TO WHEN CREATING CSV DATA:
1. Use a semi-colon (;) as the Delimiter, NOT a comma.
2. Do not number the rows.
3. No symbols like $, sqft, etc., are allowed within the CSV data.
4. All kinds of descriptions should be detailed and well-organized.

YOUR OUTPUT SHOULD ONLY CONSIST OF THE RAW CSV TEXT. DO NOT INCLUDE MARKDOWN BACKTICKS (```).
"""

In [8]:
QUERY_PROMPT = """
Generate at least 10 fictional Real Estates listings in CSV format.

The columns should be the following:
1. **neighborhood:** The neighborhood in which the property is .
2. **price:** The price of the property in USD.
3. **bedrooms:** The number of bedrooms present in the property. Should be a whole number between 1 to 5.
4. **bathrooms:** The number of bathrooms present in the property. Should be a whole number between 1 to 5.
5. **house_size:** The size of the property in sqft.
6. **description:** Detailed description of the property.
7. **neighborhood_description:** Detailed description of the neighborhood. 
"""

In [9]:
llm_response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.7,
    messages=[
        {
            "role": "system",
            "content": CSV_GENERATOR_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": QUERY_PROMPT
        }
    ]
)

In [10]:
print(llm_response.choices[0].message.content)

neighborhood;price;bedrooms;bathrooms;house_size;description;neighborhood_description
Oak Ridge;350000;3;2;2000;Beautiful single-family home located in the peaceful neighborhood of Oak Ridge. The house features three bedrooms, two bathrooms, a spacious living room, and a modern kitchen. The property includes a well-maintained backyard and a two-car garage.;Oak Ridge is known for its family-friendly environment, with top-rated schools, parks, and easy access to shopping centers and restaurants.
Willow Creek;420000;4;3;2800;Stunning two-story house in Willow Creek with four bedrooms, three bathrooms, and a bonus room that can be used as an office or playroom. The property boasts a large backyard perfect for outdoor entertaining and a cozy fireplace in the living room.;Willow Creek is a prestigious neighborhood with tree-lined streets, upscale dining options, and close proximity to golf courses and recreational facilities.
Sunset Hills;290000;2;1;1500;Cozy ranch-style home in Sunset Hills

In [11]:
csv_data = llm_response.choices[0].message.content.replace("```csv", "").replace("```", "").strip()

In [12]:
with open("Real_Estates.csv", "w", encoding="utf-8") as file:
    file.write(csv_data)

# STEP 3: READ THE DATA FROM THE CSV

In [13]:
import pandas as pd

In [14]:
df = pd.read_csv(
    "Real_Estates.csv", 
    sep=";",
    engine="python"
)

In [15]:
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Oak Ridge,350000,3,2,2000,Beautiful single-family home located in the pe...,Oak Ridge is known for its family-friendly env...
1,Willow Creek,420000,4,3,2800,Stunning two-story house in Willow Creek with ...,Willow Creek is a prestigious neighborhood wit...
2,Sunset Hills,290000,2,1,1500,Cozy ranch-style home in Sunset Hills offering...,Sunset Hills is a charming neighborhood known ...
3,Maplewood,550000,5,4,3500,Luxurious estate in Maplewood featuring five b...,Maplewood is an upscale neighborhood with excl...
4,Pinecrest Heights,380000,3,2,2100,Modern townhouse in Pinecrest Heights offering...,Pinecrest Heights is a vibrant neighborhood wi...


# STEP 3: POPULATE VECTOR DATABASE

In [16]:
# All Import Statements used in this step
import shutil
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [17]:
CHROMA_PATH = "content/chroma-1"

In [18]:
embedding = OpenAIEmbeddings()

In [19]:
loader = DataFrameLoader(df, page_content_column="description")

In [20]:
documents = loader.load()
print(f"Successfully created {len(documents)} documents from CSV.")

Successfully created 10 documents from CSV.


In [21]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True
)

In [22]:
chunks = text_splitter.split_documents(documents)
print(f"Successfully created {len(chunks)} chunks from {len(documents)} documents.")

Successfully created 10 chunks from 10 documents.


In [23]:
if chunks:
    document = chunks[5]
    print(document.page_content)

Elegant waterfront property in Riverfront Estates with four bedrooms, three bathrooms, a formal dining room, a home office, and a screened-in porch overlooking the river. The house features high ceilings, hardwood floors, and custom finishes throughout.


In [24]:
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

In [25]:
db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding,
    persist_directory=CHROMA_PATH

)

In [26]:
db.persist()
print(f"Successfully populated vector database at {CHROMA_PATH}")

Successfully populated vector database at content/chroma-1


# STEP 4: SEMANTIC SEARCH USING RAG

In [27]:
# All Import statements in this step
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

In [28]:
llm = ChatOpenAI(
    model=MODEL,
    temperature=0
)

In [29]:
QA_TEMPLATE = """
You are LLMRealtor, a polite and friendly real estate agent.
Use the following context to recommend the best property to the buyer.

Context: {context}

Buyer Preference: {question}

Answer:
"""

In [30]:
QA_CHAIN_PROMPT = PromptTemplate.from_template(QA_TEMPLATE)

In [31]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [32]:
# Example User query
query = "A comfortable three-bedroom house with a spacious kitchen."

In [33]:
response = qa_chain.run(query)

In [34]:
print(response)

Based on your preference for a comfortable three-bedroom house with a spacious kitchen, I would recommend the beautiful single-family home in Oak Ridge. This property not only meets your criteria but also offers a peaceful neighborhood, a well-maintained backyard, and a two-car garage. It's the perfect blend of comfort and convenience for you and your family. Let me know if you would like to schedule a viewing or have any questions about this property.


# STEP 5: PERSONALIZED RECOMMENDATION USING LLM AND MEMORY CHAIN

In [35]:
# All Import Statements used in this step
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate

In [36]:
llm = ChatOpenAI(model_name=MODEL, temperature=0.7)

In [37]:
memory = ConversationSummaryMemory(llm=llm)

In [38]:
realtor_template = """
You are a friendly AI Realtor.
Your goal is to gather information to build a buyer's profile.
Ask questions one by one about budget, location, bedrooms, and lifestyle preferences.
Do not recommend houses yet. Keep your responses short and conversational.

---

Current Conversation Summary:
{history}

User: {input}
Realtor:
"""

In [39]:
realtor_prompt = PromptTemplate(
    template=realtor_template,
    input_variables=["history", "input"]
)
print(realtor_prompt)

input_variables=['history', 'input'] template="\nYou are a friendly AI Realtor.\nYour goal is to gather information to build a buyer's profile.\nAsk questions one by one about budget, location, bedrooms, and lifestyle preferences.\nDo not recommend houses yet. Keep your responses short and conversational.\n\n---\n\nCurrent Conversation Summary:\n{history}\n\nUser: {input}\nRealtor:\n"


In [40]:
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    prompt=realtor_prompt,
    verbose=False
)

print("AI Realtor Initialized. Ready to Chat!")

AI Realtor Initialized. Ready to Chat!


In [56]:
print("--- STARTING INTERVIEW. (Type 'done' to finish) ---")

while True:
    user_input = input("You: ")

    if user_input.lower() in ["done", "exit", "quit", "finish"]:
        print("\n--- INTERVIEW FINISHED ---")
        break

    ai_response = conversation.predict(input=user_input)
    print(f"AI Realtor: {ai_response}")

--- STARTING INTERVIEW. (Type 'done' to finish) ---
You: yes
AI Realtor: What specific features or amenities are important to you in a potential new house?
You: 3 bedrooms 2 bathrooms
AI Realtor: That's great! Do you have a specific location or neighborhood in mind for your new home?
You: no
AI Realtor: What specific features or amenities are important to you in a potential new house?
You: 3000 sqft
AI Realtor: That's a good size! Are there any specific features or amenities you're looking for in a home with that square footage?
You: 3000
AI Realtor: That's a spacious home! Are there any specific features or amenities you're looking for in a house of that size?
You: done

--- INTERVIEW FINISHED ---


In [57]:
summary_data = memory.load_memory_variables({})

In [58]:
user_profile_summary = summary_data["history"]
print(f"FINAL BUYER PROFILE: \n\n{user_profile_summary}")

FINAL BUYER PROFILE: 

The human mentions they are looking for a 3-bedroom, 2-bathroom home with a budget of 1000000. The AI asks for the specific location or neighborhood they are looking to move to but the human declines. The AI then asks about specific features or amenities the human is looking for in a home. When the human declines to provide more information, the AI suggests discussing what features or amenities are important to them in a potential new house. The human responds with a simple "no." The AI then asks what specific features or amenities are important to them in a potential new house and the human responds with a "yes." The human then clarifies they are looking for a 3-bedroom, 2-bathroom home and the AI asks what specific features or amenities are important to them. The human mentions they are looking for a 3000 sqft home. The AI comments on the size and asks if there are any specific features or amenities they are looking for in a house of that size. The human respon

In [59]:
profile_prompt = f"""
Based strictly on the following buyer's profile, write a 1-paragraph description of their absolute dream house.
Include specific details about neighborhood vibe, price point, and amenaties mentioned in the profile.

---

BUYER'S PROFILE:

{user_profile_summary}

---

DREAM HOUSE DESCRIPTION:
"""

In [60]:
dream_house_description = llm.predict(profile_prompt)

In [61]:
print(f"GENERATED SEARCH QUERY:\n\n{dream_house_description}")

GENERATED SEARCH QUERY:

The buyer's dream house would be a spacious 3-bedroom, 2-bathroom home with a budget of $1,000,000 in an undisclosed neighborhood. The ideal home would be a 3000 sqft property with no specific features or amenities mentioned. Given the large size of the house and the generous budget, this dream home would likely feature luxurious finishes, a gourmet kitchen, a large backyard, and possibly a pool or outdoor entertainment area. The neighborhood vibe would likely be upscale and quiet, offering a sense of privacy and exclusivity. Overall, the dream house would be a stunning, spacious retreat with high-end features and ample room for comfortable living and entertaining.


In [62]:
rag_response = qa_chain.run(dream_house_description)

In [63]:
print(rag_response)

Based on your preferences for a spacious 3-bedroom, 2-bathroom home with a budget of $1,000,000, I would recommend the luxurious estate in Maplewood. This property offers five bedrooms and four bathrooms, providing ample space for your needs. The gourmet kitchen, formal dining room, and grand master suite with a spa-like bathroom add a touch of luxury to the home. Additionally, the swimming pool and covered patio for outdoor dining make it perfect for entertaining guests or relaxing in your own private oasis. With its upscale neighborhood and high-end features, this property aligns well with your dream home criteria. I believe this estate in Maplewood would be an excellent choice for you. Let me know if you would like to schedule a viewing or have any further questions.
