# STEP 0 - Install chromadb using !pip install chromadb
### All other libraries (openai, langchain, etc.) should be installed at their latest versions

In [None]:
!pip install chromadb

# STEP 1 - Generate Estate Listings using an LLM (gpt-3.5-turbo is used here)
### Provide your OpenAI API key once in the cell below; the later cells read it from the `OPENAI_API_KEY` environment variable, so it does not need to be entered again.

In [24]:
import csv
import io
import os
from getpass import getpass

from openai import OpenAI

# Take the key from the environment and only prompt (without echo) when it is
# missing, so the secret is never stored in the notebook and an existing key
# is never overwritten by a placeholder.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
client = OpenAI(base_url="https://openai.vocareum.com/v1")

# Column order requested from the model and written to listings.csv.
LISTING_COLUMNS = [
    "Neighborhood", "Price", "Bedrooms", "Bathrooms", "House Size", "Description"
]

# Named `listing_prompt` (not `prompt`) so that re-running this cell cannot
# clobber the chat prompt template that a later cell stores under `prompt`.
listing_prompt = """
Generate 10 synthetic real estate listings.
Return ONLY valid CSV format (no backticks, no commentary).
Columns:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description

Rules:
- Bedrooms must be an integer between 1 and 5.
- Bathrooms must be an integer between 1 and 4.
- Price must be a realistic positive number.
- House Size must be a positive number in square feet.
- Add a proper in detail description for each estate.
- There should not be any None/Null/Void Cell in the CSV
"""


def parse_listing_csv(raw_text):
    """Turn the model's reply into clean listing rows.

    Args:
        raw_text: Text returned by the model, expected to be CSV with the
            LISTING_COLUMNS header.

    Returns:
        A list of rows (lists of 6 stripped strings), header excluded.

    Raises:
        ValueError: if the header is missing/unexpected, a row has too few
            fields, a cell is empty, or there are no data rows.
    """
    text = raw_text.strip()
    # Models sometimes wrap the CSV in a Markdown code fence despite the prompt.
    if text.startswith("```"):
        _, _, text = text.partition("\n")
        text = text.rsplit("```", 1)[0]

    reader = csv.reader(io.StringIO(text, newline=""), skipinitialspace=True)
    rows = [row for row in reader if any(cell.strip() for cell in row)]
    if not rows:
        raise ValueError("The model returned no CSV content.")

    header = [cell.strip().lower() for cell in rows[0]]
    if header != [name.lower() for name in LISTING_COLUMNS]:
        raise ValueError(f"Unexpected CSV header: {rows[0]!r}")

    width = len(LISTING_COLUMNS)
    listings = []
    for line_no, row in enumerate(rows[1:], start=2):
        if len(row) < width:
            raise ValueError(f"Row {line_no} has {len(row)} fields, expected {width}: {row!r}")
        # Heuristic: extra fields are assumed to come from unquoted commas in
        # the free-text Description (the last column), so fold them back in.
        fixed = [cell.strip() for cell in row[:width - 1]]
        fixed.append(",".join(row[width - 1:]).strip())
        if not all(fixed):
            raise ValueError(f"Row {line_no} contains an empty cell: {row!r}")
        listings.append(fixed)

    if not listings:
        raise ValueError("The model returned a header but no listings.")
    return listings


response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": listing_prompt}],
    temperature=0.5
)

csv_text = (response.choices[0].message.content or "").strip()
listing_rows = parse_listing_csv(csv_text)

# Re-serialise with csv.writer so fields containing commas are quoted properly;
# newline="" is what the csv module expects for files it writes.
with open("listings.csv", "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(LISTING_COLUMNS)
    writer.writerows(listing_rows)

print("CSV generated successfully → listings.csv")


CSV generated successfully → listings.csv


# STEP 2 - Load CSV -> Embed -> ChromaDB

In [25]:
import os
import csv
from openai import OpenAI
from langchain_community.vectorstores import Chroma
# NOTE(review): langchain_community's OpenAIEmbeddings appears to be deprecated in
# favour of langchain_openai.OpenAIEmbeddings — consider switching; confirm first.
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# OpenAI client (nothing in this cell uses it; kept so `client` stays defined)
client = OpenAI(base_url="https://openai.vocareum.com/v1")

# Load the CSV with the standard csv module: one "column: value" text block per listing
docs = []
with open("listings.csv", "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        text = "\n".join(f"{k}: {v}" for k, v in row.items())
        docs.append(text)

if not docs:
    raise ValueError("listings.csv contains no listings; re-run STEP 1 first.")

# Wrap each listing in a Document (and split any that exceed the chunk size)
splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
split_docs = splitter.create_documents(docs)

# Embeddings
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    openai_api_base="https://openai.vocareum.com/v1"
)

# Create ChromaDB.
# Without an explicit reset, re-running this cell in the same kernel keeps
# adding documents to the same in-memory collection, so searches return stale
# or duplicated listings. Drop the named collection first, then rebuild it.
COLLECTION_NAME = "estate_listings"
Chroma(collection_name=COLLECTION_NAME, embedding_function=embeddings).delete_collection()
db = Chroma.from_documents(
    split_docs,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
)

print("ChromaDB created successfully.")


ChromaDB created successfully.


# STEP 3 - Semantic Search

In [26]:
# Free-text request used to probe the vector store.
query = "Find me a 3-bedroom house in a family-friendly area."

# Ask the store for the five listings closest to the request.
results = db.similarity_search(query, k=5)

# Show every hit under a numbered banner (numbering starts at 1).
for rank, hit in enumerate(results, start=1):
    print(f"\n--- Result {rank} ---\n{hit.page_content}")


--- Result 1 ---
Neighborhood: Suburbia
Price: 350000
Bedrooms: 4
Bathrooms: 3
House Size: 2500
Description: Charming family home in a quiet suburb. Large backyard perfect for kids and pets. Close to schools and parks.

--- Result 2 ---
Neighborhood: Suburbia
Price: 350000
Bedrooms: 4
Bathrooms: 3
House Size: 2500
Description: Spacious family home with a large backyard perfect for entertaining guests.

--- Result 3 ---
Neighborhood: Gated Community
Price: 650000
Bedrooms: 3
Bathrooms: 3
House Size: 2800
Description: Elegant home in a secure gated community. High-end finishes throughout, including a gourmet kitchen and spa-like bathrooms.

--- Result 4 ---
Neighborhood: Golf Course
Price: 600000
Bedrooms: 3
Bathrooms: 3
House Size: 2600
Description: Gorgeous home overlooking the golf course. Perfect for golf enthusiasts with a private putting green in the backyard.

--- Result 5 ---
Neighborhood: Downtown
Price: 500000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Beautiful mo

# STEP 4 - Personalisation using RAG

In [27]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Chat model that writes the final answers (temperature 0 = least random sampling).
llm = ChatOpenAI(
    openai_api_base="https://openai.vocareum.com/v1",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    temperature=0,
)

# Retriever view of the vector store; every lookup asks for 5 listings.
retriever = db.as_retriever(search_kwargs=dict(k=5))

# RAG prompt: {context} receives the retrieved listings, {query} the user's question.
RAG_TEMPLATE = """
You are a helpful real estate assistant.

Use ONLY the property listings below to answer the user's question.

Listings:
{context}

User Question:
{query}

Answer:
"""
prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# RAG function
def rag_answer(query):
    """Answer a question using listings retrieved from the vector store.

    Args:
        query: The user's question; also used as the retrieval query.

    Returns:
        The text content of the chat model's reply.
    """
    # Retrieve the listings most similar to the question
    docs = retriever.invoke(query)
    context = "\n\n".join(d.page_content for d in docs)

    # Build chat messages rather than a flattened string: formatting a chat
    # template to a string prepends a "Human: " role label, which would then
    # be sent to the model as part of the message text.
    messages = prompt.format_messages(context=context, query=query)

    # Call LLM
    response = llm.invoke(messages)
    return response.content

# Demo: run one question through the RAG pipeline and show the reply.
demo_question = "Recommend a house for a family with children under 80 lakhs."
answer = rag_answer(demo_question)
print(answer)


I recommend the charming family home in Suburbia for 350000. It has 4 bedrooms, 3 bathrooms, and a large backyard perfect for kids and pets. It is close to schools and parks, making it an ideal choice for a family with children.


# STEP 5.1 - Creating a Summary of preferences

In [28]:
from langchain_openai import ChatOpenAI

# Chat model dedicated to condensing the preference interview into a summary.
summary_llm = ChatOpenAI(
    openai_api_base="https://openai.vocareum.com/v1",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0,
    model="gpt-3.5-turbo",
)

def summarize_preferences(questions, answers):
    """Summarize a preference interview into one paragraph via `summary_llm`.

    Args:
        questions: Iterable of question strings.
        answers: Iterable of answer strings, one per question, in the same order.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: if the number of questions and answers differ (pairing
            them positionally would otherwise silently drop the extras).
    """
    questions, answers = list(questions), list(answers)
    if len(questions) != len(answers):
        raise ValueError(
            f"Got {len(questions)} questions but {len(answers)} answers; "
            "each question needs exactly one answer."
        )

    # Render the interview as a Q/A transcript, one blank line between pairs.
    text = "".join(f"Q: {q}\nA: {a}\n\n" for q, a in zip(questions, answers))

    # Local name differs from the notebook-level `prompt` to avoid shadowing it.
    summary_prompt = f"""
Summarize the user's home preferences clearly.
Extract budget, bedrooms, neighborhood type, amenities, and any constraints.

Conversation:
{text}

Return summary in a detailed paragraph.
"""

    result = summary_llm.invoke(summary_prompt)
    return result.content


In [29]:
# Interview transcript kept as (question, answer) pairs so the two lists
# derived below can never drift out of step.
preference_qa = [
    ("Ideal bedrooms?", "3 bedrooms"),
    ("Preferred neighborhood?", "Quiet, family-friendly area"),
    ("Maximum budget?", "80 lakhs"),
    ("Which amenities matter?", "Parking and nearby schools"),
    ("Urban/suburban preference?", "Suburban"),
]

personal_questions = [question for question, _ in preference_qa]
answers = [reply for _, reply in preference_qa]

# Condense the interview into a single preference paragraph and show it.
summary = summarize_preferences(personal_questions, answers)
print(summary)


The user is looking for a home with 3 bedrooms in a quiet, family-friendly suburban neighborhood. Their maximum budget is 80 lakhs, and they prioritize amenities such as parking and nearby schools. They are not interested in an urban setting and prefer a more suburban environment for their ideal home.


# STEP 5.2 - Personalisation using Summary of Past Conversation

In [30]:
# Prompt for personalised answers: {summary} holds the preference paragraph,
# {context} the retrieved listings and {query} the user's question.
PERSONALIZED_TEMPLATE = """
You are a smart real estate advisor.

User Preferences:
{summary}

Listings:
{context}

User Question:
{query}

Answer in a friendly and helpful tone.
"""
personalized_prompt = ChatPromptTemplate.from_template(PERSONALIZED_TEMPLATE)

def personalized_rag(query, summary):
    """Answer a question using retrieved listings plus the user's preferences.

    Args:
        query: The user's question.
        summary: Paragraph describing the user's home preferences.

    Returns:
        The text content of the chat model's reply.
    """
    # Search with the preferences as well as the question: a generic question
    # such as "recommend a home" carries no signal about what to retrieve, so
    # searching on it alone would fetch listings unrelated to the user.
    search_text = f"{summary}\n\n{query}" if summary else query
    docs = retriever.invoke(search_text)
    context = "\n\n".join(d.page_content for d in docs)

    # Build chat messages rather than a flattened string: formatting a chat
    # template to a string prepends a "Human: " role label, which would then
    # be sent to the model as part of the message text.
    messages = personalized_prompt.format_messages(
        summary=summary,
        context=context,
        query=query
    )

    response = llm.invoke(messages)
    return response.content


In [31]:
# Demo: ask for a recommendation, letting the stored preference summary
# steer the answer.
final_answer = personalized_rag(query="Recommend the best home for me.", summary=summary)

print(final_answer)

Based on your preferences for a 3-bedroom home in a quiet, family-friendly suburban neighborhood with amenities like parking and nearby schools, I would recommend the charming family home in Suburbia. It fits within your budget of 80 lakhs and offers a spacious layout with a large backyard, perfect for kids and pets. It's also close to schools and parks, making it an ideal choice for your family. Feel free to schedule a viewing to see if this home meets all your needs and preferences.
