# STEP 1 — Generate CSV using LLM

In [14]:
import csv
import io
import os

import openai

# Resolve the API key: prefer one already exported in the environment so a real
# key never has to be pasted into (and saved with) the notebook. The literal is
# only a placeholder fallback and must be replaced if no env var is set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR OPENAI API KEY")
OPENAI_BASE_URL = "https://openai.vocareum.com/v1"

# Export the settings for libraries that read the environment, and configure
# the already-imported openai module directly.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["OPENAI_BASE_URL"] = OPENAI_BASE_URL

openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_BASE_URL


def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if any.

    The prompt asks for raw CSV, but the model may still wrap its reply in
    triple backticks; only a leading and/or trailing fence line is removed.
    """
    lines = text.strip().splitlines()
    if lines and lines[0].lstrip().startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines).strip()


def _validate_csv(text):
    """Raise ``ValueError`` if ``text`` is not a rectangular, fully populated CSV.

    Checks that there is a header plus at least one data row, that every data
    row has as many fields as the header (catches unquoted commas inside a
    description), and that no cell is empty.
    """
    rows = [row for row in csv.reader(io.StringIO(text)) if row]
    if len(rows) < 2:
        raise ValueError("LLM reply does not contain a header and at least one listing")
    header = rows[0]
    for line_no, row in enumerate(rows[1:], start=2):
        if len(row) != len(header):
            raise ValueError(
                f"CSV row {line_no} has {len(row)} fields, expected {len(header)}: {row!r}"
            )
        if any(not cell.strip() for cell in row):
            raise ValueError(f"CSV row {line_no} contains an empty cell: {row!r}")


prompt = """
Generate 10 synthetic real estate listings.
Return ONLY valid CSV format (no backticks, no commentary).
Columns:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description

Rules:
- Bedrooms must be an integer between 1 and 5.
- Bathrooms must be an integer between 1 and 4.
- Price must be a realistic positive number.
- House Size must be a positive number in square feet.
- Add a proper in detail description for each estate.
- There should not be any None/Null/Void Cell in the CSV
"""

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
    temperature=0.5
)

# Clean and validate before writing so a malformed reply fails here, with a
# clear message, instead of silently corrupting the data the later steps index.
csv_text = _strip_code_fence(response.choices[0].message.content)
_validate_csv(csv_text)

# Save directly as CSV file
with open("listings.csv", "w", encoding="utf-8") as f:
    f.write(csv_text)

print("CSV generated successfully: listings.csv")

CSV generated successfully: listings.csv


# STEP 2 — Load CSV → Split → Embed → ChromaDB

In [15]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Read the generated listings file into LangChain documents
loader = CSVLoader(file_path="listings.csv")
docs = loader.load()

# Run the documents through a character splitter
# (target chunk size 1000, no overlap between chunks)
splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
split_docs = splitter.split_documents(documents=docs)

# Embed the chunks with OpenAI embeddings and index them in Chroma
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents=split_docs, embedding=embeddings)
print("ChromaDB Created")

ChromaDB Created


# STEP 3 — Semantic Search (Using ChromaDB)

In [16]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Natural-language request to match against the indexed listings
query = "Find me a 3-bedroom house near a family-friendly neighborhood."

# Ask the vector store for the 5 most similar documents
results = db.similarity_search(query, k=5)

# Print every hit under a numbered banner (numbering starts at 1)
for rank, hit in enumerate(results, start=1):
    print(f"\n--- Result {rank} ---", hit.page_content, sep="\n")



--- Result 1 ---
Neighborhood: Suburbia
Price: 350000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Spacious family home located in a quiet neighborhood. This 3 bedroom, 2 bathroom house features a large backyard and updated kitchen.

--- Result 2 ---
Neighborhood: Suburbia
Price: 350000
Bedrooms: 4
Bathrooms: 3
House Size: 2500
Description: Large 4 bedroom, 3 bathroom house located in a quiet suburban neighborhood. Perfect for a growing family.

--- Result 3 ---
Neighborhood: Suburban Oasis
Price: 400000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Inviting 3 bedroom, 2 bathroom home in a peaceful suburban setting with a spacious backyard for entertaining.

--- Result 4 ---
Neighborhood: Downtown
Price: 500000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Beautiful 3 bedroom, 2 bathroom home in the heart of downtown. Spacious living area with modern finishes.

--- Result 5 ---
Neighborhood: Gated Community
Price: 900000
Bedrooms: 4
Bathrooms: 3
House Size:

# STEP 4 — Personalisation using RAG (Retrieval-Augmented Generation)

In [17]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Model that writes the final answer (temperature 0, up to 500 tokens)
llm = OpenAI(max_tokens=500, temperature=0, model_name="gpt-3.5-turbo")

# Retriever over the Chroma index; fetches 5 documents per query
retriever = db.as_retriever(search_kwargs=dict(k=5))

# Retrieval-augmented QA chain using the "stuff" chain type
rag = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")

# Ask for a recommendation and show the generated answer
query = "Recommend the best listing for a young working couple who need 2 bedrooms and a safe area."
response = rag.run(query)

print(response)

The best listing for a young working couple who need 2 bedrooms and a safe area would be the condo in the Downtown neighborhood. It offers modern amenities, 2 bedrooms, 2 bathrooms, and is located in the heart of the city.


# STEP 5 — Add Personalization: summarize the user's preferences from the chat history

In [19]:

from langchain.memory import ChatMessageHistory
from langchain.llms import OpenAI

# Questions the assistant asks, and the user's scripted reply to each one
# (the two lists are paired by position).
personal_questions = [
    "What is your ideal number of bedrooms?",
    "What kind of neighborhood do you prefer?",
    "What is your maximum budget?",
    "Which amenities matter most to you?",
    "Do you prefer urban, suburban, or semi-urban areas?",
]

answers = [
    "I want 3 bedrooms.",
    "A quiet and family-friendly neighborhood.",
    "My maximum budget is 80 lakhs.",
    "Parking, nearby schools, and a park.",
    "I prefer suburban areas.",
]

# Record the dialogue: an opening instruction, then alternating
# assistant question / user answer turns.
history = ChatMessageHistory()
history.add_user_message(
    f"You are a real estate AI assistant. Ask the user {len(personal_questions)} personalization questions."
)

for question, answer in zip(personal_questions, answers):
    history.add_ai_message(question)
    history.add_user_message(answer)

# Flatten the history into "<Speaker>: <text>" lines; messages of type
# "human" are labelled User, everything else Assistant.
speaker_by_type = {"human": "User"}
transcript = "".join(
    f"{speaker_by_type.get(message.type, 'Assistant')}: {message.content}\n"
    for message in history.messages
)

# Have the LLM condense the transcript into the five preference fields
llm_summary = OpenAI(temperature=0, model_name="gpt-3.5-turbo")

summary_prompt = (
    "Summarize the user's house preferences from the following conversation. "
    "Extract EXACTLY these items: bedrooms, neighborhood type, budget, amenities, and location preference.\n\n"
    f"Conversation:\n{transcript}\n\nSummary:"
)

summary_text = llm_summary(summary_prompt)

print("=== USER PREFERENCE SUMMARY ===", summary_text, sep="\n")


=== USER PREFERENCE SUMMARY ===
- Bedrooms: 3
- Neighborhood type: Quiet and family-friendly
- Budget: 80 lakhs
- Amenities: Parking, nearby schools, park
- Location preference: Suburban areas


In [20]:

# Names used below that no earlier cell imports; importing them here lets the
# cell run on a fresh kernel (Restart & Run All).
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Prompt for the answer-writing step. {context} receives the retrieved
# listings, {question} the user's request, and {summary} the preference
# summary (`summary_text`) produced in the previous step.
prompt = PromptTemplate(
    template='''
You are a smart real estate advisor.


User Preferences Summary:
{summary}


Retrieved Property Information:
{context}


User Question:
{question}


Answer in a friendly, helpful tone (max 5 sentences).
''',
    input_variables=["summary", "context", "question"],
)


chain_type_kwargs = {"prompt": prompt}


# No `memory` argument: the notebook never defines one, and the preference
# summary is supplied explicitly with each call instead (see below).
personalized_chain = ConversationalRetrievalChain.from_llm(
    llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=400),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 5}),
    combine_docs_chain_kwargs=chain_type_kwargs,
)


final_query = "Recommend the most suitable house for me based on my preferences."

# "summary" is an extra input beyond the chain's own keys; it is forwarded to
# the combine-documents prompt so the recommendation actually uses the
# preferences gathered in step 5. An empty chat history means the question is
# used as-is, without a rephrasing step.
response = personalized_chain(
    {"question": final_query, "chat_history": [], "summary": summary_text}
)


print("=== FINAL PERSONALIZED RECOMMENDATION ===")
print(response.get("answer", response))

=== FINAL PERSONALIZED RECOMMENDATION ===
Based on your preferences for comfort and tranquility, I recommend the property in the Suburban Oasis neighborhood. This inviting 3 bedroom, 2 bathroom home with a spacious backyard is perfect for entertaining and offers a serene suburban oasis. I encourage you to explore this property further and envision yourself living there. Feel free to ask any questions or schedule a viewing to see if this home is the perfect fit for your needs and preferences. Happy house hunting!
