# STEP 1 — Generate CSV using LLM

In [1]:
import os
import openai

os.environ["OPENAI_API_KEY"] = "YOUR API KEY"
os.environ["OPENAI_BASE_URL"] = "https://openai.vocareum.com/v1"

openai.api_key = "YOUR API KEY"
openai.api_base = "https://openai.vocareum.com/v1"


prompt = """
Generate 10 synthetic real estate listings.
Return ONLY valid CSV format (no backticks, no commentary).
Columns:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description

Rules:
- Bedrooms must be an integer between 1 and 5.
- Bathrooms must be an integer between 1 and 4.
- Price must be a realistic positive number.
- House Size must be a positive number in square feet.
- Add a proper in detail description for each estate.
- There should not be any None/Null/Void Cell in the CSV
"""

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
    temperature=0.5
)

csv_text = response.choices[0].message.content.strip()

# Save directly as CSV file
with open("listings.csv", "w", encoding="utf-8") as f:
    f.write(csv_text)

print("CSV generated successfully: listings.csv")

CSV generated successfully: listings.csv


# STEP 2 — Load CSV → Split → Embed → ChromaDB

In [2]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Load CSV
loader = CSVLoader(file_path="listings.csv")
docs = loader.load()

# Split long descriptions if needed
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = splitter.split_documents(docs)

# Embeddings + ChromaDB
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(split_docs, embeddings)

# STEP 3 — Semantic Search (Using ChromaDB)

In [3]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# SEMANTIC SEARCH QUERY
query = "Find me a 3-bedroom house near a family-friendly neighborhood."

# Retrieve top 5 similar documents
results = db.similarity_search(query, k=5)

for i, r in enumerate(results, 1):
    print(f"\n--- Result {i} ---")
    print(r.page_content)



--- Result 1 ---
Neighborhood: Suburbia
Price: 750000
Bedrooms: 4
Bathrooms: 2
House Size: 2500
Description: Spacious 4 bedroom, 2 bathroom family home in a quiet suburban neighborhood. Large backyard perfect for kids and pets.

--- Result 2 ---
Neighborhood: Historic District
Price: 950000
Bedrooms: 3
Bathrooms: 2
House Size: 2200
Description: Quaint 3 bedroom, 2 bathroom historic home in a charming neighborhood. Close to shops, restaurants, and cultural attractions.

--- Result 3 ---
Neighborhood: Beachfront
Price: 1000000
Bedrooms: 3
Bathrooms: 3
House Size: 1800
Description: Beautiful 3 bedroom, 3 bathroom beachfront villa with stunning ocean views. Ideal for those who love to relax by the water.

--- Result 4 ---
Neighborhood: Lakeview
Price: 800000
Bedrooms: 5
Bathrooms: 4
House Size: 3000
Description: Spacious 5 bedroom, 4 bathroom lakefront home with panoramic views. Ideal for large families or those who love to entertain.

--- Result 5 ---
Neighborhood: City Center
Price: 900

# STEP 4 — Personalisation using RAG (Retrieval-Augmented Generation)

In [4]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# LLM
llm = OpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=500
)

# Build Retriever
retriever = db.as_retriever(search_kwargs={"k": 5})

# RAG Chain
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

query = "Recommend the best listing for a young working couple who need 2 bedrooms and a safe area."
response = rag.run(query)

print(response)



The best listing for a young working couple who need 2 bedrooms and a safe area would be the cozy 2 bedroom, 1 bathroom condo in the heart of downtown. It is in a safe neighborhood and perfect for young professionals.


# STEP 5 - Add Personalization with ConversationSummaryMemory

In [5]:
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI

In [8]:
personal_questions = [
    "What is your ideal number of bedrooms?",
    "What kind of neighborhood do you prefer?",
    "What is your maximum budget?",
    "Which amenities matter most to you?",
    "Do you prefer urban, suburban, or semi-urban areas?"
]

# ---------------------------------------------
# 2. Hard-Coded Answers (replace with anything you like)
# ---------------------------------------------
answers = [
    "I want 3 bedrooms.",
    "A quiet and family-friendly neighborhood.",
    "My maximum budget is 80 lakhs.",
    "I need parking, nearby schools, and a park.",
    "I prefer suburban areas."
]

history = ChatMessageHistory()

# Initial system instruction
history.add_user_message(
    f"You are a real estate AI assistant. Ask the user {len(personal_questions)} personalization questions."
)

# Add each Q/A pair into the chat history (simulated conversation)
for q, a in zip(personal_questions, answers):
    history.add_ai_message(q)     # AI asks the question
    history.add_user_message(a)   # Hardcoded user answer

# ---------------------------------------------
# 3. Create Summarization Memory
# ---------------------------------------------
llm_summary = OpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=200
)

memory = ConversationSummaryMemory(
    llm=llm_summary,
    chat_memory=history,
    memory_key="summary",
    input_key="question",
    return_messages=True,
    buffer="Summarize user preferences clearly. Extract budget, bedrooms, amenities, and neighborhood type."
)

# Generate summary
memory.load_memory_variables({"question": "Summarize my home preferences."})
summary_text = memory.buffer

print("\n\n=== USER PREFERENCE SUMMARY ===")
print(summary_text)

# ---------------------------------------------
# 4. Personalized Prompt Template for RAG
# ---------------------------------------------
prompt = PromptTemplate(
    template="""
You are a smart real estate advisor.

User Preferences Summary:
{summary}

Retrieved Property Information:
{context}

User Question:
{question}

Answer in a friendly, helpful tone (max 5 sentences).
""",
    input_variables=["summary", "context", "question"]
)

chain_type_kwargs = {"prompt": prompt}

# ---------------------------------------------
# 5. Build final Personalized RAG system
# ---------------------------------------------
personalized_chain = ConversationalRetrievalChain.from_llm(
    llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=400),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 5}),
    combine_docs_chain_kwargs=chain_type_kwargs,
    memory=memory
)

# ---------------------------------------------
# 6. Final Query (House Recommendation)
# ---------------------------------------------
final_query = "Recommend the most suitable house for me based on my preferences."

response = personalized_chain({"question": final_query,
                             "chat_history":[]})

print("\n\n=== FINAL RECOMMENDATION ===")
print(response["answer"])



=== USER PREFERENCE SUMMARY ===
Summarize user preferences clearly. Extract budget, bedrooms, amenities, and neighborhood type.


=== FINAL RECOMMENDATION ===
Based on your preferences for a quiet suburban neighborhood with 4 bedrooms and 2 bathrooms, I would recommend the property in Suburbia. It fits within your budget and offers a spacious family home with a large backyard, perfect for kids and pets. The Beachfront property may also be a good option with 3 bedrooms and 3 bathrooms, ideal for relaxation by the water. However, if you prefer a historic charm and close proximity to shops and restaurants, the property in the Historic District could be a great fit. Ultimately, the choice depends on your lifestyle and priorities. Let me know if you need more information or assistance in making your decision.
