# Step 1 - Generate Real Estate Listings Using an LLM

In [3]:
import csv
import getpass
import io
import os

import openai
from langchain.chat_models import ChatOpenAI

# OpenAI-compatible endpoint that every request in this notebook is sent to.
OPENAI_BASE_URL = "https://openai.vocareum.com/v1"

# Never commit a real key: reuse the one already exported in the environment,
# and only prompt for it (without echoing) when none is set. This also avoids
# clobbering a valid key with a placeholder string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
os.environ["OPENAI_BASE_URL"] = OPENAI_BASE_URL

# Mirror the same settings onto the openai module, which the
# openai.ChatCompletion call in this notebook relies on.
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = OPENAI_BASE_URL


# Instruction sent to the chat model as the single user message. It asks for
# 10 synthetic listings as raw CSV (no fences, no commentary) under a fixed
# six-column header, and spells out per-column constraints.
prompt = """
Generate 10 synthetic real estate listings.
Return ONLY valid CSV format (no backticks, no commentary).
Columns:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description

Rules:
- Bedrooms must be an integer between 1 and 5.
- Bathrooms must be an integer between 1 and 4.
- Price must be a realistic positive number.
- House Size must be a positive number in square feet.
- Add a proper in detail description for each estate.
- There should not be any None/Null/Void Cell in the CSV
"""

# Ask the chat model for the listings; the reply text is expected to be the CSV.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}],
    temperature=0.5
)

csv_text = response.choices[0].message.content.strip()

# Models sometimes wrap the reply in a Markdown code fence despite being told
# not to; drop the opening fence line and the closing fence line if present.
if csv_text.startswith("```"):
    reply_lines = csv_text.splitlines()[1:]
    if reply_lines and reply_lines[-1].strip() == "```":
        reply_lines = reply_lines[:-1]
    csv_text = "\n".join(reply_lines).strip()

# Validate before saving so a malformed reply fails here with a clear message
# instead of producing broken documents when the file is loaded later.
expected_columns = ["Neighborhood", "Price", "Bedrooms", "Bathrooms", "House Size", "Description"]
parsed_rows = [row for row in csv.reader(io.StringIO(csv_text)) if row]
if not parsed_rows or [cell.strip() for cell in parsed_rows[0]] != expected_columns:
    raise ValueError(f"Unexpected CSV header in model reply; expected {expected_columns}")
if len(parsed_rows) < 2:
    raise ValueError("Model reply contains a header but no listings")
# Row numbers count non-empty rows, with the header as row 1.
malformed_rows = [
    row_number
    for row_number, row in enumerate(parsed_rows[1:], start=2)
    if len(row) != len(expected_columns)
]
if malformed_rows:
    raise ValueError(
        f"Rows {malformed_rows} do not have {len(expected_columns)} columns "
        "(a field containing commas was probably left unquoted)"
    )

# Save directly as CSV file
with open("listings.csv", "w", encoding="utf-8") as f:
    f.write(csv_text)

print("CSV generated successfully: listings.csv")

CSV generated successfully: listings.csv


# Step 2 - Load CSV → Split → Embed → ChromaDB

In [4]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Read listings.csv into LangChain documents.
loader = CSVLoader(file_path="listings.csv")
docs = loader.load()

# Run the documents through a character splitter configured for
# 1000-character chunks with no overlap between consecutive chunks.
splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
split_docs = splitter.split_documents(documents=docs)

# Embed the resulting documents with OpenAI and index them in Chroma;
# `db` is the vector store handle used for search.
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents=split_docs, embedding=embeddings)

# Step 3 - Semantic Search (Using ChromaDB)

In [6]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Natural-language request to match against the embedded listings.
query = "Find me a 3-bedroom house near a family-friendly neighborhood."

# Ask the vector store for the five closest matches.
results = db.similarity_search(query=query, k=5)

# Show each hit under a numbered banner, counting from 1.
for rank, match in enumerate(results, start=1):
    banner = f"\n--- Result {rank} ---"
    print(banner)
    print(match.page_content)



--- Result 1 ---
Neighborhood: Family-Friendly
Price: 500000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Ideal family home in a friendly neighborhood. This 3 bedroom, 2 bathroom house offers a spacious floor plan, backyard play area, and top-rated schools nearby.

--- Result 2 ---
Neighborhood: Suburbia
Price: 350000
Bedrooms: 3
Bathrooms: 2
House Size: 1800
Description: Charming family home located in a quiet suburban neighborhood. This 3 bedroom, 2 bathroom house boasts a large backyard perfect for entertaining.

--- Result 3 ---
Neighborhood: Suburbia
Price: 350000
Bedrooms: 4
Bathrooms: 3
House Size: 2500
Description: Charming family home in a quiet suburb. This 4 bedroom, 3 bathroom house features a large backyard perfect for entertaining.

--- Result 4 ---
Neighborhood: Suburban Oasis
Price: 300000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Cozy suburban oasis with a backyard garden. This 3 bedroom, 2 bathroom house offers a peaceful retreat from city life

# Step 4 - RAG (Retrieval-Augmented Generation)

In [7]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# gpt-3.5-turbo is a chat model, so it is driven through LangChain's chat
# wrapper rather than the completion-style `OpenAI` LLM class.
# temperature=0 and max_tokens=500 are passed to the wrapper unchanged.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=500
)

# Retriever over the Chroma store held in `db`, configured with k=5.
retriever = db.as_retriever(search_kwargs={"k": 5})

# Retrieval QA chain using the "stuff" chain type, wired to the LLM and
# retriever defined above.
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

query = "Recommend the best listing for a young working couple who need 2 bedrooms and a safe area."
response = rag.run(query)

print(response)

The best listing for a young working couple who need 2 bedrooms and a safe area would be the condo in the heart of downtown with stunning city views. It is in a safe neighborhood and offers modern finishes and a spacious layout.
