In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.memory import ChatMessageHistory
from langchain.schema import HumanMessage

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import os

# Credentials must not be committed with the notebook: the key is taken from
# the OPENAI_API_KEY environment variable (export it before starting the
# kernel). It stays an empty string when the variable is not set.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
# Base URL of the OpenAI-compatible endpoint used by every client below.
API_BASE = "https://openai.vocareum.com/v1"

# Only export a non-empty key so that an unset value never clobbers a key that
# is already present in the environment.
if API_KEY:
    os.environ["OPENAI_API_KEY"] = API_KEY
os.environ["OPENAI_API_BASE"] = API_BASE

In [3]:
# Chat model that generates the synthetic listings; both knobs are kept as
# named variables so they can be tuned in one place.
model_name = "gpt-3.5-turbo"
temperature = 0.5
llm = ChatOpenAI(
    temperature=temperature,
    model_name=model_name,
    max_tokens=1000,
)

  llm = ChatOpenAI(model_name=model_name, temperature=temperature, max_tokens=1000)


In [None]:
# A fixed response schema keeps every generated listing in the same JSON shape.
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# One ResponseSchema per field, built from (name, description, type) triples.
response_schema = [
    ResponseSchema(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in (
        ("Neighborhood", "Name of the neighborhood", "string"),
        ("Price", "Property price in USD", "float"),
        ("Bedrooms", "Number of bedrooms", "integer"),
        ("Bathrooms", "Number of bathrooms", "integer"),
        ("House Size", "Size of the house in square feet", "float"),
        ("Description", "A short description of the property", "string"),
        ("Neighborhood Description", "Description of the neighborhood", "string"),
    )
]

# Parser that enforces the schema on the model's reply
output_parser = StructuredOutputParser.from_response_schemas(response_schema)

In [5]:
# Ask the parser for its format instructions; this text fills the
# {format_instructions} placeholder when the generation prompt is formatted.
format_instructions = output_parser.get_format_instructions()

In [6]:
# Chat history for the generation loop: every generated listing is appended
# here and replayed with the next request so the model can avoid repeats.
history = ChatMessageHistory()

In [7]:
# Sample listing that is embedded verbatim into the generation prompt to show
# the model which fields a listing carries and roughly how long they are.
example_listing = """
"Neighborhood": "Green Oaks"
"Price": 800000.0
"Bedrooms": 3
"Bathrooms": 2
"House Size": 2000.0

"Description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem."

"Neighborhood Description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."
"""

In [None]:
# Define the base prompt.
# The adjacent string literals below are concatenated by Python with no
# separator, so every instruction ends with an explicit "\n"; without it the
# sentences run together (e.g. "...description fieldEnsure when ...").
# {format_instructions} is a template placeholder filled in at format time,
# whereas example_listing is interpolated immediately by the f-string, so it
# must not contain curly braces.
prompt_template = ChatPromptTemplate.from_template(
    "You are a real estate listing generator.\n"
    "Generate ONE unique and realistic property listing in the exact format specified below.\n"
    "Make sure this listing is different from all previously generated ones in the conversation.\n"
    "Also vary the city name, price, size, bedrooms, bathrooms and so on.\n"
    "{format_instructions}\n"
    "Ensure valid JSON only — no explanations or markdown.\n"
    "Below is the example of desired format for listings: \n"
    f"{example_listing}\n"
    "Ensure to cover bedrooms, bathrooms, space and others into the description field.\n"
    "Ensure when generating, Number of Bathrooms, Bedrooms are integer not float.\n"
    "Also ensure to generate diverse number of Bedrooms and Bathrooms ranging from 2 to 5."
)

In [None]:
import json

n = 0  # Change here to change the number of data in listings
listings = []

# Every round replays the accumulated chat history ahead of the fresh request,
# which lets the model see what it already produced and steer away from it.
for round_number in range(1, n + 1):
    request = HumanMessage(
        content=prompt_template.format(format_instructions=format_instructions)
    )
    reply = llm.invoke([*history.messages, request])
    listing_record = output_parser.parse(reply.content)
    listings.append(listing_record)

    # Record the listing just produced, followed by a canned acknowledgement,
    # so the next round's history contains it.
    history.add_user_message(
        f"Generated listing {round_number}: {json.dumps(listing_record)}"
    )
    history.add_ai_message(
        "Got it, I will avoid generating similar listings in the next iteration"
    )

In [11]:
# Store the JSON listings in a file
json_file = "real_estate_listing.json"

# When the generation loop did not run (n = 0) `listings` is empty; writing it
# would replace previously generated data with an empty list and break the
# cells that read the file back, so the write is skipped in that case.
if listings:
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump({"real_estate_listings": listings}, f, indent=2, ensure_ascii=False)
else:
    print(f"No new listings were generated; leaving {json_file} untouched.")

In [None]:
# Read the json object from file
import json

# The file is written as UTF-8 with ensure_ascii=False, so it can contain raw
# non-ASCII characters; decode it explicitly as UTF-8 instead of relying on the
# platform's default encoding.
with open("real_estate_listing.json", "r", encoding="utf-8") as f:
    data = json.load(f)["real_estate_listings"]

In [None]:
# Quick check that the file loaded: show the first listing's neighborhood.
data[0]["Neighborhood"]

'Willow Creek'

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model used for both the listing descriptions and the search query.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
# Length of the Embedding vector column declared in the LanceDB schema.
# NOTE(review): presumably the output size of text-embedding-3-large — confirm
# against the OpenAI docs and update it if the model above changes.
embedding_dim = 3072

  embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")


In [15]:
# Embed each listing's description, in the same order as `data`, so that
# embeddings[j] belongs to data[j].
embeddings = []
for record in data:
    description = record["Description"]
    print(description)
    embeddings.append(embedding_model.embed_query(description))

Discover the beauty of this spacious 4-bedroom, 3-bathroom home in the desirable Willow Creek neighborhood. The open floor plan features a gourmet kitchen with granite countertops and stainless steel appliances, perfect for entertaining. The master suite offers a luxurious retreat with a spa-like bathroom and walk-in closet. Enjoy the large backyard with a patio, ideal for outdoor gatherings and relaxation. This home is perfect for families looking for comfort and style.
Step into luxury in this stunning 5-bedroom, 4-bathroom home located in the prestigious Maple Ridge neighborhood. The grand foyer welcomes you into a spacious living area with high ceilings and elegant finishes. The gourmet kitchen is a chef's dream with top-of-the-line appliances and a large island for entertaining. Retreat to the master suite featuring a spa-like bathroom and a private balcony with breathtaking views. The backyard oasis includes a pool, spa, and outdoor kitchen, perfect for hosting gatherings or simp

In [None]:
import lancedb
from lancedb.pydantic import vector, LanceModel


class RealEstateData(LanceModel):
    """Row schema for the real-estate listings table stored in LanceDB.

    The scalar fields correspond to the keys of each generated listing (with
    spaces replaced by underscores). ``Embedding`` holds the vector computed
    from the listing's description.
    """

    # Name of the neighborhood.
    Neighborhood: str
    # Property price (the generation schema describes it as USD).
    Price: float
    # Number of bedrooms.
    Bedrooms: int
    # Number of bathrooms.
    Bathrooms: int
    # House size (the generation schema describes it as square feet).
    House_Size: float
    # Short description of the property; this is the text that gets embedded.
    Description: str
    # Description of the surrounding neighborhood.
    Neighborhood_Description: str
    # Vector column of length embedding_dim, searched by the similarity query.
    Embedding: vector(embedding_dim)  # type: ignore

In [None]:
import pyarrow as pa

# Open the LanceDB database stored in the local ".lancedb" directory.
db = lancedb.connect(".lancedb")
table_name = "real_estate_listings"
# Start from a clean table on every run: drop any previous table
# (ignore_missing=True is passed so a first run is not an error), then
# create it again from the RealEstateData schema.
db.drop_table(table_name, ignore_missing=True)
table = db.create_table(table_name, schema=RealEstateData, mode="overwrite")

[90m[[0m2025-11-10T14:53:59Z [33mWARN [0m lance::dataset::write::insert[90m][0m No existing dataset at /home/sriramramesh/Studies/Generative_AI/Generative_AI_Solutions/.lancedb/real_estate_listings.lance, it will be created


In [None]:
# Build one RealEstateData row per listing, pairing it with the embedding at
# the same position, and coerce the numeric fields to the schema's types.
values = [
    RealEstateData(
        Neighborhood=record["Neighborhood"],
        Price=float(record["Price"]),
        Bedrooms=int(record["Bedrooms"]),
        Bathrooms=int(record["Bathrooms"]),
        House_Size=float(record["House Size"]),
        Description=record["Description"],
        Neighborhood_Description=record["Neighborhood Description"],
        Embedding=embeddings[position],
    )
    for position, record in enumerate(data)
]

# Insert all rows in a single call.
table.add(values)

AddResult(version=2)

In [19]:
# Sanity check: the number of rows in the table after the insert.
len(table)

20

In [20]:
# Preview the first rows of the table as a pandas DataFrame.
table.head().to_pandas()

Unnamed: 0,Neighborhood,Price,Bedrooms,Bathrooms,House_Size,Description,Neighborhood_Description,Embedding
0,Willow Creek,650000.0,4,3,2400.0,Discover the beauty of this spacious 4-bedroom...,Willow Creek is known for its tree-lined stree...,"[-0.008741284, 0.016291477, -0.011288893, 0.00..."
1,Maple Ridge,720000.0,5,4,3200.0,"Step into luxury in this stunning 5-bedroom, 4...","Maple Ridge is known for its upscale living, t...","[0.0016742505, 0.013524933, -0.01867043, 0.020..."
2,Sunset Hills,550000.0,3,2,1800.0,"Welcome to this charming 3-bedroom, 2-bathroom...",Sunset Hills is a quiet and family-friendly ne...,"[-0.0012877506, 0.01598565, -0.010384931, 0.00..."
3,Pinecrest Heights,680000.0,4,3,2600.0,Step into elegance in this beautiful 4-bedroom...,Pinecrest Heights is known for its lush greene...,"[-0.0029936722, 0.038313735, -0.022896077, -0...."
4,Golden Hills,720000.0,5,4,3000.0,Discover luxury living in this exquisite 5-bed...,Golden Hills is renowned for its upscale livin...,"[-0.015407241, 0.03017887, -0.021115039, -0.00..."


In [None]:
from typing import Union


def get_embedding(query: Union[str, list[str]]):
    """Embed a buyer query with the notebook's embedding model.

    Args:
        query: Either a single query string or a list of query statements.
            A list is joined with single spaces into one text; a string is
            embedded as-is (joining a string would insert a space between
            every character).

    Returns:
        Whatever ``embedding_model.embed_query`` returns for the combined text.
    """
    if isinstance(query, str):
        query_filtered = query
    else:
        query_filtered = " ".join(query)
    return embedding_model.embed_query(query_filtered)

In [None]:
# Buyer preferences, one statement per entry. The list is joined into a single
# text for the vector search and is also passed to personalized_listing.
query = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
]

In [25]:
# Query the LanceDB to find the matching listings
k = 5  # Change this to get the top k matching results for the given query
query_vector = get_embedding(query)
results = table.search(query_vector).metric("cosine").limit(k).to_pydantic(RealEstateData)

In [26]:
# Display the matched listings returned by the search.
results

[RealEstateData(Neighborhood='Sunset Hills', Price=550000.0, Bedrooms=3, Bathrooms=2, House_Size=1800.0, Description='Welcome to this charming 3-bedroom, 2-bathroom home located in the serene Sunset Hills neighborhood. The cozy living room features a fireplace, perfect for relaxing evenings. The updated kitchen boasts stainless steel appliances and granite countertops. The master bedroom offers a private retreat with an en-suite bathroom. Step outside to the landscaped backyard with a covered patio, ideal for outdoor dining and entertaining. This home is perfect for those seeking a peaceful retreat in a desirable neighborhood.', Neighborhood_Description='Sunset Hills is a quiet and family-friendly neighborhood with tree-lined streets and parks. Residents can enjoy scenic walking trails, playgrounds, and community events. Conveniently located near shopping centers, restaurants, and top-rated schools, Sunset Hills offers a perfect balance of tranquility and convenience.', Embedding=Fixed

In [54]:
# System message sent with every personalization request: it sets the rules
# for rewriting a listing description.
system_prompt = """
You are an expert real estate content enhancer.
Given a property listing and a buyer's preferences:
1. Rewrite the listing description to highlight features that match the buyer's interests.
2. Keep the description factual — do not invent or exaggerate details.
3. Maintain a natural and engaging tone suitable for property listings.
4. Preserve numeric details (bedrooms, bathrooms, and so on) from the original description.
"""

In [55]:
from openai import OpenAI

# OpenAI SDK client used by personalized_listing; it is configured with the
# same API key and base URL that were exported to the environment earlier.
client = OpenAI(
    api_key=API_KEY,
    base_url=API_BASE
)

def personalized_listing(prompt: Union[str, list[str]], listing: RealEstateData) -> str:  # type: ignore
    """Rewrite one listing's description so it speaks to the buyer's preferences.

    Args:
        prompt: Buyer preferences, either a single string or a list of
            statements. A list is rendered one preference per line so the
            model does not receive a Python list repr (brackets and quotes).
        listing: The listing to rewrite; its description, bedroom and bathroom
            counts, neighborhood name and neighborhood description are all
            included in the request.

    Returns:
        The content of the model's first choice. If anything in the request
        fails, the error is not raised; a string starting with
        "An error occured:" is returned instead, so callers that need to
        distinguish failures must check for that prefix.
    """
    try:
        # Normalize the preferences to plain text before interpolating them.
        if isinstance(prompt, str):
            preferences = prompt
        else:
            preferences = "\n".join(f"- {item}" for item in prompt)

        user_prompt = f"""
        User Preferences:
        {preferences}
        
        Original Listing Description
        {listing.Description}
        
        Original Number of Bedrooms
        {listing.Bedrooms}
        
        Original Number of Bathrooms
        {listing.Bathrooms}
        
        Original Neighborhood Name
        {listing.Neighborhood}
        
        Neighborhood Description
        {listing.Neighborhood_Description}
        
        Task:
        Rewrite the description so it resonates with the buyer preferences, emphasizing aspects that match their preferences.
        Do not invent any details. Keep factual integrity.
        Ensure that the augmentation process enhances the appeal of the listing without altering factual information.
        """

        # A low temperature keeps the rewrite close to the source facts.
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.1,
            max_tokens=512,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )

        # The response object carries more than the text; return only the
        # message content of the first choice.
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: report the failure as text so a batch of listings can
        # continue past a single failed request.
        return f"An error occured: {e}"

In [56]:
# Rewrite every matched listing against the buyer preferences, keeping the
# results in the same order as the search results.
personalized_listings = []
for listing in results:
    personalized_listings.append(personalized_listing(query, listing))

In [59]:
# Show one of the rewritten listings (index 3, i.e. the fourth search result).
print(personalized_listings[3])

Welcome to this charming three-bedroom, two-bathroom home located in the peaceful Willow Grove neighborhood. This residence offers a cozy living room that invites relaxation and a spacious kitchen, perfect for family gatherings and culinary adventures. The open-concept design allows for seamless flow between the living spaces, enhanced by large windows that fill the home with natural light.

Step outside to discover a delightful backyard, ideal for gardening enthusiasts or simply enjoying the outdoors. The property also features a two-car garage, providing ample space for vehicles and storage. 

Willow Grove is renowned for its tranquil atmosphere, excellent local schools, and convenient shopping options, making it an ideal choice for families. With easy access to a reliable bus line and proximity to major highways, commuting is a breeze. Plus, the bike-friendly roads encourage an active lifestyle, allowing you to explore the neighborhood and beyond.

Experience the perfect balance of 