In [None]:
# Import all the required modules
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.memory import ChatMessageHistory
from langchain.schema import HumanMessage

In [None]:
import os

# Set the API key and the base url.
# The key is read from the environment so that no secret has to be stored in the notebook
# and an already-exported OPENAI_API_KEY is not overwritten with an empty placeholder.
# When the variable is unset, API_KEY falls back to "" (the previous placeholder value).
API_KEY = os.environ.get("OPENAI_API_KEY", "")
API_BASE = "https://openai.vocareum.com/v1"

# Export both values through the environment as well; the LangChain clients in this
# notebook are constructed without explicit credentials and presumably read them from here.
os.environ["OPENAI_API_KEY"] = API_KEY
os.environ["OPENAI_API_BASE"] = API_BASE

In [None]:
# Chat model used to generate the synthetic listings
model_name = "gpt-3.5-turbo"
# Sampling temperature: higher values make the responses more varied
temperature = 0.5
llm = ChatOpenAI(
    model_name=model_name,
    temperature=temperature,
    max_tokens=1000,
)

In [4]:
# To get the consistent response in JSON format, define a schema for the response
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# (name, description, type) of every field a generated listing must contain
_schema_fields = [
    ("Neighborhood", "Name of the neighborhood", "string"),
    ("Price", "Property price in USD", "float"),
    ("Bedrooms", "Number of bedrooms", "integer"),
    ("Bathrooms", "Number of bathrooms", "integer"),
    ("House Size", "Size of the house in square feet", "float"),
    ("Description", "A short description of the property", "string"),
    ("Neighborhood Description", "Description of the neighborhood", "string"),
]

response_schema = [
    ResponseSchema(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in _schema_fields
]

# Build the structured output parser from the schema
output_parser = StructuredOutputParser.from_response_schemas(response_schema)

In [5]:
# Get the format instructions from the parser; they are passed to the prompt template
# as its {format_instructions} variable
format_instructions = output_parser.get_format_instructions()

In [None]:
# A history to store the chat messages, so that each generation request can include
# a record of the listings produced so far
history = ChatMessageHistory()

In [7]:
# Example listing that is embedded in the generation prompt to show the desired fields
example_listing = """
"Neighborhood": "Green Oaks"
"Price": 800000.0
"Bedrooms": 3
"Bathrooms": 2
"House Size": 2000.0

"Description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem."

"Neighborhood Description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."
"""

In [8]:
# Define the base prompt.
# Adjacent string literals are concatenated verbatim, so every instruction must end with
# "\n"; without it consecutive sentences run together in the prompt
# (e.g. "...or markdown.Below is the example...").
# Braces in the example listing are doubled so that the f-string style template treats
# them as literal text instead of as template variables.
_example_for_template = example_listing.replace("{", "{{").replace("}", "}}")
prompt_template = ChatPromptTemplate.from_template(
    "You are a real estate listing generator.\n"
    "Generate ONE unique and realistic property listing in the exact format specified below.\n"
    "Make sure this listing is different from all previously generated ones in the conversation.\n"
    "Also  vary the city name, price, size, bedrooms, bathrooms and so on.\n"
    "{format_instructions}\n"
    "Ensure valid JSON only — no explanations or markdown.\n"
    "Below is the example of desired format for listings: \n"
    f"{_example_for_template}\n"
    "Ensure to cover bedrooms, bathrooms, space and others into the description field\n"
    "Ensure when generating, Number of Bathrooms, Bedrooms are integer not float\n"
    "Ensure to generate diverse number of Bedrooms ranging from 2 to 5 nos\n"
    "Also Ensure to generate diverse number of Bathrooms ranging from 2 to 5 nos "
)

In [None]:
import json

n = 10  # Change here to change the number of data generated in the listings
listings = []

# Start from an empty history: `listings` is reset above, so messages about listings from
# an earlier run of this cell would describe data that no longer exists.
history.clear()

# The prompt text does not depend on the iteration, so it is formatted once.
formatted_prompt = prompt_template.format(format_instructions=format_instructions)

# To keep the generated listings unique, the chat history describing the earlier
# generations is sent along with every request.
for i in range(n):
    messages = history.messages + [HumanMessage(content=formatted_prompt)]
    response = llm.invoke(messages)
    parsed = output_parser.parse(response.content)
    listings.append(parsed)

    # Record what was just generated so the next request can see it and avoid repeating it
    history.add_user_message(f"Generated listing {i+1}: {json.dumps(parsed)}")
    history.add_ai_message(
        "Got it, I will avoid generating similar listings in the next iteration"
    )

In [None]:
# Persist the generated listings to disk as a JSON document
json_file = "real_estate_listing.json"

payload = {"real_estate_listings": listings}
with open(json_file, mode="w", encoding="utf-8") as out_file:
    json.dump(payload, out_file, indent=2, ensure_ascii=False)

In [11]:
# Read the json object from file
import json

# The file is written as UTF-8 with ensure_ascii=False, so it has to be decoded as UTF-8
# here as well; without an explicit encoding open() uses the platform's locale encoding,
# which can fail or garble non-ASCII characters in the generated text.
with open("real_estate_listing.json", "r", encoding="utf-8") as f:
    data = json.load(f)["real_estate_listings"]

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model used for both the stored listings and the search query
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
# Width of the vector column in the LanceDB schema; presumably the length of the vectors
# returned by the model above — TODO confirm whenever the model is changed
embedding_dim = 3072

In [None]:
# Create and store the embeddings for the text description which captures the semantic meaning of the text.
# Each listing is represented by its property description followed by its neighborhood description.
texts = [
    "Description: " + listing["Description"] + "\nNeighborhood: " + listing["Neighborhood Description"]
    for listing in data
]
for text in texts:
    print(text)

# Embed all texts through the document API so the client can batch the requests instead of
# issuing one request per listing; an empty input needs no request at all.
embeddings = embedding_model.embed_documents(texts) if texts else []

Description: Discover this spacious 4-bedroom, 3-bathroom home in the desirable Willow Creek neighborhood. The open floor plan features a gourmet kitchen with granite countertops and stainless steel appliances, perfect for entertaining guests. The master suite offers a relaxing retreat with a luxurious en-suite bathroom and walk-in closet. Enjoy the outdoor patio and large backyard, ideal for summer BBQs and gatherings. This home is perfect for families looking for comfort and style in Willow Creek.
Neighborhood: Willow Creek is known for its family-friendly atmosphere and top-rated schools. Residents enjoy easy access to parks, shopping centers, and dining options. Take a leisurely stroll along the tree-lined streets or visit the nearby Willow Creek Country Club for golf and social events.
Description: Step into luxury in this stunning 5-bedroom, 4-bathroom home located in the prestigious Sunset Hills neighborhood. The grand entrance leads to a spacious living room with high ceilings 

In [None]:
import lancedb
from lancedb.pydantic import vector, LanceModel

# Define a data structure to store the generated data
class RealEstateData(LanceModel):
    """Row schema for the LanceDB table that stores the real estate listings.

    The scalar fields correspond to the keys of the generated listing JSON, with the
    spaces in "House Size" and "Neighborhood Description" replaced by underscores.
    """

    Neighborhood: str
    Price: float
    Bedrooms: int
    Bathrooms: int
    House_Size: float
    Description: str
    Neighborhood_Description: str
    # Fixed-size vector column with embedding_dim entries
    Embedding: vector(embedding_dim)  # type: ignore

In [None]:
import pyarrow as pa

# Create a database and a table inside it
db = lancedb.connect(".lancedb")  # relative path; presumably a local database directory
table_name = "real_estate_listings"
# Remove a table left over from a previous run; ignore_missing covers the first run
db.drop_table(table_name, ignore_missing=True)
# Create the table with the RealEstateData model as its schema
table = db.create_table(table_name, schema=RealEstateData, mode="overwrite")

In [None]:
# Every listing needs exactly one embedding. A mismatch means the embeddings were computed
# for a different `data` (e.g. stale kernel state) and rows would get the wrong vectors.
if len(embeddings) != len(data):
    raise ValueError(
        f"Expected one embedding per listing, got {len(embeddings)} embeddings for {len(data)} listings"
    )

# Build one table row per listing, pairing it with its embedding and casting the
# numeric fields to the types declared in the schema
values = [
    RealEstateData(
        Neighborhood=listing["Neighborhood"],
        Price=float(listing["Price"]),
        Bedrooms=int(listing["Bedrooms"]),
        Bathrooms=int(listing["Bathrooms"]),
        House_Size=float(listing["House Size"]),
        Description=listing["Description"],
        Neighborhood_Description=listing["Neighborhood Description"],
        Embedding=embedding,
    )
    for listing, embedding in zip(data, embeddings)
]

# Add the data to the database with corresponding embeddings
table.add(values)

AddResult(version=2)

In [18]:
# Number of rows currently stored in the table
len(table)

20

In [19]:
# Preview the first rows of the table as a pandas DataFrame
table.head().to_pandas()

Unnamed: 0,Neighborhood,Price,Bedrooms,Bathrooms,House_Size,Description,Neighborhood_Description,Embedding
0,Willow Creek,650000.0,4,3,2400.0,"Discover this spacious 4-bedroom, 3-bathroom h...",Willow Creek is known for its family-friendly ...,"[-0.014942469, 0.021715518, -0.008795829, 0.00..."
1,Sunset Hills,720000.0,5,4,2800.0,"Step into luxury in this stunning 5-bedroom, 4...",Sunset Hills is renowned for its upscale livin...,"[-0.020557083, 0.0053015635, -0.015885597, -0...."
2,Maple Grove,580000.0,3,2,1800.0,"Welcome to this cozy 3-bedroom, 2-bathroom hom...",Maple Grove is known for its tranquil setting ...,"[-0.0031715953, 0.017896859, -0.010458713, 0.0..."
3,Pinecrest Hills,690000.0,4,3,2500.0,"Discover serenity in this elegant 4-bedroom, 3...",Pinecrest Hills is known for its upscale livin...,"[-0.026866933, 0.043429106, -0.017440872, -0.0..."
4,Riverbend Estates,750000.0,5,4,3000.0,Step into luxury in this magnificent 5-bedroom...,Riverbend Estates is a highly sought-after com...,"[-0.013402212, 0.022879306, -0.019083304, -0.0..."


In [None]:
from typing import Union

# Get the embedding for the query vector
def get_embedding(query: Union[str, list[str]]):
    """Embed a search query with the notebook's embedding model.

    Args:
        query: The query text, or a list of text fragments that are joined with
            single spaces into one query text.

    Returns:
        Whatever ``embedding_model.embed_query`` returns for the query text.
    """
    # A plain string must be used as-is: str.join iterates over its argument, so joining
    # a string would put a space between every character and distort the embedded text.
    if isinstance(query, str):
        query_filtered = query
    else:
        query_filtered = " ".join(query)
    return embedding_model.embed_query(query_filtered)

In [None]:
# Enter the buyer's preferences here as free text; this text is used for the vector
# search and is also passed to the listing personalization step
query = "A cozy three-bedroom house with spacious kitchen and friendly neighborhood"

In [22]:
# Number of best-matching listings to retrieve for the query
k = 5

# Embed the buyer query and fetch the k closest listings using the cosine metric
query_vector = get_embedding(query)
results = (
    table.search(query_vector)
    .metric("cosine")
    .limit(k)
    .to_pydantic(RealEstateData)
)

In [None]:
# for result in results:
#     val = f"Neighborhood: {result['Neighborhood']}, Bedrooms: {result['Bedrooms']}, Bathroom: {result['Bathrooms']}, Distance: {result['_distance']}"
#     print(val)

In [24]:
# Display the listings returned by the search
results

[RealEstateData(Neighborhood='Maple Grove', Price=580000.0, Bedrooms=3, Bathrooms=2, House_Size=1800.0, Description='Welcome to this cozy 3-bedroom, 2-bathroom home in the charming Maple Grove neighborhood. The inviting living room features a fireplace, perfect for relaxing evenings. The kitchen is equipped with modern appliances and a breakfast nook, ideal for casual dining. The master bedroom offers a peaceful retreat with an en-suite bathroom. Step outside to the well-maintained backyard with a deck, great for outdoor gatherings. Experience comfort and warmth in this Maple Grove gem.', Neighborhood_Description='Maple Grove is known for its tranquil setting and friendly community atmosphere. Residents can enjoy nearby parks, cafes, and local shops. The neighborhood provides easy access to walking trails and recreational activities, making it a wonderful place for families and nature enthusiasts.', Embedding=FixedSizeList(dim=3072)),
 RealEstateData(Neighborhood='Meadowbrook Hills', P

In [25]:
# System message sent with every rewrite request made by personalized_listing
system_prompt = """
You are an expert factual real estate content enhancer.
Given a property listing and a buyer's preferences:
1. Rewrite the original listing description to highlight features that match the buyer's interests.
2. Keep the description factual — do not invent or exaggerate details stick to the details of original listing description.
3. Maintain a natural and engaging tone suitable for property listings.
4. Important thing is preserve numeric details (bedrooms, bathrooms, and so on) from the Original description and only enhance the original description.
"""

In [None]:
from openai import OpenAI

# OpenAI SDK client created with the API key and base URL defined earlier in the notebook
client = OpenAI(
    api_key=API_KEY,
    base_url=API_BASE
)

def personalized_listing(prompt: str, listing: RealEstateData) -> str:  # type: ignore
    """Rewrite a listing description so that it speaks to the buyer's preferences.

    Args:
        prompt: Free-text description of what the buyer is looking for.
        listing: The listing whose description, bedroom and bathroom counts,
            neighborhood name and neighborhood description are sent to the model.

    Returns:
        The message content of the first choice of the chat completion. If any
        exception is raised while building or sending the request, a string
        starting with "An error occured:" is returned instead of raising.
    """
    try:
        user_prompt = f"""
        User Preferences:
        {prompt}
        
        Original Description
        {listing.Description}
        
        Original Number of Bedrooms
        {listing.Bedrooms}
        
        Original Number of Bathrooms
        {listing.Bathrooms}
        
        Original Neighborhood Name
        {listing.Neighborhood}
        
        Neighborhood Description
        {listing.Neighborhood_Description}
        
        Task:
        Rewrite the description so it resonates with the buyer preferences, emphasizing only on aspects that match their preferences.
        Do not invent any details. Keep factual integrity.
        Ensure that the augmentation process enhances the appeal of the listing without altering factual information like the number of bedrooms and bathrooms mentioned in the Original Description.
        Use the number of bedrooms and bathrooms from the original description
        """

        # The system message carries the rewriting rules, the user message the listing data
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation,
            temperature=1,
            max_tokens=512,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )

        # The response object holds more than the generated text; only the message
        # content of the first choice is returned
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as error:
        return f"An error occured: {error}"

In [56]:
# Personalize every retrieved listing for the buyer's query
personalized_listings = [personalized_listing(query, match) for match in results]

In [57]:
# Print each personalized listing preceded by a separator line, then a closing separator
separator = "\n" + "=" * 100 + "\n"
for enhanced_text in personalized_listings:
    print(separator)
    print(enhanced_text)

print(separator)



Welcome to this cozy 3-bedroom, 2-bathroom home nestled in the friendly neighborhood of Maple Grove. This inviting residence is designed for comfort, featuring a warm living room complete with a fireplace, perfect for those relaxing evenings spent with loved ones. The spacious kitchen is equipped with modern appliances and includes a breakfast nook, making it an ideal spot for casual dining and morning gatherings.

The master bedroom provides a peaceful retreat, featuring an en-suite bathroom for added convenience. Outside, you'll find a well-maintained backyard with a deck that’s perfect for outdoor gatherings, offering a wonderful space to connect with family and friends.

Maple Grove is renowned for its tranquil atmosphere and welcoming community, where residents enjoy nearby parks, local shops, and easy access to walking trails. This charming neighborhood is perfect for families and nature enthusiasts looking to embrace a vibrant lifestyle. Experience the warmth and comfort of th