# HomeMatch

This application leverages large language models (LLMs) and a vector database to transform standard real estate listings into personalized narratives that resonate with potential buyers' unique preferences and needs.

In [1]:
from langchain.llms import OpenAI
import json

import os

# Prefer the OPENAI_API_KEY environment variable so the secret does not have
# to be pasted into the notebook; the placeholder is only the fallback value.
openai_api_key = os.environ.get("OPENAI_API_KEY", "[INSERT OPEN AI KEY HERE]")

# LLM client shared by the cells below.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
# Number of listings to generate.
count = 20

def try_get_llm_json(prompt):
    """Send ``prompt`` to the module-level ``llm`` and parse the reply as JSON.

    The request is repeated until ``json.loads`` accepts the completion text,
    for at most ten attempts.

    Args:
        prompt: Prompt text passed to ``llm.generate``.

    Returns:
        The decoded JSON value, or ``None`` when every attempt failed.
    """
    max_try = 10
    for _ in range(max_try):
        try:
            response = llm.generate([prompt])
            return json.loads(response.generations[0][0].text)
        except Exception:
            # Deliberately broad: a failed request and an unparsable reply are
            # both retried. Unlike a bare ``except``, this still lets
            # KeyboardInterrupt and SystemExit propagate, so the loop can be
            # interrupted.
            print("LLM returnd no valid JSON response. Trying again...")
    # Every attempt failed; the caller has to deal with the missing result.
    return None

def generate_real_estate_listing(llm, count):
    """Generate ``count`` real estate listings with the LLM.

    Args:
        llm: Kept for interface compatibility. The request itself is made by
            ``try_get_llm_json``, which uses the module-level ``llm``.
        count: Number of listings to collect. Values <= 0 yield an empty list.

    Returns:
        A list containing ``count`` parsed listings.

    Raises:
        RuntimeError: If ``try_get_llm_json`` gives up without a parsable reply.
    """
    # The prompt does not depend on loop state, so it is built once. The JSON
    # template must itself be valid JSON (note the comma after every entry but
    # the last), otherwise the model tends to copy the syntax error.
    prompt = f"""
    Generate a highly individual and realistic real estate listings. 
    The listings should be in the following JSON structure: 

      {{
        "Title": "A catchy and realistic title of the real estate",
        "Description": "A detailed and realistic description of the real estate",
        "Price": "A realistic price for the features in the description",
        "Bedrooms": "a number between 1 and 3",
        "Bathrooms": "a number between 1 and 3",
        "House Size in sqft": "a size of the real estate between 1000 and 5000 sqft",
        "Neighborhood Description": "A realistic neighborhood description of the real estate",
        "Location": "A realistic location of the real estate",
        "Property Type": "A realistic property type of the real estate"
      }}
    
    The listing should be a random property types and be located at a random locations.
    Examples are: a luxury penthouse in downtown Manhattan, a cozy cottage in the English countryside, a modern loft in Berlin's trendy Kreuzberg district, a beachfront villa in Malibu, California, a traditional ryokan in Kyoto, Japan, a spacious family home in the suburbs of Sydney, Australia, a historic chateau in the French Riviera, a ski chalet in the Swiss Alps, a colonial-style mansion in Charleston, South Carolina, and a minimalist apartment in Copenhagen, Denmark.
    Use a random tow and property type - do not use one of the examples above.
    Include into the descriptions attributes like:
    - 3 most important things about the property
    - amenities offered by this property
    - transportation options
    Answer this prompt only with the valid JSON structue and don't include any free text."""

    listings = []
    while len(listings) < count:
        listing = try_get_llm_json(prompt)
        if listing is None:
            # The helper ran out of retries; fail with a clear message instead
            # of a TypeError from subscripting None below.
            raise RuntimeError(
                "LLM did not return a valid JSON listing after repeated "
                f"attempts; generated {len(listings)} of {count} listings."
            )
        print("generated ", len(listings), ". listing: ", listing["Title"])
        listings.append(listing)

    return listings

# Generate the listings once; the cells below store and query this list.
listings = generate_real_estate_listing(llm=llm, count=count)

generated  0 . listing:  Luxury Condo in the heart of Toronto
generated  1 . listing:  Luxurious Waterfront Villa in Miami
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
generated  2 . listing:  Stylish Loft in the Heart of Amsterdam
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
generated  3 . listing:  Luxurious Condo in the Heart of San Francisco
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
generated  4 . listing:  Stunning Villa with Ocean Views
LLM returnd no valid JSON response. Trying again...
LLM returnd no valid JSON response. Trying again...
generated  5 . listing:  Luxury Oceanfront Condo in Miami Bea

In [2]:
# Use ChromaDB as the vector database that stores the generated listings
import chromadb
client = chromadb.Client()

collection_name = "realestatelisting"

# Start from a clean state: drop the collection if it already exists.
# Depending on the chromadb release, list_collections() yields either
# Collection objects or plain collection names, so both are accepted.
collections = client.list_collections()
existing_names = [c if isinstance(c, str) else c.name for c in collections]
if collection_name in existing_names:
    client.delete_collection(name=collection_name)

collection = client.create_collection(collection_name)

collection.add(
    # Each document is the complete listing rendered as "key: value" lines.
    documents=[
        "\n".join(f"{key}: {value}" for key, value in listing.items())
        for listing in listings
    ],
    # A few selected fields are stored as metadata alongside each document.
    metadatas=[
        {
            "title": listing["Title"],
            "price": listing["Price"],
            "location": listing["Location"],
            "property_type": listing["Property Type"],
        }
        for listing in listings
    ],
    # The position in ``listings`` serves as the unique ID of each document.
    ids=[str(index) for index in range(len(listings))],
)


In [3]:
# the questions and answers for the real estate chatbot

# Questions put to the buyer. answers[i] below is the reply to questions[i].
questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]

# Buyer replies, in the same order as the questions above.
answers = [
    # house size
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    # most important things
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    # amenities
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    # transportation
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    # how urban
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
]

In [11]:
# Combine the Q&A pairs into a single string to be included in the prompts.
# Each pair takes two lines (question, then answer); pairs are separated by a
# blank line. The separator inside a pair must be "\n", not the literal "/n".
q_and_a = "\n\n".join(
    f"question: {question}\nAnswer: {answer}"
    for question, answer in zip(questions, answers)
)

# Extraction prompt: ask the LLM to condense the Q&A pairs into one description
# of the property the user is looking for.
extraction_prompt = f"""
Please extract the real estate properties the user is searching from following text:
{q_and_a}

Answer with description of the real estate user prefers by incorporating all attributes the user mentioned above.
"""

# Text of the first completion for the single prompt submitted.
description = llm.generate([extraction_prompt]).generations[0][0].text

In [13]:
# Fetch the three listings closest to the extracted description.
results = collection.query(query_texts=[description], n_results=3)

# Only one query text was submitted, so its hits are at index 0 of each field.
formatted_results = [
    f"ID: {listing_id}\n"
    f"Price: {metadata['price']}\n"
    f"Location: {metadata['location']}\n"
    f"Real estate type: {metadata['property_type']}\n"
    f"Description: {document}\n"
    f"\n\n--\n"
    for listing_id, metadata, document in zip(
        results["ids"][0], results["metadatas"][0], results["documents"][0]
    )
]

formatted_output = "\n".join(formatted_results)

# Interpolate the joined text, not the list itself: formatting the list would
# put its Python repr (brackets, quotes, escaped "\n") into the prompt.
answer_generation_prompt = f"""
We have the following real estate listings:
---
{formatted_output}
---

The user is searching for a real estate with the following properties.
{q_and_a}

If we have no matching listings, we will clearly describe that we don't have a full match and provide the user with the most similar ones.

Please generate a response to the user's query and reference each of the real estate listings above.
Include references to the description, location, price and real estate type of the listings. Highlight the most relevant information for the user's query.
Ensure personalization emphasizes characteristics appealing to the buyer, but do not alter factual information about the property.
Always include the ID of the real estate listing you are referring to.
"""

response = llm.generate([answer_generation_prompt])
print(response.generations[0][0].text)


Based on your preferences, we have found three potential properties that may fit your criteria.

First, ID 14 is a luxurious penthouse in Manhattan, New York. This stunning property boasts 360-degree city views, a private rooftop terrace, and top-of-the-line appliances. With 3 bedrooms and 2.5 bathrooms, this penthouse offers ample space for relaxation and entertaining. It is located in a bustling downtown area, surrounded by some of the city's best restaurants, bars, and shopping options. The price for this penthouse is $2,500,000.

Next, ID 0 is a luxury condo in the heart of Toronto, Canada. This property offers breathtaking city views, 3 spacious bedrooms, and 2 luxurious bathrooms. With a sleek and modern kitchen, floor-to-ceiling windows, and an open-concept layout, this condo is perfect for urban living. It is located in downtown Toronto, surrounded by trendy restaurants, cafes, and shops. The price for this condo is $1,500,000.

Lastly, ID 12 is a modern and spacious loft in d