In [1]:
import os
import random
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import pandas as pd

In [2]:
# Replace with your actual OpenAI API key
# You might need to replace os.environ.get('openai_key') with your own OPEN_AI_KEY
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = ChatOpenAI(model_name=model_name, openai_api_key=os.environ.get('openai_key'))
embeddings_model  = OpenAIEmbeddings( openai_api_key=os.environ.get('openai_key'))

In [10]:
neighborhoods = ["Green Oaks", "Blue Lagoon", "Silver City"]

In [17]:

neighborhood_dict = {}
def get_description_for_neighborhood(neighborhood):    
    neighborhood_description_prompt = ChatPromptTemplate.from_template(f"""
    Describe the neighborhood {neighborhood} in a few sentences, include schools, cafes, restaurants and infrastructure.
    """)
    
    return llm(neighborhood_description_prompt.format()).content

for neighborhood in neighborhoods:
    neighborhood_dict[neighborhood] = get_description_for_neighborhood(neighborhood)

neighborhood_dict

{'Green Oaks': 'Green Oaks is a charming neighborhood located in the suburbs of a bustling city. The area is known for its top-rated schools, including Green Oaks Elementary and Green Oaks High School, providing excellent education opportunities for families. The neighborhood is dotted with cozy cafes and family-friendly restaurants, offering a variety of cuisines to choose from. The infrastructure in Green Oaks is well-maintained, with tree-lined streets, parks, and community centers that provide ample opportunities for outdoor activities and social gatherings. Overall, Green Oaks is a peaceful and welcoming neighborhood with a strong sense of community.',
 'Blue Lagoon': 'Blue Lagoon is a vibrant neighborhood located near a picturesque lagoon, known for its peaceful atmosphere and stunning views. The area is home to several top-rated schools, making it a popular choice for families with children. There are also plenty of charming cafes and restaurants serving a variety of cuisine, fr

In [18]:
def generate_house_listing(neighborhoods, neighborhood_descriptions):
    neighborhood = random.choice(neighborhoods)
    price = random.randint(10000, 2000000)
    bedrooms = random.randint(1, 5)
    bathrooms = random.randint(1, bedrooms)
    house_size = random.randint(500, 5000)  # Assuming size is in square feet

    description_prompt = ChatPromptTemplate.from_template(f"""
    Describe a house with the following attributes:
    - Neighborhood: {neighborhood}
    - Price: {price}
    - Bedrooms: {bedrooms}
    - Bathrooms: {bathrooms}
    - House Size: {house_size} square feet
    """)
    
    description = llm(description_prompt.format()).content

    return {
        "Neighborhood": neighborhood,
        "Price": price,
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "HouseSize": house_size,
        "Description": description,
        "NeighborhoodDescription": neighborhood_descriptions[neighborhood]
    }

def generate_listings(num_listings=10):
    listings = [generate_house_listing(neighborhoods, neighborhood_dict) for _ in range(num_listings)]
    return listings

listings = generate_listings()


In [None]:

for idx, listing in enumerate(listings, start=1):
    print(f"Listing {idx}:")
    for key, value in listing.items():
        print(f"{key}: {value}")
    print("\n")


In [40]:
pd.DataFrame(listings).to_csv('listings.csv', index=False)

# Load the listings

In [3]:
df_listings = pd.read_csv('listings.csv')

In [4]:
df_listings['house_id'] = df_listings.index

# Step 3: Storing Listings in a Vector Database
I use lancedb for this, because I already worked with it.

## 3.1 Create embeddings

In [5]:
def generate_embedding(list_to_embed: list):
    return embeddings_model.embed_documents(list_to_embed)

In [6]:
df_listings['embedding'] = df_listings.apply(lambda x: generate_embedding([str(x['Description'] + x['NeighborhoodDescription'])])[0], axis=1)

## 3.2 Setup lancedb

In [7]:
import lancedb
from lancedb.pydantic import LanceModel, vector
db = lancedb.connect("~/.lancedb")

In [8]:
class HouseListing(LanceModel):
    house_id: int
    embedding: vector(1536)
    Neighborhood: str
    Price: float
    Bedrooms: int
    Bathrooms: int
    HouseSize: int
    Description: str
    NeighborhoodDescription: str

In [9]:
table = db.create_table("house_listings", df_listings, schema=HouseListing, mode="overwrite")

## Step 4: Building the User Preference Interface
This is static, to ensure compareable results. But the possibility to generate answers based on given questions is implemented.


In [10]:
questions = [   
                "How big do you want your house to be?",
                "How much rooms should your house have?",
                "What are 3 most important things for you in choosing this property?", 
                "Which amenities would you like?", 
                "Which transportation options are important to you?",
                "How urban do you want your neighborhood to be?",   
            ]

In [11]:
def get_answers(questions: list):
    answers_template = ChatPromptTemplate.from_template("""
    For the given questions {questions} generate answers for each question, that clarify the wishes of the client, who wants to buy a house. 
    Genrate one answer for each question, and numerate them.
    """)
    answers_prompt = answers_template.format(questions=questions)
    return llm(answers_prompt).content

In [12]:
get_answers(questions)

  warn_deprecated(


'1. I would like my house to be around 2,000 square feet. \n2. I would prefer my house to have at least 3 bedrooms and 2 bathrooms. \n3. The 3 most important things for me in choosing this property are a safe neighborhood, proximity to good schools, and access to outdoor recreational areas. \n4. I would like amenities such as a swimming pool, a backyard patio, and a modern kitchen. \n5. Important transportation options for me include easy access to public transportation, bike lanes, and major highways. \n6. I would prefer my neighborhood to be moderately urban, with access to shopping and dining options within walking distance.'

In [13]:
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

In [14]:
embedded_answers = generate_embedding(answers)

# Step 5: Searching Based on Preferences
With the generated embedded answers, to a search on the lancedb table, sort the results based on the mean value _distance grouped by house_ids.

In [15]:
results = []
for e_answer in embedded_answers:
    results.append(table.search(e_answer).limit(5).to_pandas())
df_results = pd.concat(results)
houses_sorted = df_results.groupby('house_id')['_distance'].mean().sort_values()

In [16]:
house_to_suggest = df_results[df_results['house_id']==houses_sorted.idxmin()].drop_duplicates(subset=['house_id'])

In [17]:
house_description = '\n '.join(col for col in house_to_suggest.iloc[0].get(['Description', 'NeighborhoodDescription']).to_dict().values())

# Step 6 Personalizing Listing Descriptions

In [18]:
def personalize_listing(answers: list, questions: list, house_description: str):
    personalize_template = """
    For the given answers {answers} based on the questions {questions}, create a personalized suggestion for the house with the description {house_description}.
    """
    personalize_prompt = ChatPromptTemplate.from_template(personalize_template).format(answers=answers, questions=questions, house_description=house_description)
    return llm(personalize_prompt).content

In [19]:
personalized_listing = personalize_listing(answers, questions, house_description)

In [20]:
print(personalized_listing)

Based on your answers, it seems like you are looking for a cozy and comfortable house in a quiet neighborhood with good local schools and convenient shopping options. You also mentioned wanting a backyard for gardening, a two-car garage, and modern amenities like an energy-efficient heating system. In terms of transportation, you value easy access to a reliable bus line and bike-friendly roads. Additionally, you mentioned wanting a balance between suburban tranquility and urban amenities.

With that in mind, I would suggest considering a house in the charming Green Oaks neighborhood. This cozy house offers three bedrooms and two bathrooms, providing ample space for a small family or individuals looking for a peaceful retreat. Priced at $144,769, this property is affordable and offers a manageable living space of 509 square feet.

The Green Oaks neighborhood is known for its top-rated schools, convenient shopping options, and peaceful surroundings. The house features a well-maintained l