## Setting Up the Python Application

### Install necessary packages (if not already installed in your workspace)

In [None]:
# !pip install langchain langchain-openai langchain-community chromadb python-dotenv

In [None]:
import os
import re

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load environment variables (e.g., OpenAI API key) from a local .env file, if present.
load_dotenv()

# langchain-openai looks the key up under OPENAI_API_KEY. The misspelled
# OPEN_API_KEY is still accepted as a fallback so existing .env files keep working.
_openai_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPEN_API_KEY")
if not _openai_api_key:
    # Fail with an actionable message instead of the opaque TypeError that
    # assigning None into os.environ would raise.
    raise RuntimeError(
        "OpenAI API key not found. Set OPENAI_API_KEY in your environment or .env file."
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key

### Generating Real Estate Listings

In [None]:
# Chat model used for text generation in this notebook.
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Prompt that fixes the listing layout (five header fields followed by two
# free-text sections), shows one worked example, and asks for ten listings.
listing_generation_prompt = """
Generate a diverse real estate listing in the following structured format. Ensure the description is engaging and highlights unique aspects of the property and neighborhood.

Neighborhood: [Neighborhood Name]
Price: $[Price]
Bedrooms: [Number of Bedrooms]
Bathrooms: [Number of Bathrooms]
House Size: [House Size in sqft]

Description: [Detailed description of the house, highlighting features, style, and atmosphere.]

Neighborhood Description: [Detailed description of the neighborhood, including community feel, amenities, local spots, and transportation.]

Here's an example:
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.

Please generate 10 unique real estate listings.
"""

# Send the prompt once and keep only the text of the model's reply.
print("Generating real estate listings...")
listing_response = llm.invoke(listing_generation_prompt)
generated_listings_raw = listing_response.content

# Split the raw LLM output into individual listings.
# Every listing starts on a line beginning with "Neighborhood: ", so we split
# at the start of each such line (zero-width lookahead keeps the marker in the
# chunk). "Neighborhood Description:" lines do not match because of the colon.
# Any text before the first marker (e.g. "Here are 10 listings:") is dropped
# rather than being turned into a bogus listing.
listings_list = [
    chunk.strip()
    for chunk in re.split(r"(?m)^(?=Neighborhood: )", generated_listings_raw.strip())
    if chunk.startswith("Neighborhood: ")
]

if not listings_list:
    print("Warning: no listings in the expected 'Neighborhood: ...' format were found in the LLM output.")

# Save generated listings to a file.
# UTF-8 is set explicitly because the generated text may contain non-ASCII
# characters that the platform default encoding cannot represent.
with open("listings.txt", "w", encoding="utf-8") as f:
    # "---" separator between listings is for human readability only.
    f.writelines(listing + "\n\n---\n\n" for listing in listings_list)

print(f"Generated {len(listings_list)} listings and saved to listings.txt")

# Turn each listing's text into a dict: every line containing a colon becomes
# one entry. The text before the first colon is the key (trimmed, spaces
# replaced by underscores, lower-cased); the text after it is the trimmed
# value. Lines without a colon are ignored; a repeated key keeps the last value.
parsed_listings = [
    {
        label.strip().replace(' ', '_').lower(): text.strip()
        for label, colon, text in (row.partition(':') for row in entry.split('\n'))
        if colon
    }
    for entry in listings_list
]

# Build one Document per parsed listing for the vector store.
# The page content re-renders the listing in the canonical layout: the five
# header fields on consecutive lines, then the two descriptive sections, each
# separated by a blank line. Missing fields are rendered as 'N/A'.
header_fields = (
    ("Neighborhood", "neighborhood"),
    ("Price", "price"),
    ("Bedrooms", "bedrooms"),
    ("Bathrooms", "bathrooms"),
    ("House Size", "house_size"),
)

documents = []
for idx, listing in enumerate(parsed_listings):
    header_text = "\n".join(
        f"{label}: {listing.get(field, 'N/A')}" for label, field in header_fields
    )
    sections = [
        header_text,
        f"Description: {listing.get('description', 'N/A')}",
        f"Neighborhood Description: {listing.get('neighborhood_description', 'N/A')}",
    ]
    # Metadata carries the listing's position plus every parsed field.
    listing_metadata = {"listing_id": idx, **listing}
    documents.append(Document(page_content="\n\n".join(sections), metadata=listing_metadata))

# Show the first three documents as a quick sanity check.
for position, preview_doc in enumerate(documents[:3], start=1):
    print(f"\n--- Listing {position} ---")
    print(preview_doc.page_content)

### Storing Listings in a Vector Database

In [None]:
# Initialize embedding model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Initialize ChromaDB and add documents.
# A persist directory is supplied so the embeddings are stored on disk.
CHROMA_PERSIST_DIR = "./chroma_db"
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=CHROMA_PERSIST_DIR,
)

# Manual persist() is deprecated in LangChain's Chroma wrapper (Chroma >= 0.4
# persists automatically) and is absent from newer wrappers, so only call it
# when the method still exists. Older Chroma versions still need the call.
if hasattr(vectorstore, "persist"):
    vectorstore.persist()
print(f"Stored {len(documents)} listings in ChromaDB at {CHROMA_PERSIST_DIR}")

# Create a retriever for semantic search
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})  # Retrieve top 3 relevant listings

### Building the User Preference Interface
### Searching Based on Preferences

In [None]:
def collect_buyer_preferences():
    """
    Interactively ask the buyer five questions about their ideal home.

    Returns:
        A ``(query_string, preferences)`` tuple. ``preferences`` maps each
        preference key to the buyer's raw answer; ``query_string`` folds all
        answers into one sentence-style string for semantic search.
    """
    print("\n--- Tell us about your dream home! ---")

    # (preference key, label used in the query string, question shown to the buyer)
    questions = (
        (
            "house_size",
            "House size",
            "How big do you want your house to be (e.g., 'a comfortable three-bedroom house', 'at least 2500 sqft')? ",
        ),
        (
            "important_things",
            "Most important things",
            "What are the 3 most important things for you in choosing this property (e.g., 'a quiet neighborhood, good local schools, and convenient shopping options')? ",
        ),
        (
            "amenities",
            "Amenities",
            "Which amenities would you like (e.g., 'a backyard for gardening, a two-car garage, and a modern, energy-efficient heating system')? ",
        ),
        (
            "transportation",
            "Transportation",
            "Which transportation options are important to you (e.g., 'easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads')? ",
        ),
        (
            "neighborhood_urbanity",
            "Neighborhood type",
            "How urban do you want your neighborhood to be (e.g., 'a balance between suburban tranquility and access to urban amenities like restaurants and theaters')? ",
        ),
    )

    # Ask the questions in order, storing each raw answer under its key.
    preferences = {}
    for key, _label, question in questions:
        preferences[key] = input(question)

    # Render every answer as "<label>: <answer>." and join them behind a fixed lead-in.
    answer_summary = " ".join(
        f"{label}: {preferences[key]}." for key, label, _question in questions
    )
    query_string = "I'm looking for a house with the following preferences: " + answer_summary
    return query_string, preferences

def find_matching_listings(query):
    """
    Look up the listings that best match ``query``.

    Echoes the query to stdout, then returns whatever the module-level
    ``retriever`` yields for it.
    """
    print(f"\nSearching for listings matching: '{query}'")
    return retriever.invoke(query)

### Personalizing Listing Descriptions

In [None]:
def personalize_listing_description(listing_content, buyer_preferences_summary):
    """
    Ask the LLM for a buyer-tailored rewrite of a listing's "Description" section.

    The original listing text and the buyer's preference summary are inserted
    into a prompt, which is piped through the module-level ``llm`` and a
    string output parser. Returns the resulting text.
    """
    template_text = """
        You are a real estate agent trying to personalize a listing for a potential buyer.
        Given the original listing content and the buyer's preferences, rewrite the "Description"
        section of the listing to subtly emphasize aspects that align with the buyer's needs.
        Do not alter any factual information like price, bedrooms, bathrooms, or house size.
        Focus on making the property sound more appealing to this specific buyer.

        Original Listing Content:
        {listing_content}

        Buyer's Preferences:
        {buyer_preferences}

        Personalized Listing (only rewrite the Description, keep all other original details):
        """

    # Values substituted for the two placeholders in the template above.
    prompt_inputs = {
        "listing_content": listing_content,
        "buyer_preferences": buyer_preferences_summary,
    }

    # prompt -> model -> plain string
    chain = PromptTemplate.from_template(template_text) | llm | StrOutputParser()

    print("\nPersonalizing listings...")
    return chain.invoke(prompt_inputs)

### Deliverables and Testing

In [None]:


def _extract_description(personalized_listing):
    """Return the "Description:" section of the LLM output, or None if absent.

    The section runs from just after the "Description:" tag up to the next
    "Neighborhood Description:" tag, or to the end of the text when that tag
    does not follow. Surrounding whitespace is stripped.
    """
    description_tag = "Description:"
    neighborhood_tag = "Neighborhood Description:"
    # "Neighborhood " -- the text that turns a "Description:" hit into the other tag.
    neighborhood_prefix = neighborhood_tag[: -len(description_tag)]

    # A plain find() would also match the tail of "Neighborhood Description:",
    # so skip any hit that is immediately preceded by "Neighborhood ".
    search_from = 0
    while True:
        tag_idx = personalized_listing.find(description_tag, search_from)
        if tag_idx == -1:
            return None
        if not personalized_listing.endswith(neighborhood_prefix, 0, tag_idx):
            break
        search_from = tag_idx + len(description_tag)

    body_start = tag_idx + len(description_tag)
    body_end = personalized_listing.find(neighborhood_tag, body_start)
    if body_end == -1:
        # The LLM did not repeat the neighborhood section; take the rest.
        body_end = len(personalized_listing)
    return personalized_listing[body_start:body_end].strip()


def run_homematch():
    """Main function to run the HomeMatch application.

    Collects the buyer's preferences, retrieves the matching listings, and for
    each match prints the original listing followed by an LLM-personalized
    description. Prints a notice and returns early when nothing matches.
    """
    print("Welcome to HomeMatch - Your personalized home finder!")

    buyer_query, buyer_prefs_dict = collect_buyer_preferences()

    # Format buyer preferences for LLM personalization
    buyer_preferences_summary = (
        f"House size: {buyer_prefs_dict['house_size']}. "
        f"Most important things: {buyer_prefs_dict['important_things']}. "
        f"Amenities: {buyer_prefs_dict['amenities']}. "
        f"Transportation: {buyer_prefs_dict['transportation']}. "
        f"Neighborhood type: {buyer_prefs_dict['neighborhood_urbanity']}."
    )

    matching_listings = find_matching_listings(buyer_query)

    if not matching_listings:
        print("\nNo matching listings found for your preferences. Please try adjusting your criteria.")
        return

    print(f"\nFound {len(matching_listings)} potentially matching listings:")

    for i, doc in enumerate(matching_listings, start=1):
        print(f"\n--- Recommended Listing {i} ---")
        print("Original Listing:")
        print(doc.page_content)

        personalized_listing = personalize_listing_description(doc.page_content, buyer_preferences_summary)
        print("\nPersonalized Description for You:")

        # The LLM may echo the whole listing; keep only the rewritten
        # description and show the neighborhood text from the stored metadata.
        extracted_description = _extract_description(personalized_listing)
        if extracted_description is None:
            # Unexpected output format: show the raw LLM output instead.
            print("Could not parse personalized description, showing full LLM output:")
            print(personalized_listing)
            print("--- (End of LLM output) ---")
        else:
            print(f"Description: {extracted_description}")
            print(f"Neighborhood Description: {doc.metadata.get('neighborhood_description', 'N/A')}")

        print("\n--------------------------")


if __name__ == "__main__":
    run_homematch()