# Project Config

In [12]:
# config.py
import os
from dotenv import load_dotenv

# Load environment variables (typically from a local .env file) before the
# os.getenv() lookup below runs.
load_dotenv()

# API Keys
# None when OPENAI_API_KEY is not present in the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# NOTE(review): no ChatOpenAI / OpenAIEmbeddingFunction call visible in this
# file passes this base URL explicitly -- confirm whether it is picked up some
# other way (e.g. an OPENAI_API_BASE environment variable) or is unused.
OPENAI_API_BASE = "https://openai.vocareum.com/v1"

# LLM Configuration
# Chat model name handed to ChatOpenAI and embedding model name handed to the
# Chroma OpenAI embedding function.
LLM_MODEL = "gpt-3.5-turbo"
EMBEDDING_MODEL = "text-embedding-ada-002"

# Vector DB Configuration
# Directory used by chromadb.PersistentClient; relative to the working directory.
VECTOR_DB_PATH = "./vector_db"

# Application Settings
# Default number of synthetic listings to generate, and default number of
# matches returned per search.
NUM_LISTINGS_TO_GENERATE = 15
NUM_LISTINGS_TO_RETURN = 3

# Data generation

In [11]:
# data_generation.py
import json
import random
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from config import OPENAI_API_KEY, LLM_MODEL, NUM_LISTINGS_TO_GENERATE

def _parse_listing(listing_text):
    """Convert one LLM-formatted listing into a flat ``{field: text}`` dict.

    The reply is split into blank-line-separated paragraphs. Paragraphs seen
    before any description field are read as ``Key: value`` lines. A paragraph
    starting with ``Description:`` or ``Neighborhood Description:`` opens that
    field, and every following paragraph without such a header is appended to
    the field opened last, so multi-paragraph descriptions are kept whole.

    Args:
        listing_text: Raw text returned by the model.

    Returns:
        dict mapping field names to stripped string values; empty when
        nothing could be parsed.
    """
    long_fields = ("Description", "Neighborhood Description")
    listing_data = {}
    open_field = None  # long field that header-less paragraphs belong to

    # Normalise Windows line endings so the blank-line split still works.
    normalised = listing_text.replace("\r\n", "\n")

    for paragraph in normalised.split("\n\n"):
        # Models often indent or pad their output; stripping first keeps the
        # startswith() checks below from silently missing a header.
        paragraph = paragraph.strip()
        if not paragraph:
            continue

        header = next(
            (name for name in long_fields if paragraph.startswith(name + ":")),
            None,
        )
        if header is not None:
            listing_data[header] = paragraph[len(header) + 1:].strip()
            open_field = header
        elif open_field is not None:
            # Continuation paragraph of the description opened last.
            joined = f"{listing_data[open_field]}\n\n{paragraph}"
            listing_data[open_field] = joined.strip()
        else:
            # Still in the header area: one "Key: value" pair per line.
            for line in paragraph.split("\n"):
                if ":" not in line:
                    continue
                key, value = line.split(":", 1)
                key = key.strip()
                listing_data[key] = value.strip()
                if key in long_fields:
                    # Description given without a preceding blank line.
                    open_field = key

    return listing_data


def generate_listings(num_listings=NUM_LISTINGS_TO_GENERATE):
    """Generate synthetic real estate listings using the LLM.

    For each listing, random specifications (neighborhood, price, bedrooms,
    bathrooms, size) are drawn and the chat model is asked to write a listing
    in a fixed textual format, which is then parsed into a dict. All parsed
    listings are written to ``listings.json`` in the current working
    directory, overwriting any existing file.

    Args:
        num_listings: How many listings to request from the model.

    Returns:
        list of dicts, one per listing, keyed by the field names the model
        emitted (e.g. ``"Neighborhood"``, ``"Price"``, ``"Description"``).
    """
    
    llm = ChatOpenAI(model=LLM_MODEL, openai_api_key=OPENAI_API_KEY)
    
    neighborhoods = ["Green Oaks", "Riverside Heights", "Downtown Central", 
                     "Sunset Hills", "Maple Grove", "Harbor View", 
                     "Oakwood Estates", "Pine Valley", "Lakeshore", "Hillcrest"]
    
    listings = []
    
    for _ in range(num_listings):
        # Randomise the hard facts so the generated listings are diverse.
        neighborhood = random.choice(neighborhoods)
        bedrooms = random.randint(1, 5)
        bathrooms = random.randint(1, 4)
        price = random.randint(200, 1500) * 1000
        size = random.randint(800, 4000)
        
        prompt = f"""
        Generate a detailed real estate listing with the following specifications:
        - Neighborhood: {neighborhood}
        - Price: ${price:,}
        - Bedrooms: {bedrooms}
        - Bathrooms: {bathrooms}
        - House Size: {size} sqft
        
        Include a property description highlighting unique features and a separate neighborhood description.
        Format the output exactly as follows:
        
        Neighborhood: [neighborhood name]
        Price: [price]
        Bedrooms: [number]
        Bathrooms: [number]
        House Size: [size] sqft

        Description: [detailed property description]

        Neighborhood Description: [neighborhood description]
        """
        
        response = llm([HumanMessage(content=prompt)])
        listings.append(_parse_listing(response.content))
    
    # json.dump escapes non-ASCII by default, so the file is plain ASCII and
    # readable regardless of the encoding a reader opens it with.
    with open("listings.json", "w", encoding="utf-8") as f:
        json.dump(listings, f, indent=2)
    
    return listings

'''
if __name__ == "__main__":
    # Generate listings when run directly
    listings = generate_listings()
    print(f"Generated {len(listings)} listings and saved to listings.json")
'''



# Vector Store

In [None]:
# vector_store.py
import json
import chromadb
from chromadb.utils import embedding_functions
from langchain.embeddings import OpenAIEmbeddings
from config import OPENAI_API_KEY, EMBEDDING_MODEL, VECTOR_DB_PATH

class VectorStore:
    """Persistent Chroma collection of real estate listings with text search.

    Listings are stored as one document per listing (all fields joined into a
    text block that gets embedded) with the listing dict itself as metadata,
    so a search returns the original listing dicts.
    """

    COLLECTION_NAME = "real_estate_listings"

    def __init__(self):
        """Open the on-disk database and get (or create) the listings collection."""
        self.client = chromadb.PersistentClient(path=VECTOR_DB_PATH)
        self.embedding_function = embedding_functions.OpenAIEmbeddingFunction(
            api_key=OPENAI_API_KEY,
            model_name=EMBEDDING_MODEL
        )

        # get_or_create_collection avoids the previous bare ``except:``, which
        # treated *any* failure (bad path, auth, KeyboardInterrupt, ...) as
        # "collection missing" and then tried to create it.
        self.collection = self.client.get_or_create_collection(
            name=self.COLLECTION_NAME,
            embedding_function=self.embedding_function,
        )

    def load_listings(self, listings_file="listings.json"):
        """Load listings from a JSON file and store them in the collection.

        Args:
            listings_file: Path to a JSON file containing a list of listing
                dicts.

        Returns:
            Number of listings written to the collection. Empty listing
            records are skipped and not counted.
        """
        with open(listings_file, "r", encoding="utf-8") as f:
            listings = json.load(f)

        documents = []
        metadatas = []
        ids = []

        for i, listing in enumerate(listings):
            if not listing:
                # Nothing to embed; an empty metadata dict is also expected to
                # be rejected by Chroma's input validation.
                continue

            # Create a combined text representation for embedding
            combined_text = f"""
            Neighborhood: {listing.get('Neighborhood', '')}
            Price: {listing.get('Price', '')}
            Bedrooms: {listing.get('Bedrooms', '')}
            Bathrooms: {listing.get('Bathrooms', '')}
            House Size: {listing.get('House Size', '')}
            Description: {listing.get('Description', '')}
            Neighborhood Description: {listing.get('Neighborhood Description', '')}
            """

            documents.append(combined_text)
            metadatas.append(listing)
            # Ids follow the position in the file so a reload targets the
            # same records.
            ids.append(f"listing_{i}")

        if not ids:
            # Chroma is not expected to accept an empty batch; nothing to store.
            return 0

        # upsert instead of add: the ids are reused on every load, and add is
        # not relied on here to overwrite rows that already exist, which would
        # leave stale listings behind after the file is regenerated.
        self.collection.upsert(
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )

        return len(ids)

    def search_listings(self, query_text, n_results=3):
        """Return the stored listing dicts closest to ``query_text``.

        Args:
            query_text: Free-text description of what the buyer wants.
            n_results: Maximum number of listings to return.

        Returns:
            list of listing metadata dicts for the single query issued;
            empty when the query produced no metadata.
        """
        results = self.collection.query(
            query_texts=[query_text],
            n_results=n_results
        )

        # ``metadatas`` holds one list per query text; only one was sent.
        if results and 'metadatas' in results and results['metadatas']:
            return results['metadatas'][0]
        return []
'''
if __name__ == "__main__":
    # Test vector store when run directly
    vs = VectorStore()
    count = vs.load_listings()
    print(f"Loaded {count} listings into vector database")
    
    # Test search
    results = vs.search_listings("family home with a garden in a quiet neighborhood")
    print(f"Found {len(results)} matching listings")
'''

# Preference Parser

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from config import OPENAI_API_KEY, LLM_MODEL

class PreferenceParser:
    """Condense a buyer's answers about their ideal home into one LLM-written summary."""

    def __init__(self):
        """Create the chat model used to summarise the buyer's answers."""
        self.llm = ChatOpenAI(model=LLM_MODEL, openai_api_key=OPENAI_API_KEY)

    def collect_preferences(self, questions=None, answers=None):
        """Return a preference summary from supplied Q&A, or ask the user interactively.

        The interactive path is taken whenever either ``questions`` or
        ``answers`` is missing or empty.
        """
        if not (questions and answers):
            return self._collect_interactive_preferences()
        return self._process_predefined_preferences(questions, answers)

    def _process_predefined_preferences(self, questions, answers):
        """Ask the LLM to summarise the paired questions and answers.

        Pairs are formed positionally with ``zip``; the model's reply text is
        returned unchanged.
        """
        # One "Q: ... / A: ..." entry per pair, joined into a single transcript.
        transcript_entries = []
        for asked, replied in zip(questions, answers):
            transcript_entries.append(f"Q: {asked}\nA: {replied}")
        transcript = "\n".join(transcript_entries)

        prompt = f"""
        Based on the following buyer's responses to questions about their home preferences:
        
        {transcript}
        
        Create a detailed summary of their preferences that can be used to search for matching properties.
        Focus on extracting specific details about:
        - Property size and layout
        - Important features and amenities
        - Neighborhood characteristics
        - Location requirements
        - Price range (if mentioned)
        
        Format your response as a comprehensive paragraph describing their ideal home.
        """

        message = HumanMessage(content=prompt)
        reply = self.llm([message])
        return reply.content

    def _collect_interactive_preferences(self):
        """Prompt on stdin for each built-in question, then summarise the answers."""
        print("Please tell us about your ideal home:")

        survey = [
            "How big do you want your house to be?",
            "What are 3 most important things for you in choosing this property?",
            "Which amenities would you like?",
            "Which transportation options are important to you?",
            "How urban do you want your neighborhood to be?"
        ]

        def ask(question):
            # Show the question on its own line, then read one line of input.
            print(f"\n{question}")
            return input("> ")

        collected = [ask(question) for question in survey]
        return self._process_predefined_preferences(survey, collected)

# Manual smoke test for PreferenceParser. Constructing the parser and calling
# collect_preferences() sends a real request to the chat model.
if __name__ == "__main__":
    # Test preference parser when run directly
    parser = PreferenceParser()
    
    # Test with predefined Q&A
    # Same five questions the interactive path asks.
    questions = [
        "How big do you want your house to be?",
        "What are 3 most important things for you in choosing this property?",
        "Which amenities would you like?",
        "Which transportation options are important to you?",
        "How urban do you want your neighborhood to be?"
    ]
    
    # Sample answers, matched to the questions above by position.
    answers = [
        "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
        "A quiet neighborhood, good local schools, and convenient shopping options.",
        "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
        "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
        "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
    ]
    
    # Both lists are non-empty, so the predefined (non-interactive) path runs.
    preferences = parser.collect_preferences(questions, answers)
    print("\nProcessed Preferences:")
    print(preferences)


# Listing Personalizer

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from config import OPENAI_API_KEY, LLM_MODEL

class ListingPersonalizer:
    """Rewrite a listing's descriptions with the LLM to emphasise a buyer's preferences."""

    # Section markers the prompt asks the model to emit; reused to parse the reply.
    _PROPERTY_HEADER = "PERSONALIZED DESCRIPTION:"
    _NEIGHBORHOOD_HEADER = "PERSONALIZED NEIGHBORHOOD DESCRIPTION:"

    def __init__(self):
        """Create the chat model used for rewriting descriptions."""
        self.llm = ChatOpenAI(model=LLM_MODEL, openai_api_key=OPENAI_API_KEY)

    @staticmethod
    def _split_personalized_response(text):
        """Split the model reply into (property description, neighborhood description).

        Text before the property header (e.g. a conversational preamble) is
        discarded. A section whose header is missing yields an empty string.
        """
        before, found_neighborhood, after = text.partition(
            ListingPersonalizer._NEIGHBORHOOD_HEADER
        )
        neighborhood_desc = after.strip() if found_neighborhood else ""

        _, found_property, property_part = before.partition(
            ListingPersonalizer._PROPERTY_HEADER
        )
        property_desc = property_part.strip() if found_property else ""

        return property_desc, neighborhood_desc

    def personalize_listing(self, listing, preferences):
        """Return a copy of ``listing`` with descriptions tailored to ``preferences``.

        Args:
            listing: dict with the listing fields (``"Neighborhood"``,
                ``"Price"``, ``"Description"``, ...); missing keys are
                treated as empty strings. The dict is not modified.
            preferences: Text describing what the buyer is looking for.

        Returns:
            A shallow copy of ``listing`` where ``"Description"`` and
            ``"Neighborhood Description"`` hold the personalized text and
            ``"Original Description"`` / ``"Original Neighborhood
            Description"`` hold the untouched originals. If the model reply
            lacks a section, the original text is kept for that field rather
            than being replaced by an empty string.
        """
        # Extract listing details
        neighborhood = listing.get('Neighborhood', '')
        price = listing.get('Price', '')
        bedrooms = listing.get('Bedrooms', '')
        bathrooms = listing.get('Bathrooms', '')
        house_size = listing.get('House Size', '')
        description = listing.get('Description', '')
        neighborhood_desc = listing.get('Neighborhood Description', '')

        # Create a prompt for personalization
        prompt = f"""
        You are a real estate agent helping a potential buyer find their perfect home.
        
        BUYER PREFERENCES:
        {preferences}
        
        ORIGINAL LISTING:
        Neighborhood: {neighborhood}
        Price: {price}
        Bedrooms: {bedrooms}
        Bathrooms: {bathrooms}
        House Size: {house_size}
        
        Original Description: {description}
        
        Original Neighborhood Description: {neighborhood_desc}
        
        TASK:
        Rewrite the property description to highlight aspects that align with the buyer's preferences.
        Do NOT change any factual information about the property.
        Do NOT invent new features that aren't mentioned in the original description.
        DO emphasize existing features that match what the buyer is looking for.
        Make the description personal and engaging, addressing the buyer's specific needs.
        
        Format your response as:
        
        PERSONALIZED DESCRIPTION:
        [Your personalized property description]
        
        PERSONALIZED NEIGHBORHOOD DESCRIPTION:
        [Your personalized neighborhood description]
        """

        response = self.llm([HumanMessage(content=prompt)])
        personalized_property_desc, personalized_neighborhood_desc = (
            self._split_personalized_response(response.content)
        )

        # Create personalized listing
        personalized_listing = listing.copy()
        personalized_listing["Original Description"] = description
        personalized_listing["Original Neighborhood Description"] = neighborhood_desc
        # Fall back to the original wording when the reply could not be
        # parsed, so the buyer never sees a blank description.
        personalized_listing["Description"] = personalized_property_desc or description
        personalized_listing["Neighborhood Description"] = (
            personalized_neighborhood_desc or neighborhood_desc
        )

        return personalized_listing

# Manual smoke test for ListingPersonalizer. personalize_listing() sends a
# real request to the chat model.
if __name__ == "__main__":
    # Test listing personalizer when run directly
    personalizer = ListingPersonalizer()
    
    # Sample listing
    # Uses the same keys that personalize_listing() reads via listing.get().
    listing = {
        "Neighborhood": "Green Oaks",
        "Price": "$800,000",
        "Bedrooms": "3",
        "Bathrooms": "2",
        "House Size": "2,000 sqft",
        "Description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",
        "Neighborhood Description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."
    }
    
    # Sample preferences
    # Free-text buyer summary, the same kind of paragraph PreferenceParser asks the model for.
    preferences = "The buyer is looking for a family-friendly home with 3+ bedrooms in a quiet neighborhood. They prioritize good schools, a backyard for their children to play, and energy-efficient features. They enjoy gardening and need space for this hobby. The family has one car but would appreciate access to public transportation. They value community and want to be close to parks and family-oriented activities."
    
    personalized = personalizer.personalize_listing(listing, preferences)
    
    # Print the originals next to the rewritten text for side-by-side review.
    # `listing` still holds the originals because personalize_listing() works on a copy.
    print("ORIGINAL DESCRIPTION:")
    print(listing["Description"])
    print("\nPERSONALIZED DESCRIPTION:")
    print(personalized["Description"])
    print("\nORIGINAL NEIGHBORHOOD DESCRIPTION:")
    print(listing["Neighborhood Description"])
    print("\nPERSONALIZED NEIGHBORHOOD DESCRIPTION:")
    print(personalized["Neighborhood Description"])

# Main app

In [None]:
import json
from data_generation import generate_listings
from vector_store import VectorStore
from preference_parser import PreferenceParser
from listing_personalizer import ListingPersonalizer
from config import NUM_LISTINGS_TO_RETURN

class HomeMatch:
    def __init__(self):
        """Initialize the HomeMatch application.

        Builds the three collaborators the other methods delegate to: the
        vector store (search), the preference parser (buyer Q&A summary) and
        the listing personalizer (description rewriting).
        """
        # NOTE: self.listings and self.preferences are not set here; they are
        # assigned later by setup() and process_buyer_preferences().
        self.vector_store = VectorStore()
        self.preference_parser = PreferenceParser()
        self.listing_personalizer = ListingPersonalizer()
    
    def setup(self, regenerate_listings=False):
        """Prepare listings and index them in the vector store.

        Listings are read from ``listings.json`` unless ``regenerate_listings``
        is truthy or the file does not exist, in which case new listings are
        generated. The result is stored on ``self.listings`` and the vector
        store is then asked to load its listings file.
        """
        must_generate = regenerate_listings

        if must_generate:
            print("Generating new real estate listings...")
        else:
            try:
                with open("listings.json", "r") as handle:
                    self.listings = json.load(handle)
            except FileNotFoundError:
                # No cached file yet: fall through to generation below.
                must_generate = True
                print("No existing listings found. Generating new listings...")
            else:
                print(f"Loaded {len(self.listings)} existing listings from file")

        if must_generate:
            self.listings = generate_listings()
            print(f"Generated {len(self.listings)} listings")

        # Index the listings file in the vector database.
        print("Loading listings into vector database...")
        loaded = self.vector_store.load_listings()
        print(f"Loaded {loaded} listings into vector database")
    
    def process_buyer_preferences(self, questions=None, answers=None):
        """Summarise the buyer's preferences and remember them on the instance.

        ``questions`` and ``answers`` are forwarded to the preference parser,
        which decides between the predefined and interactive paths. The
        summary is stored on ``self.preferences`` and also returned.
        """
        print("Processing buyer preferences...")
        summary = self.preference_parser.collect_preferences(questions, answers)
        self.preferences = summary
        print("Buyer preferences processed")
        return summary
    
    def find_matching_listings(self, n_results=NUM_LISTINGS_TO_RETURN):
        """Search the vector store with the stored preferences.

        Reads ``self.preferences`` and returns whatever the vector store's
        search yields for at most ``n_results`` listings.
        """
        print(f"Searching for up to {n_results} matching listings...")
        found = self.vector_store.search_listings(self.preferences, n_results)
        found_count = len(found)
        print(f"Found {found_count} matching listings")
        return found
    
    def personalize_listings(self, matches):
        """Run every matched listing through the personalizer, in order.

        Returns a new list with one personalized listing per entry of
        ``matches``, using the preferences stored on the instance.
        """
        print("Personalizing listings...")

        def tailor(candidate):
            # Announce progress, then rewrite this listing for the buyer.
            area = candidate.get('Neighborhood', 'Unknown')
            print(f"Personalizing listing in {area}...")
            return self.listing_personalizer.personalize_listing(candidate, self.preferences)

        tailored = [tailor(candidate) for candidate in matches]

        print(f"Personalized {len(tailored)} listings")
        return tailored
    
    def display_personalized_listings(self, personalized_listings):
        """Print the personalized listings to stdout.

        Args:
            personalized_listings: Iterable of listing dicts. Missing fields
                are shown with a placeholder ('Unknown', 'N/A' or a
                "No ... available" message) instead of raising.
        """
        # Banner: 80-character rules around the title.
        print("\n" + "="*80)
        print("PERSONALIZED LISTINGS FOR YOU")
        print("="*80)
        
        # Number the listings from 1 for display.
        for i, listing in enumerate(personalized_listings, 1):
            print(f"\nLISTING {i}: {listing.get('Neighborhood', 'Unknown')}")
            print("-"*80)
            print(f"Price: {listing.get('Price', 'N/A')}")
            print(f"Bedrooms: {listing.get('Bedrooms', 'N/A')}")
            print(f"Bathrooms: {listing.get('Bathrooms', 'N/A')}")
            print(f"House Size: {listing.get('House Size', 'N/A')}")
            print("\nDESCRIPTION:")
            print(listing.get('Description', 'No description available'))
            print("\nNEIGHBORHOOD:")
            print(listing.get('Neighborhood Description', 'No neighborhood description available'))
            print("-"*80)