This is a starter notebook for the project. You'll have to import the libraries you need; a list of the ones available in this workspace is in the `requirements.txt` file.

## Import Necessary Modules

In [1]:
import os
import pandas as pd
import openai
import logging

## Step 1: Initialize OpenAI API

In [2]:
# Step 1: Initialize OpenAI
# Use setdefault so that a real key already exported in the environment is
# not overwritten by the placeholder. The placeholder is only a fallback and
# must be replaced (or OPENAI_API_KEY exported) before any API call can succeed.
os.environ.setdefault("OPENAI_API_KEY", "YOUR API KEY")
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

# Initialize OpenAI API from the environment variables set above
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]

# Setup logging (INFO level) and a module-level logger used by later cells
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.info("Starting the script.")

INFO:__main__:Starting the script.


### Generating and Parsing Real Estate Listings



In [3]:
# File paths
# CSV file that caches the generated listings: the generation step in this
# cell checks for it, writes it after generating, and reads it back otherwise.
listings_csv_path = './real_estate_listings.csv'

def generate_listing():
    """Ask the chat model for one synthetic real estate listing.

    The prompt is sent as a single system message to ``gpt-3.5-turbo`` with a
    500-token completion limit.

    Returns:
        str: The content of the first returned choice, with leading and
        trailing whitespace stripped.
    """
    listing_template = """Generate a detailed real estate listing with the following format:
Neighborhood: <Neighborhood Name>
Price: <$Price>
Bedrooms: <Number of Bedrooms>
Bathrooms: <Number of Bathrooms>
House Size: <Size in sqft>

Description: <Detailed description of the property>

Neighborhood Description: <Description of the neighborhood>
"""
    chat_messages = [{"role": "system", "content": listing_template}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        max_tokens=500,
    )
    # Only the first choice is used.
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

def parse_listing(listing):
    """Parse the text of one generated listing into a dict of fields.

    Recognised labels are ``Neighborhood``, ``Price``, ``Bedrooms``,
    ``Bathrooms``, ``House Size`` (single-line values) and ``Description``,
    ``Neighborhood Description`` (values that may continue over the
    following lines). A label is recognised when a line, after stripping
    surrounding whitespace, starts with ``"<Label>:"``; a space after the
    colon is optional and the value may be empty.

    Continuation lines are joined to the open multi-line field with single
    spaces; blank lines are skipped. Any recognised label closes the field
    that is currently open. If a label occurs more than once, the last
    occurrence wins.

    Args:
        listing: Raw listing text. ``None`` or an empty string yields a dict
            whose values are all empty strings.

    Returns:
        dict: Always contains the seven keys listed above, in that order,
        each mapped to a (possibly empty) string.
    """
    single_line_fields = ('Neighborhood', 'Price', 'Bedrooms', 'Bathrooms', 'House Size')
    multi_line_fields = ('Description', 'Neighborhood Description')
    all_fields = single_line_fields + multi_line_fields

    # Key order is kept stable because it becomes the CSV column order.
    listing_dict = {field: '' for field in all_fields}
    collected = {field: [] for field in multi_line_fields}
    open_field = None  # multi-line field currently receiving continuation lines

    for raw_line in (listing or '').splitlines():
        line = raw_line.strip()
        # "Neighborhood:" never prefixes "Neighborhood Description:" (and
        # likewise for "Description:"), so at most one label can match.
        label = next((f for f in all_fields if line.startswith(f + ':')), None)

        if label is None:
            if open_field is not None and line:
                collected[open_field].append(line)
            continue

        # Slice past "<Label>:" instead of splitting on "<Label>: ", so a
        # missing space or an empty inline value cannot raise IndexError.
        value = line[len(label) + 1:].strip()
        if label in collected:
            collected[label] = [value] if value else []
            open_field = label
        else:
            listing_dict[label] = value
            open_field = None

    for field, parts in collected.items():
        listing_dict[field] = ' '.join(parts)

    return listing_dict

# Step 2: Generating Real Estate Listings (if not already generated)
# The CSV acts as a cache: reuse it when present, otherwise generate and save.
if os.path.exists(listings_csv_path):
    # Load listings from CSV
    df_listings = pd.read_csv(listings_csv_path)
    listings = df_listings.to_dict('records')
    logger.info(f"Loaded listings from {listings_csv_path}.")
else:
    logger.info("Generating real estate listings.")
    listings = []

    for i in range(10):
        # One model call per listing; keep the raw text for the log preview.
        listing = generate_listing()
        parsed_listing = parse_listing(listing)
        listings.append(parsed_listing)
        logger.info(f"Generated listing {i+1}: {listing[:60]}...")

    # Save listings to CSV
    df_listings = pd.DataFrame(listings)
    df_listings.to_csv(listings_csv_path, index=False)
    logger.info(f"Saved generated listings to {listings_csv_path}.")

INFO:__main__:Loaded listings from ./real_estate_listings.csv.


### Setting Up Real Estate Listings Retrieval and Personalization

In [4]:
import os
import pandas as pd
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

# Initialize OpenAI API
# Re-applies the key and base URL from the environment variables.
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]

# Load real estate listings from CSV
# NOTE(review): the path is repeated here as a literal instead of reusing
# listings_csv_path; keep the two in sync.
loader = CSVLoader(file_path='./real_estate_listings.csv')
docs = loader.load()

# Initialize the language model
# temperature=0 for the question-answering step; get_personalized_listings
# builds a second instance from the same model_name with a higher temperature.
model_name = 'gpt-3.5-turbo'
llm = OpenAI(model_name=model_name, temperature=0, max_tokens=2000)

# Split documents into manageable chunks
# chunk_size is presumably measured in characters - confirm against the
# CharacterTextSplitter documentation. No overlap between chunks.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = splitter.split_documents(docs)

# Create embeddings for the documents
embeddings = OpenAIEmbeddings()

# Store documents in Chroma vector database
# No persist directory is passed, so the store is rebuilt each time this cell runs.
db = Chroma.from_documents(split_docs, embeddings)


# Step 4: Building the User Preference Interface
# Questions a buyer would be asked. NOTE(review): `questions` is not referenced
# by any other cell visible in this notebook.
questions = [
    "How big do you want your house to be?", 
    "What are the 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?", 
    "How urban do you want your neighborhood to be?"
]

# Buyer profile 1. The answers are free-form preference statements that
# get_personalized_listings joins into a single string.
answers1 = [
    "A luxurious five-bedroom villa with high-end finishes and top-of-the-line appliances.",
    "An exclusive, upscale neighborhood with world-class shopping, dining, and entertainment options.",
    "A gourmet chef's kitchen, a grand foyer, and a beautifully landscaped backyard with a pool and outdoor kitchen.",
    "Privacy and security in a gated community with access to top-rated schools and public parks.",
    "A prestigious area known for its celebrity residents and luxurious lifestyle."
]

# Expected best match for answers1: "Beverly Hills"


# Buyer profile 2.
answers2 = [
    "A charming three-bedroom townhouse with hardwood floors and high ceilings.",
    "A vibrant neighborhood with historic brownstones and a lively community.",
    "A modern kitchen with granite countertops and a backyard for relaxing.",
    "Proximity to a park with walking and biking paths, playgrounds, and recreational facilities.",
    "Excellent schools and easy access to public transportation and diverse amenities."
]

INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.


### Function to Retrieve and Personalize Real Estate Listings Based on Buyer Preferences

In [5]:
def get_personalized_listings(answers, k=1, use_chain_helper=False):
    """Retrieve the listings closest to a buyer's preferences and print tailored versions.

    The answers are joined into one preference string, used to build a query
    against the module-level Chroma store ``db``. Each retrieved listing is
    then rewritten by a higher-temperature model instance to emphasise the
    buyer's preferences, and the rewritten text is printed.

    Args:
        answers: Iterable of preference strings from the buyer.
        k: Number of listings to retrieve and personalize. Defaults to 1.
        use_chain_helper: When True, run the question-answering step through
            ``RetrievalQA``; otherwise through ``load_qa_chain`` on the
            retrieved documents. Defaults to False.

    Returns:
        None. Results are printed as ``Personalized Listing <n>: ...``.
    """
    # Buyer Preference Parsing
    preferences = " ".join(answers)

    # Step 5: Searching Based on Preferences
    query = f"""
        Based on the listings in the context, find properties that match the following buyer preferences:
        {preferences}
        """

    # Retrieve before branching: the personalization loop below needs
    # similar_docs whichever question-answering path is taken.
    similar_docs = db.similarity_search(query, k=k)

    # NOTE(review): `result` (the QA answer) is computed but not used further
    # down; the call is kept so the function's behaviour is unchanged.
    if use_chain_helper:
        rag = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": k}),
        )
        result = rag.run(query)
    else:
        prompt = PromptTemplate(
            template="{query}\nContext: {context}",
            input_variables=["query", "context"],
        )
        chain = load_qa_chain(llm, prompt=prompt, chain_type="stuff")
        result = chain.run(input_documents=similar_docs, query=query)

    # Step 6: Personalizing Listing Descriptions
    # Create an instance of the language model with higher temperature for creativity
    creative_llm = OpenAI(model_name=model_name, temperature=0.8, max_tokens=2000)

    personalized_descriptions = []
    for doc in similar_docs:
        listing = doc.page_content
        augmented_listing = creative_llm(f"Tailor this listing to emphasize the buyer's preferences: {preferences}\n\n{listing}")
        personalized_descriptions.append(augmented_listing)

    # Output the results
    for idx, description in enumerate(personalized_descriptions, 1):
        print(f"Personalized Listing {idx}:\n{description}\n")

### Demo

In [6]:
# Demo for buyer profile 1 (answers1).
get_personalized_listings(answers1)
# expected answer: Beverly Hills

Personalized Listing 1:
Don't miss out on the opportunity to own this luxurious five-bedroom villa in the prestigious Beverly Hills neighborhood. With high-end finishes, top-of-the-line appliances, and a gourmet chef's kitchen, this home is perfect for those who appreciate the finer things in life. Enjoy privacy and security in a gated community, with access to top-rated schools and public parks. Indulge in the upscale shopping, dining, and entertainment options just minutes away. Join the ranks of celebrity residents and live the luxurious lifestyle you deserve in Beverly Hills. Offered at $5,000,000, this is a once-in-a-lifetime opportunity not to be missed.



In [7]:
# Demo for buyer profile 2 (answers2).
get_personalized_listings(answers2)
# expected answer: Park Slope

Personalized Listing 1:


Don't miss out on this opportunity to own a charming townhouse in the heart of Park Slope! With its hardwood floors, high ceilings, and modern kitchen, this home is perfect for buyers who appreciate a mix of historic charm and contemporary design. Enjoy the vibrant neighborhood with its historic brownstones and lively community, and take advantage of the nearby park with walking and biking paths, playgrounds, and recreational facilities. With excellent schools and easy access to public transportation, this townhouse offers a convenient and comfortable lifestyle for you and your family. Listed at $1,200,000, this home won't be on the market for long - schedule a showing today!



In [8]:
# Buyer profile 3: free-form preference statements, joined into one string
# by get_personalized_listings.
answers3 = [
    "A spacious four-bedroom home with a master suite featuring a walk-in closet and en-suite bathroom with dual vanity sinks, a soaking tub, and a separate shower.",
    "A peaceful neighborhood known for its well-maintained properties, family-friendly environment, and nearby park with sidewalks for strolling and outdoor activities.",
    "An open concept layout with a modern kitchen equipped with stainless steel appliances, granite countertops, and a large island perfect for gathering.",
    "A beautifully landscaped backyard with a patio and plenty of green space, ideal for entertaining and family gatherings.",
    "Conveniently located near a variety of shops, restaurants, and schools, offering the perfect blend of suburban tranquility and accessibility to urban amenities."
]

# expected answer: Hilltop Estates

# Demo for buyer profile 3 (answers3).
get_personalized_listings(answers3)

Personalized Listing 1:
Don't miss out on this spacious four-bedroom home in the peaceful Hilltop Estates neighborhood! The master suite features a walk-in closet and luxurious en-suite bathroom with dual vanity sinks, soaking tub, and separate shower. The open concept layout includes a modern kitchen with stainless steel appliances, granite countertops, and a large island perfect for gathering. Outside, the beautifully landscaped backyard is ideal for entertaining and family gatherings with a patio and plenty of green space. With nearby parks, sidewalks, and easy access to shops, restaurants, and schools, Hilltop Estates offers the perfect blend of suburban tranquility and urban accessibility. Schedule a tour today and make this your dream home!

