This is a starter notebook for the project. You'll have to import the libraries you need; a list of the ones available in this workspace is in the `requirements.txt` file.

In [1]:
import os

# OpenAI credentials and endpoint used by every LangChain client in this notebook.
# `setdefault` keeps a real key that is already exported in the environment;
# the placeholder is only used when nothing is set, so it can no longer clobber
# working credentials. Never commit a real key in this cell.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_API_KEY")
# Requests are routed through the Vocareum endpoint rather than api.openai.com.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

import re

from langchain.chains import LLMChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Single LLM client shared by all cells below; they call it as `llm(prompt_text)`
# with a plain string and get the completion text back.
LLM_MODEL_NAME = "gpt-3.5-turbo"
llm = OpenAI(model_name=LLM_MODEL_NAME)



In [2]:
# Generate the synthetic listings dataset once and cache it on disk.
#
# The LLM call is slow, costs money and is non-deterministic. Re-running it
# would silently replace the dataset that the vector store and the example
# outputs further down were built from, so an existing non-empty file is reused.
# Set REGENERATE_LISTINGS = True to force a fresh generation.
LISTINGS_PATH = "listings.txt"
REGENERATE_LISTINGS = False

# Plain (non-f) string: the {placeholders} are sent literally to the model as a
# description of the expected format.
general_prompt = """
Generate 10 unique real estate listings with detailed information. Format each listing as follows:
Listing 1:
Neighborhood: {Neighborhood}
Price: {Price}
Bedrooms: {Number of Bedrooms}
Bathrooms: {Number of Bathrooms}
House Size: {Size in sqft}
Description: {Detailed description}

Listing 2:
Neighborhood: {Neighborhood}
Price: {Price}
Bedrooms: {Number of Bedrooms}
Bathrooms: {Number of Bathrooms}
House Size: {Size in sqft}
Description: {Detailed description}

Continue this format for all 10 listings. Provide realistic, diverse properties, including a mix of urban and suburban homes.
"""

listings_cached = os.path.isfile(LISTINGS_PATH) and os.path.getsize(LISTINGS_PATH) > 0

if REGENERATE_LISTINGS or not listings_cached:
    generated_listings = llm(general_prompt)

    # Do not overwrite the dataset with an empty completion.
    if not generated_listings.strip():
        raise ValueError("The LLM returned no listings; listings.txt was not written.")

    # Save the generated text directly to a file
    with open(LISTINGS_PATH, "w") as listings_file:
        listings_file.write(generated_listings)
else:
    # Reuse the previously generated dataset so downstream cells stay reproducible.
    with open(LISTINGS_PATH, "r") as listings_file:
        generated_listings = listings_file.read()

print(generated_listings)

Listing 1:
Neighborhood: Upper East Side, Manhattan, New York
Price: $2,500,000
Bedrooms: 4
Bathrooms: 3
House Size: 2,500 sqft
Description: Stunning penthouse with city views, luxurious finishes, and a rooftop terrace.

Listing 2:
Neighborhood: Beverly Hills, California
Price: $5,000,000
Bedrooms: 5
Bathrooms: 6
House Size: 6,000 sqft
Description: Gated estate with a pool, tennis court, and guest house in the prestigious area of Beverly Hills.

Listing 3:
Neighborhood: Capitol Hill, Seattle, Washington
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 1,800 sqft
Description: Charming craftsman home with a spacious backyard and mountain views in a sought-after neighborhood.

Listing 4:
Neighborhood: South Beach, Miami, Florida
Price: $1,200,000
Bedrooms: 2
Bathrooms: 2.5
House Size: 1,500 sqft
Description: Modern condo with ocean views, designer kitchen, and access to a pool and fitness center.

Listing 5:
Neighborhood: Bucktown, Chicago, Illinois
Price: $600,000
Bedrooms: 3
Bathroo

In [9]:
# Build the Chroma vector store with one document per generated listing.
embeddings = OpenAIEmbeddings()

# Load listings from the file generated earlier
with open("listings.txt", "r") as file:
    listings_data = file.read()

# Split on the "Listing N:" headers so every document holds exactly one listing.
# A fixed 500-character splitter ignores listing boundaries: two short listings
# can be merged into one chunk and a long one can be cut, so retrieved documents
# would not correspond to individual properties.
listing_pattern = re.compile(
    r"^Listing \d+:.*?(?=^Listing \d+:|\Z)", re.MULTILINE | re.DOTALL
)
listings = [block.strip() for block in listing_pattern.findall(listings_data)]

# Fallback for text that does not follow the "Listing N:" format: keep the
# original size-based chunking instead of indexing nothing.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
if not listings:
    listings = splitter.split_text(listings_data)

if not listings:
    raise ValueError("listings.txt contains no text to index.")

documents = [Document(page_content=text) for text in listings]

# Stable ids make the cell safe to re-run: the same ids are written again
# instead of appending a second copy of every listing to the persisted DB.
# NOTE(review): relies on the installed LangChain Chroma wrapper upserting by id
# - confirm against the version pinned in requirements.txt.
document_ids = [f"listing-{position}" for position in range(1, len(documents) + 1)]

# Initialize and populate Chroma database
vectorstore = Chroma(persist_directory="real_estate_db", embedding_function=embeddings)
vectorstore.add_documents(documents, ids=document_ids)

# Persist the database to disk
vectorstore.persist()
print("Vector database created and populated successfully.")

Vector database created and populated successfully.


In [3]:
# Load listings from the file generated earlier
with open("listings.txt", "r") as file:
    listings_data = file.read()

# With no listings in the prompt the model would invent properties, so stop early.
if not listings_data.strip():
    raise ValueError("listings.txt is empty; generate the listings before matching.")

# User preferences
preferences = {
    "location": "California",
    "price_range": "$1,500,000 - $6,000,000",
    "bedrooms": "4 - 6",
    "bathrooms": "4 - 6",
    "amenities": ["backyard", "pool"],
    "lifestyle": "Luxurious",
}

# LLM-based matching: every listing is placed in a single prompt and the model
# picks the ones that fit the preferences. The vector store is not queried here.
#
# The closing instruction asks for the listings only. The previous wording also
# requested a per-listing summary while forbidding any additional text, which
# was contradictory; the augmentation step consumes `best_matches` as bare listings.
prompt = f"""
You are a real estate expert. Below are several property listings, and I would like you to help me find the best matches based on the following preferences:

Preferences:
Location: {preferences['location']}
Price Range: {preferences['price_range']}
Bedrooms: {preferences['bedrooms']}
Bathrooms: {preferences['bathrooms']}
Amenities: {', '.join(preferences['amenities'])}
Lifestyle: {preferences['lifestyle']}

Here are the property listings:

{listings_data}

Please return the listings that most closely match my preferences.
Only return the matching listings, exactly as they are written above, without any additional text.
"""

best_matches = llm(prompt)

print("Best Matching Listings:")
print(best_matches)

Best Matching Listings:
Listing 2:
Neighborhood: Beverly Hills, California
Price: $5,000,000
Bedrooms: 5
Bathrooms: 6
House Size: 6,000 sqft
Description: Gated estate with a pool, tennis court, and guest house in the prestigious area of Beverly Hills.

Listing 8:
Neighborhood: Pacific Heights, San Francisco, California
Price: $3,500,000
Bedrooms: 6
Bathrooms: 4
House Size: 4,500 sqft
Description: Victorian mansion with a grand staircase, bay windows, and garden in the exclusive Pacific Heights neighborhood.

Listing 10:
Neighborhood: Hollywood Hills, Los Angeles, California
Price: $4,000,000
Bedrooms: 5
Bathrooms: 4.5
House Size: 5,000 sqft
Description: Modern villa with panoramic city views, infinity pool, home theater, and guest suite in the Hollywood Hills.


In [4]:
from langchain.llms import OpenAI

# Personalize the matched listings.
# The prompt is built from a reusable template: the buyer preferences and the
# listings selected in the previous step are substituted into it, and the model
# is asked to rewrite the descriptions without altering any facts.
augmentation_template = """
You are a real estate expert. Below are multiple property listings, and I would like you to personalize the descriptions based on the following preferences:

Preferences:
Location: {location}
Price Range: {price_range}
Bedrooms: {bedrooms}
Bathrooms: {bathrooms}
Amenities: {amenities}
Lifestyle: {lifestyle}

Here are the listings:
{listings}

Please rewrite each description to emphasize the aspects of the property that best match the preferences without changing any factual details. Provide the enhanced descriptions for each listing.
"""

prompt = augmentation_template.format(
    location=preferences["location"],
    price_range=preferences["price_range"],
    bedrooms=preferences["bedrooms"],
    bathrooms=preferences["bathrooms"],
    amenities=", ".join(preferences["amenities"]),
    lifestyle=preferences["lifestyle"],
    listings=best_matches,
)

augmented_listings = llm(prompt)

print(augmented_listings)

Listing 2:
Located in the luxurious neighborhood of Beverly Hills, California, this stunning gated estate is priced at $5,000,000. Boasting 5 bedrooms and 6 bathrooms within its spacious 6,000 sqft, this property offers the epitome of luxury living. The highlight of this estate is the beautiful pool, tennis court, and guest house, making it a perfect retreat for those who value extravagance and style.

Listing 8:
Situated in the exclusive Pacific Heights neighborhood of San Francisco, California, this Victorian mansion is priced at $3,500,000. Featuring 6 bedrooms and 4 bathrooms spread across 4,500 sqft, this property exudes elegance and grandeur. The grand staircase, bay windows, and garden add a touch of sophistication to this luxurious home, perfect for those seeking a lavish lifestyle in the heart of the city.

Listing 10:
Perched in the coveted Hollywood Hills neighborhood of Los Angeles, California, this modern villa is listed at $4,000,000. With 5 bedrooms and 4.5 bathrooms enc