This is a starter notebook for the project. You'll have to import the libraries you need; a list of the ones available in this workspace is in the `requirements.txt` file.

In [1]:
import os

# SECURITY: do not hardcode the API key in the notebook -- anything written
# here is visible to every reader of the file. The key is taken from the
# environment instead, with an interactive prompt as a fallback.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed for the fallback prompt

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Endpoint the OpenAI-compatible clients should talk to.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

from langchain.llms import OpenAI


In [2]:
import openai
import json
import pandas as pd
import os
import random

# Pass the key from the OPENAI_API_KEY environment variable to the openai client
# (os.getenv returns None when the variable is not set).
openai.api_key = os.getenv("OPENAI_API_KEY")

# Value pools that the random listing generator draws from.
# Some prices appear more than once; the repeats are kept exactly as they are.
neighborhoods = [
    "Green Oaks",
    "Riverview",
    "Mountain Vista",
    "Lakeside",
    "City Center",
    "Casino Area",
    "South Airport",
    "Transportation Center",
    "Countryside",
    "Industry Spot",
]
prices = [
    f"${amount:,}"
    for amount in (
        500_000, 750_000, 1_000_000, 1_200_000, 600_000,
        850_000, 700_000, 750_000, 650_000, 700_000,
    )
]
bedrooms = list(range(2, 6))
bathrooms = [1, 2, 2.5, 3]
house_sizes = [f"{square_feet:,} sqft" for square_feet in (1_500, 2_000, 2_500, 2_200, 1_800)]

# Function to create the template with random values
def generate_random_listing():
    """Return a listing dict with one random pick from each value pool.

    The keys are 'neighborhood', 'price', 'bedrooms', 'bathrooms' and
    'house_size', drawn in that order from the module-level pools.
    """
    attribute_pools = (
        ("neighborhood", neighborhoods),
        ("price", prices),
        ("bedrooms", bedrooms),
        ("bathrooms", bathrooms),
        ("house_size", house_sizes),
    )
    return {attribute: random.choice(pool) for attribute, pool in attribute_pools}

# Function to generate listing description using OpenAI
def generate_listing_description(listing, model="gpt-3.5-turbo", max_tokens=300):
    """Ask the chat model for a property and a neighborhood description.

    Args:
        listing: Dict with the keys 'neighborhood', 'price', 'bedrooms',
            'bathrooms' and 'house_size'.
        model: Chat model name passed to the API.
        max_tokens: Completion token limit passed to the API. Both
            descriptions come from the same reply, so they share this budget.

    Returns:
        A (property_description, neighborhood_description) tuple of strings.
        When the API call or the reply handling fails, the first element is
        an error message and the second is "" -- the error is reported
        in-band rather than raised.

    Raises:
        KeyError: If `listing` lacks one of the keys above (the prompt is
            built before the guarded section).
    """
    prompt = f"""
    Generate a realistic real estate listing based on the following details:
    
    Neighborhood: {listing['neighborhood']}
    Price: {listing['price']}
    Bedrooms: {listing['bedrooms']}
    Bathrooms: {listing['bathrooms']}
    House Size: {listing['house_size']}
    
    Description: Provide a detailed property description.
    
    Neighborhood Description: Provide a description of the neighborhood.
    """
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        full_description = response["choices"][0]["message"]["content"].strip()
        return _split_listing_description(full_description)

    except openai.error.OpenAIError as e:
        return f"OpenAI API error: {e}", ""
    except Exception as e:
        return f"An error occurred: {e}", ""


def _split_listing_description(full_description):
    """Split a model reply into (property, neighborhood) description strings."""
    # partition() cuts at the FIRST marker only, so a reply that repeats the
    # marker keeps all of its remaining text in the neighborhood part.
    property_part, _, neighborhood_part = full_description.partition("Neighborhood Description:")
    property_part = property_part.strip()
    # The prompt labels the first section "Description:"; drop the label when
    # the model echoes it so it does not end up inside the stored text.
    if property_part.startswith("Description:"):
        property_part = property_part[len("Description:"):].strip()
    return property_part, neighborhood_part.strip()

# Generate 10 listings with descriptions
listings = []
for _ in range(10):
    listing_details = generate_random_listing()
    # The helper returns (property text, neighborhood text), in that order
    generated_texts = generate_listing_description(listing_details)
    listing_details.update(
        description=generated_texts[0],
        neighborhood_description=generated_texts[1],
    )
    listings.append(listing_details)

# Persist every listing to disk as indented (human-readable) JSON
with open("listings.json", "w") as listings_file:
    listings_file.write(json.dumps(listings, indent=4))

print("Listings generated and saved to 'listings.json'.")

Listings generated and saved to 'listings.json'.


In [3]:
import chromadb
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Create the ChromaDB client, pointing it at the local ./chroma_db path
chroma_client = chromadb.PersistentClient(path="./chroma_db")  

# Fetch the "real_estate_listings" collection, creating it if it does not exist yet.
# Later cells read and write the listings through this `collection` variable.
collection = chroma_client.get_or_create_collection(name="real_estate_listings")

In [4]:
import json

# Initialize OpenAI Embeddings via LangChain
embeddings_model = OpenAIEmbeddings()

# Read generated listings from file
with open("listings.json", "r") as f:
    listings = json.load(f)

# Fail fast if the cell that creates the ChromaDB collection has not been run
if "collection" not in globals():
    raise ValueError("ChromaDB collection is not initialized!")

# Store each listing with its embedding in ChromaDB
for idx, listing in enumerate(listings):
    if not isinstance(listing, dict):
        print(f" Skipping invalid listing at index {idx}")
        continue

    # Text that gets embedded: the structured fields followed by both descriptions
    text_representation = (
        f"Neighborhood: {listing.get('neighborhood', 'Unknown')}\n"
        f"Price: {listing.get('price', 'Unknown')}\n"
        f"Bedrooms: {listing.get('bedrooms', 'Unknown')}\n"
        f"Bathrooms: {listing.get('bathrooms', 'Unknown')}\n"
        f"House Size: {listing.get('house_size', 'Unknown')}\n\n"
        f"{listing.get('description', 'No description provided')}\n\n"
        f"Neighborhood Description: {listing.get('neighborhood_description', 'No description provided')}"
    )

    # Generate embedding using LangChain
    embedding = embeddings_model.embed_query(text_representation)

    # The IDs are just list positions and the collection persists on disk, so
    # re-running this cell (or regenerating listings.json) reuses IDs that are
    # already stored. upsert() replaces those entries, keeping the collection
    # in sync with the file. The embedded text is stored as the document so a
    # query result can show exactly what was embedded.
    collection.upsert(
        ids=[str(idx)],
        embeddings=[embedding],
        documents=[text_representation],
        metadatas=[{
            'neighborhood': listing.get('neighborhood', 'Unknown'),
            'price': listing.get('price', 'Unknown'),
            'bedrooms': listing.get('bedrooms', 'Unknown'),
            'bathrooms': listing.get('bathrooms', 'Unknown'),
            'house_size': listing.get('house_size', 'Unknown'),
            'description': listing.get('description', 'No description provided'),
            'neighborhood_description': listing.get('neighborhood_description', 'No description provided')
        }],
    )

print("Listings stored in ChromaDB using LangChain embeddings.")

✅ Listings stored in ChromaDB using LangChain embeddings.


In [7]:
questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]

# Ask each question in order and file the typed reply under q1, q2, ...
buyer_preferences = {
    f"q{number}": input(f"{question} ")
    for number, question in enumerate(questions, start=1)
}

How big do you want your house to be? A comfortable three-bedroom house with a spacious kitchen and a cozy living room.
What are 3 most important things for you in choosing this property? A quiet neighborhood, good local schools, and convenient shopping options.
Which amenities would you like? A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.
Which transportation options are important to you? Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.
How urban do you want your neighborhood to be? A balance between suburban tranquility and access to urban amenities like restaurants and theaters.


In [8]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Initialize the OpenAI embeddings model (rebinds `embeddings_model`)
embeddings_model = OpenAIEmbeddings()

# Join every buyer answer, separated by single spaces, into one search query string
search_query = " ".join(buyer_preferences.values())

# Embed the combined query; `query_embedding` is what the collection is searched with
query_embedding = embeddings_model.embed_query(search_query)

In [11]:
# Ask the collection for the 5 entries nearest to the buyer's query embedding
search_results = collection.query(query_embeddings=[query_embedding], n_results=5)

# Running counter shared by all result groups
match_number = 1

for i, metadata_list in enumerate(search_results.get("metadatas", [])):
    # An empty (or missing) group has nothing to show
    if not metadata_list:
        print(f"No metadata found for match {i + 1}")
        continue

    for metadata in metadata_list:
        # Only dict-shaped metadata can be rendered
        if not isinstance(metadata, dict):
            print(f"Skipping invalid metadata at index {i}")
            continue

        print(f"**Match {match_number}:**")
        print(f"**Location:** {metadata.get('neighborhood', 'Not Available')}")
        print(f"**Price:** {metadata.get('price', 'Not Available')}")
        print(f"**Bedrooms:** {metadata.get('bedrooms', 'Not Available')}")
        print(f"**Bathrooms:** {metadata.get('bathrooms', 'Not Available')}")
        print(f"**Size:** {metadata.get('house_size', 'Not Available')}")
        print(f"**Description:** {metadata.get('description', 'No description available')}")
        print(f"**Neighborhood Info:** {metadata.get('neighborhood_description', 'No description available')}")
        print("-" * 50)

        match_number += 1

**Match 1:**
**Location:** Countryside
**Price:** $750,000
**Bedrooms:** 3
**Bathrooms:** 2
**Size:** 1,800 sqft
**Description:** Welcome to this charming 3 bedroom, 2 bathroom home located in the peaceful Countryside neighborhood. Priced at $750,000, this 1,800 sqft house is perfect for a family looking for a cozy place to call home.

As you enter the property, you are greeted by a spacious living room with a fireplace, perfect for relaxing evenings with loved ones. The kitchen features modern appliances, granite countertops, and plenty of storage space. The dining area overlooks the backyard, where you will find a beautiful deck and a lush lawn, ideal for outdoor entertaining.

The master bedroom includes a walk-in closet and an ensuite bathroom with a luxurious soaking tub. The two additional bedrooms are perfect for children or guests, and share a well-appointed bathroom.

The Countryside neighborhood is known for its tranquil atmosphere and friendly community. Residents enjoy easy

In [12]:
def personalize_and_filter_top_3(buyer_preferences, listings):
    """Personalize every listing for the buyer and return the top-ranked ones.

    Args:
        buyer_preferences: Preferences handed to the personalization and
            filtering helpers.
        listings: List of listings, or that list wrapped in an outer list.

    Returns:
        Whatever `filter_top_3_listings` returns for the personalized entries;
        each entry is a dict with 'listing' and 'augmented_description'.
    """
    # A list whose first element is itself a list is treated as a wrapper
    is_wrapped = bool(listings) and isinstance(listings[0], list)
    candidates = listings[0] if is_wrapped else listings

    personalized_listings = [
        {
            "listing": candidate,
            "augmented_description": personalize_listing_description(buyer_preferences, candidate),
        }
        for candidate in candidates
    ]

    return filter_top_3_listings(personalized_listings, buyer_preferences)

# Function to personalize the listing description with buyer preferences
def personalize_listing_description(buyer_preferences, listing):
    """Ask the chat model to rewrite a listing description for one buyer.

    Returns the stripped model reply, or the fixed fallback text
    "Description could not be generated." when the reply has no choices or
    anything inside the guarded section raises (the problem is printed).
    """
    fallback_text = "Description could not be generated."
    # The prompt is built before the guarded section, so its errors propagate
    chat_messages = [
        {"role": "system", "content": "You are a helpful real estate assistant."},
        {"role": "user", "content": build_prompt(buyer_preferences, listing)},
    ]

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            max_tokens=100,
            temperature=0.7,
        )

        # Bail out early when the reply carries no usable choice
        if "choices" not in response or len(response["choices"]) == 0:
            print("Warning: API response does not contain 'choices'. Full response:", response)
            return fallback_text

        first_choice = response["choices"][0]
        return first_choice["message"]["content"].strip()

    except Exception as e:
        print(f" OpenAI API Error: {e}")
        return fallback_text


# Function to build the prompt based on buyer preferences and listing
def build_prompt(buyer_preferences, listing):
    """Build the prompt that asks the model to tailor a listing to a buyer.

    Args:
        buyer_preferences: Dict of buyer preferences. Reads 'hobby' (the
            misspelled legacy key 'hobbit' is still accepted) and
            'outdoor_space'; a missing or empty value is rendered as
            'No specific preference'.
        listing: Dict of listing fields; only 'description' is used. A missing
            or empty description is rendered as 'No description provided'.

    Returns:
        The prompt string.
    """
    listing_description = listing.get("description") or "No description provided"
    # Prefer the correctly spelled key, but keep working for callers that
    # still pass the old 'hobbit' key.
    preferred_hobby = buyer_preferences.get("hobby") or buyer_preferences.get("hobbit")
    preferred_outdoor = buyer_preferences.get("outdoor_space")

    prompt = f"""
    A real estate agent is personalizing the description for a property to match a specific buyer's preferences.

    Property Description: {listing_description}

    Buyer Preferences:
    - Hobby: {preferred_hobby if preferred_hobby else 'No specific preference'}
    - Outdoor Space: {preferred_outdoor if preferred_outdoor else 'No specific preference'}

    Please enhance the description to align with the buyer's preferences while ensuring factual accuracy.
    """

    return prompt


In [13]:
import openai

def filter_top_3_listings(personalized_listings, buyer_preferences):
    """Return up to three entries with the highest preference score, best first.

    Entries with equal scores keep their original relative order.
    """
    def preference_score(entry):
        return score_listing(entry, buyer_preferences)

    # Sort a copy so the caller's list is left untouched
    ranked = list(personalized_listings)
    ranked.sort(key=preference_score, reverse=True)
    return ranked[:3]

# Scoring function that ranks listings based on buyer preferences
def score_listing(listing_info, buyer_preferences):
    """Score how well a personalized listing matches the buyer's preferences.

    One point is awarded for each of:
      * the preferred 'bedrooms' value appearing in the description as a
        standalone number (not as part of a price or a larger figure);
      * the preferred 'outdoor_space' text appearing in the description,
        compared case-insensitively.

    Args:
        listing_info: Dict with an 'augmented_description' string.
        buyer_preferences: Dict that may contain 'bedrooms' and
            'outdoor_space'; missing or falsy values score nothing.

    Returns:
        The integer score (0, 1 or 2).
    """
    import re  # local import keeps this cell self-contained

    augmented_description = listing_info["augmented_description"]
    score = 0

    bedrooms = buyer_preferences.get("bedrooms")
    if bedrooms:
        # A plain substring test would let "3" match "$300,000" or "1,300 sqft".
        # Require that the value is not glued to other digits, a currency sign
        # or a thousands/decimal separator on either side.
        standalone_number = rf"(?<![\d$.,]){re.escape(str(bedrooms))}(?![.,]?\d)"
        if re.search(standalone_number, augmented_description):
            score += 1

    outdoor_space = buyer_preferences.get("outdoor_space")
    # str() keeps a non-string preference from crashing on .lower()
    if outdoor_space and str(outdoor_space).lower() in augmented_description.lower():
        score += 1

    return score

# Example of how the function is used
buyer_preferences = dict(hobbit="cooking", outdoor_space="garden")

# Take the metadata from the search results; when it comes wrapped in an
# outer list, keep only the first inner list.
listings = search_results.get("metadatas", [])
if len(listings) >= 1 and isinstance(listings[0], list):
    listings = listings[0]

# Get personalized descriptions and filtered top 3 listings
top_3_personalized_listings = personalize_and_filter_top_3(buyer_preferences, listings)

# Print each selected match together with its personalized text
for rank, match in enumerate(top_3_personalized_listings, start=1):
    details = match["listing"]
    print(f"**Match {rank}:**")
    print(f"**Description:** {details.get('description', 'Not Available')}")
    print(f"**Price:** {details.get('price', 'Not Available')}")
    print(f"**Bedrooms:** {details.get('bedrooms', 'Not Available')}")
    print(f"**Personality:** {match['augmented_description']}")
    print("-" * 50)

**Match 1:**
**Description:** Welcome to this charming 3 bedroom, 2 bathroom home located in the peaceful Countryside neighborhood. Priced at $750,000, this 1,800 sqft house is perfect for a family looking for a cozy place to call home.

As you enter the property, you are greeted by a spacious living room with a fireplace, perfect for relaxing evenings with loved ones. The kitchen features modern appliances, granite countertops, and plenty of storage space. The dining area overlooks the backyard, where you will find a beautiful deck and a lush lawn, ideal for outdoor entertaining.

The master bedroom includes a walk-in closet and an ensuite bathroom with a luxurious soaking tub. The two additional bedrooms are perfect for children or guests, and share a well-appointed bathroom.

The Countryside neighborhood is known for its tranquil atmosphere and friendly community. Residents enjoy easy access to parks, hiking trails, and top-rated schools. With shopping and dining options nearby, thi