In [1]:
# Import Statements

import os
import openai
from pydantic import BaseModel, Field, NonNegativeInt
from typing import List, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# STEP 0: INITIALIZE THE LLM

In [2]:
os.environ["OPENAI_API_KEY"] = "voc-176713170812667743680626901c4a6b06bd0.19164315"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [3]:
# Select the LLM Model
MODEL = "gpt-3.5-turbo"

In [4]:
# Initialze the LLM client
openai.api_key=os.getenv("OPENAI_API_KEY")
openai.api_base=os.getenv("OPENAI_API_BASE")

## STEP 1: CREATE THE CSV

In [5]:
# Example Instruction and Sample
INSTRUCTION = "Generate a CSV file with at least 10 real estate listing."
SAMPLE_LISTING = \
"""
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

In [6]:
# Create the RealEstate Class
class RealEstate(BaseModel):
    Neighborhood: str = Field(description="The neighborhood where the property is located")
    price: NonNegativeInt = Field(description="THe price of the property in USD")
    bedrooms: NonNegativeInt = Field(description="The number of bedrooms in the property")
    bathrooms: NonNegativeInt = Field(description="The number of bathrooms in the property")
    house_size: NonNegativeInt = Field(description="The size of the house in square feet")
    description: str = Field(description="A description of the property")
    neighborhood_description: str = Field(description="A description of the neighborhood")

In [7]:
class Listings(BaseModel):
    listings: List[RealEstate] = Field(description="A list containing real estates.")

In [8]:
# Create the parser
parser = PydanticOutputParser(pydantic_object=Listings)

In [9]:
# Initialize the Prompt Template
prompt_template = PromptTemplate(
    template="{instruction}\n{sample}\n{format_instructions}",
    input_variables=["instruction", "sample"],
    partial_variables={"format_instructions": parser.get_format_instructions}
)

print(prompt_template)

input_variables=['instruction', 'sample'] partial_variables={'format_instructions': <bound method PydanticOutputParser.get_format_instructions of PydanticOutputParser(pydantic_object=<class '__main__.Listings'>)>} template='{instruction}\n{sample}\n{format_instructions}'


In [10]:
# Create the inital query
initial_query = prompt_template.format(instruction=INSTRUCTION, sample=SAMPLE_LISTING)

print(initial_query)

Generate a CSV file with at least 10 real estate listing.

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bik

In [11]:
# Get the LLM Response
response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.0,
    messages=[
        {
            "role": "user",
            "content": initial_query
        }
    ]
)

In [12]:
print(response.choices[0].message.content)

{
  "listings": [
    {
      "Neighborhood": "Green Oaks",
      "price": 800000,
      "bedrooms": 3,
      "bathrooms": 2,
      "house_size": 2000,
      "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",
      "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public 

In [13]:
# Parse the LLM Response using the parser
response_content = response.choices[0].message.content
parsed_content = parser.parse(response_content)

In [15]:
# Create the DataFrame from the Parsed Content
listings = parsed_content.listings
df = pd.DataFrame([listing.dict() for listing in listings])
df.head()

Unnamed: 0,Neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Sunnyvale,950000,4,3,2500,Located in the desirable neighborhood of Sunny...,"Sunnyvale is known for its top-rated schools, ..."
2,Brooklyn Heights,1200000,5,4,3500,"Welcome to this stunning 5-bedroom, 4-bathroom...",Brooklyn Heights is a picturesque neighborhood...
3,Pacific Palisades,3500000,6,5,5000,Situated in the prestigious neighborhood of Pa...,Pacific Palisades is a sought-after community ...
4,South Beach,2000000,3,3,2200,"Welcome to this modern 3-bedroom, 3-bathroom c...",South Beach is a dynamic neighborhood known fo...


In [16]:
# Save the DataFrame to a CSV
df.to_csv("Real_Estates.csv")

## STEP 2: CREATING THE VECTOR DATABASE USING TF-IDF

In [17]:
# Read the CSV
df = pd.read_csv("Real_Estates.csv")

In [18]:
def document(raw):
    """Convert the listings into text representation for TF-IDF"""
    return " ".join([
        f"""
            \nNeigborhood: {str(raw["Neighborhood"])}
            \nPrice: ${str(raw["price"])}
            \nBedrooms: {str(raw["bedrooms"])}
            \nBathrooms: {str(raw["bathrooms"])}
            \nHouse Size: {str(raw["house_size"])}
            \nProperty Description: {str(raw["description"])}
            \nNeighborhood Description: {str(raw["neighborhood_description"])}
        """
    ])

In [19]:
documents_from_csv = [document(row) for _, row in df.iterrows()]
print(documents_from_csv[1])


            
Neigborhood: Sunnyvale
            
Price: $950000
            
Bedrooms: 4
            
Bathrooms: 3
            
House Size: 2500
            
Property Description: Located in the desirable neighborhood of Sunnyvale, this 4-bedroom, 3-bathroom home offers spacious living areas and a beautifully landscaped backyard. The updated kitchen features stainless steel appliances and granite countertops, perfect for entertaining guests. Enjoy the California sunshine in the private backyard oasis with a sparkling pool and outdoor kitchen. This home is perfect for families looking for a blend of comfort and luxury.
            
Neighborhood Description: Sunnyvale is known for its top-rated schools, vibrant downtown area, and easy access to major tech companies. Residents can enjoy hiking and biking trails at nearby parks or explore the local farmers' market for fresh produce and artisan goods.
        


In [20]:
# Initialize the Vectorizer and Fit the data
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(documents_from_csv)

## STEP 3: EXTRACTING THE USER PREFERENCES

In [21]:
# Define the Preference model
class Prefs:
    def __init__(self, budget, bedrooms, bathrooms, minimum_area, property_preferences, neighborhood_preferences):
        self.budget = budget or None,
        self.bedrooms = bedrooms or None,
        self.bathrooms = bathrooms or None,
        self.minimum_area = minimum_area or None,
        self.property_preferences = property_preferences or "",
        self.neighborhood_preferences = neighborhood_preferences or ""

In [44]:
# Take user input
print("Let us know about your dream property!")
budget = int(input("Enter your budget"))
bedrooms = int(input("How many bedrooms do you want?"))
bathrooms = int(input("How many bathrooms do you want?"))
minimum_area = int(input("How large of a house do you want (in sqft)?"))
property_preferences = input("Any preferences for the property?")
neighborhood_preferences = input("Any preferences for the neighborhood?")

Let us know about your dream property!
Enter your budget800000
How many bedrooms do you want?2
How many bathrooms do you want?2
How large of a house do you want (in sqft)?2000
Any preferences for the property?
Any preferences for the neighborhood?


In [45]:
prefs = Prefs(
    budget=budget, 
    bedrooms=bedrooms, 
    bathrooms=bathrooms,
    minimum_area=minimum_area,
    property_preferences=property_preferences,
    neighborhood_preferences=neighborhood_preferences
)

## STEP 4: SEMANTIC SEARCH USING TF-IDF AND FILTERING

In [46]:
query = " ".join([
    f"${prefs.budget[0]}" if prefs.budget else "",
    f"{prefs.bedrooms[0]} bedrooms" if prefs.bedrooms else "",
    f"{prefs.bathrooms[0]} bathrooms" if prefs.bathrooms else "",
    f"{prefs.minimum_area[0]} sqft" if prefs.minimum_area else "",
    f"{prefs.property_preferences[0]}" if prefs.property_preferences else "",
    f"{prefs.neighborhood_preferences}" if prefs.property_preferences else ""
])

In [47]:
print(query)

$800000 2 bedrooms 2 bathrooms 2000 sqft  


In [48]:
query_vec = vectorizer.transform([query])

In [49]:
scores = cosine_similarity(query_vec, tfidf_matrix)[0]

In [50]:
df["similarity"] = scores
ranked_df = df.sort_values(by="similarity", ascending=False)

In [51]:
ranked_df.head(1)

Unnamed: 0.1,Unnamed: 0,Neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description,similarity
0,0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co...",0.127948


In [52]:
filtered = []

for _, row in ranked_df.iterrows():
    ok = True

    price, bedrooms, bathrooms, size = int(row["price"]), int(row["bedrooms"]), int(row["bathrooms"]), int(row["house_size"])

    if prefs.budget and prefs.budget[0] < price:
        ok = False
    if prefs.bedrooms and prefs.bedrooms[0] > bedrooms:
        ok = False
    if prefs.bathrooms and prefs.bathrooms[0] > bathrooms:
        ok = False
    if prefs.minimum_area and prefs.minimum_area[0] > size:
        of = False

    if ok:
        filtered.append(row)

if not filtered:
    print("No matches found!")

## STEP 5: PERSONALIZED RECOMMENDATION BY AN LLM

In [53]:
final_query = None

if filtered:
    best = filtered[0].drop("similarity").to_dict()

    final_query = f"""
    Here are the details of a property that is very similar to the preferences of a buyer:
    
    {best}

    ---

    You are LLMRealtor. Your task is to convince the buyer that this property is the best suited for them like a Real Estate Agent.
    Be very convincing using a warm, polite tone. Give them detailed introspection about the positive features of this property.
    """

print(final_query)


    Here are the details of a property that is very similar to the preferences of a buyer:
    
    {'Unnamed: 0': 0, 'Neighborhood': 'Green Oaks', 'price': 800000, 'bedrooms': 3, 'bathrooms': 2, 'house_size': 2000, 'description': 'Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.', 'neighborhood_description': 'Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at 

In [54]:
response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.7,
    messages=[
        {
            "role": "user",
            "content": final_query
        }
    ]
)

In [55]:
print(response.choices[0].message.content)

Hello there, 

I am thrilled to present to you a property that truly embodies your eco-conscious lifestyle preferences. This charming home located in the heart of Green Oaks is a true gem that I believe will exceed your expectations.

One of the standout features of this property is its energy-efficient design, with solar panels and a well-insulated structure that will not only reduce your carbon footprint but also save you money on energy bills. The abundance of natural light flowing through the living spaces showcases the beautiful hardwood floors and eco-conscious finishes, creating a warm and inviting atmosphere.

The open-concept kitchen and dining area leading to a spacious backyard with a vegetable garden is the perfect setting for you to embrace sustainable living without compromising on style. Imagine growing your own organic produce right in your backyard, truly a dream for an eco-conscious family like yours.

Moreover, the neighborhood of Green Oaks is a close-knit community