In [None]:
# Install pandas with the explicit %pip magic so the cell does not depend on
# IPython's automagic setting and targets the running kernel's environment.
%pip install --quiet pandas

# STEP 1: SETUP OPENAI API KEY AND BASE

In [23]:
# All import statements used in this step
import openai
import os

In [None]:
# Placeholder credentials and endpoint; replace the key value before running.
os.environ.update(
    {
        "OPENAI_API_KEY": "YOUR-API-KEY-HERE",
        "OPENAI_API_BASE": "https://openai.vocareum.com/v1",
    }
)

In [25]:
# Copy the key and base URL from the environment onto the openai module.
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_base = os.environ.get("OPENAI_API_BASE")

In [26]:
# Chat model name passed as `model=` to every openai.ChatCompletion.create call in this notebook
MODEL = "gpt-3.5-turbo"

# STEP 2: CREATE THE REAL ESTATE LISTINGS FOR THE CSV

In [4]:
# All import statements used in this Step
from pydantic import BaseModel, Field, NonNegativeInt
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

In [5]:
# Prompt ingredients for the generation request: the task sentence and one worked example.
# NOTE(review): the instruction says "CSV file" and "listing" (singular) although the reply
# is parsed as structured Listings output; wording left untouched, confirm it is intended.
INSTRUCTION = "Generate a CSV file with at least 10 real estate listing."
SAMPLE_LISTING = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

In [6]:
class RealEstates(BaseModel):
    # Schema of a single listing: location, price, room counts, size and two free-text blurbs.
    neighborhood: str = Field(
        description="The neighborhood in which the property is located."
    )
    price: NonNegativeInt = Field(
        description="The price of the property in USD."
    )
    bedrooms: NonNegativeInt = Field(
        description="The number of bedrooms in the property."
    )
    bathrooms: NonNegativeInt = Field(
        description="The number of bathrooms in the property."
    )
    house_size: NonNegativeInt = Field(
        description="The size of the property in sqft."
    )
    description: str = Field(
        description="A brief description of the house describing all of its pros."
    )
    neighborhood_description: str = Field(
        description="A brief description of the neighborhood describing all of its pros."
    )

In [7]:
class Listings(BaseModel):
    # Wrapper model: the whole reply is one object holding every RealEstates entry.
    listings: List[RealEstates] = Field(
        description="A list containing Real Estate details."
    )

In [8]:
# Parser bound to the Listings model; supplies the prompt's format instructions and later parses the reply
parser = PydanticOutputParser(pydantic_object=Listings)

In [9]:
# Prompt layout: instruction, then the sample listing, then the parser's format instructions.
prompt_template = PromptTemplate(
    input_variables=["instruction", "sample"],
    # The bound method itself is supplied here (not its return value), as the printed repr shows.
    partial_variables={"format_instructions": parser.get_format_instructions},
    template="{instruction}\n{sample}\n{format_instructions}",
)

print(prompt_template)

input_variables=['instruction', 'sample'] partial_variables={'format_instructions': <bound method PydanticOutputParser.get_format_instructions of PydanticOutputParser(pydantic_object=<class '__main__.Listings'>)>} template='{instruction}\n{sample}\n{format_instructions}'


In [10]:
# Fill in the two remaining variables to get the full text sent to the model.
initial_query = prompt_template.format(
    sample=SAMPLE_LISTING,
    instruction=INSTRUCTION,
)
print(initial_query)

Generate a CSV file with at least 10 real estate listing.

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bik

In [11]:
# Single user-message request carrying the listing-generation prompt.
llm_response = openai.ChatCompletion.create(
    messages=[{"role": "user", "content": initial_query}],
    temperature=0.0,
    model=MODEL,
)

In [12]:
# Text of the first choice in the model's reply
response_content = llm_response.choices[0].message.content
print(response_content)

{
  "listings": [
    {
      "neighborhood": "Green Oaks",
      "price": 800000,
      "bedrooms": 3,
      "bathrooms": 2,
      "house_size": 2000,
      "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",
      "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public 

# STEP 3: CREATE THE CSV FROM THE LISTINGS

In [13]:
# All import statements used in this step
import pandas as pd

In [14]:
# Parse the reply text with the Listings-bound parser
parsed_content = parser.parse(response_content)

In [15]:
# The individual RealEstates entries held by the parsed Listings object
listings = parsed_content.listings

In [16]:
# One row per listing; each model is converted to a plain dict first.
df = pd.DataFrame(list(map(lambda entry: entry.dict(), listings)))
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Sunnyvale,950000,4,3,2500,"Beautiful 4-bedroom, 3-bathroom home located i...",Sunnyvale is known for its family-friendly atm...
2,Downtown Los Angeles,1200000,2,2,1800,"Luxurious 2-bedroom, 2-bathroom condo in the h...",Downtown Los Angeles is a bustling urban cente...
3,Brooklyn Heights,1500000,5,4,3000,"Stunning 5-bedroom, 4-bathroom brownstone in t...",Brooklyn Heights is a picturesque neighborhood...
4,Pacific Palisades,2500000,6,5,4000,"Magnificent 6-bedroom, 5-bathroom estate in th...",Pacific Palisades is a prestigious coastal com...


In [17]:
# index=False keeps the row index out of the file; otherwise reloading the CSV
# with pd.read_csv brings it back as an extra "Unnamed: 0" column.
df.to_csv("Real_Estates.csv", index=False)

# STEP 4: CREATE THE CHROMA DB VECTOR DATABASE

In [18]:
# All import statements used in this step
import pandas as pd
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [19]:
# Reload the listings from the CSV file written in Step 3
df = pd.read_csv("Real_Estates.csv")

In [20]:
# Relative directory used as the Chroma persist_directory when building and reopening the store
CHROMA_PATH = "content/chroma"

In [27]:
# Embedding object shared by the store creation cell and by semantic_search
embedding_function = OpenAIEmbeddings()

In [28]:
# One Document per CSV row: the listing description as text, the row index (as a string) as its id.
documents = [
    Document(page_content=record["description"], metadata={"id": str(row_id)})
    for row_id, record in df.iterrows()
]
print(f"Successfully created {len(documents)} documents from CSV.")

Successfully created 10 documents from CSV.


In [29]:
# Splitter settings: chunks of 300 with an overlap of 100, both measured with len().
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=300,
    chunk_overlap=100,
    add_start_index=True,
)

In [30]:
# Split every document and report how many chunks came out.
chunks = text_splitter.split_documents(documents)
print(
    "Successfully created {} chunks from {} documents.".format(
        len(chunks), len(documents)
    )
)

Successfully created 16 chunks from 10 documents.


In [31]:
# Remove any vector store left over from a previous run before rebuilding it.
# shutil is imported here because no earlier cell imports it; without this the
# rmtree call raises NameError whenever the directory already exists.
import shutil

if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

In [34]:
# Build the vector store from the chunks, persisting under CHROMA_PATH.
db = Chroma.from_documents(
    persist_directory=CHROMA_PATH,
    embedding=embedding_function,
    documents=chunks,
)

In [35]:
# Persist the store, then confirm where it lives.
db.persist()
print("Successfully created ChromaDB Vector Database at " + CHROMA_PATH)

Successfully created ChromaDB Vector Database at content/chroma


# STEP 5: SEMANTIC SEARCH USING USER INPUT

In [38]:
# All Import statements used in this step
from langchain.prompts import ChatPromptTemplate

In [36]:
# Free-text buyer preferences typed in interactively; reused as the search query in later cells
user_query = input("Describe your dream house: ")

Describe your dream house: A comfortable three-bedroom house with a spacious kitchen and a cozy living room.


In [37]:
# Plain retrieval prompt: retrieved context first, then the user's question.
BASIC_PROMPT_TEMPLATE = (
    "\n"
    "Based on the following context:\n"
    "\n"
    "{context}\n"
    "\n"
    "---\n"
    "\n"
    "Answer the following question: {question}\n"
)

In [43]:
def semantic_search(question, template, k=3, min_score=0.7):
    """Retrieve the stored chunks closest to ``question`` and build a prompt from them.

    Opens the Chroma store persisted at ``CHROMA_PATH`` with the notebook-level
    ``embedding_function``, runs a relevance-scored similarity search and fills
    ``template`` with the retrieved text.

    Args:
        question: Free-text query; also substituted for ``{question}``.
        template: Prompt text containing ``{context}`` and ``{question}``.
        k: Number of matches requested from the store. Defaults to 3, the
            value that used to be hard-coded.
        min_score: Relevance score the first returned match must reach.
            Defaults to 0.7, the value that used to be hard-coded. Only the
            first match is checked; the others are kept whatever their score.

    Returns:
        The formatted prompt string, or ``None`` (after printing a notice) when
        the search returns nothing or the first match scores below ``min_score``.
    """
    store = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=embedding_function,
    )

    matches = store.similarity_search_with_relevance_scores(query=question, k=k)

    # Guard clause: bail out early when there is nothing usable to build a prompt from.
    if not matches or matches[0][1] < min_score:
        print("Unable to find relevant matches!")
        return None

    # Retrieved chunks are separated by a dashed line inside the prompt.
    context_text = "\n----\n".join(doc.page_content for doc, _score in matches)
    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    ).format(context=context_text, question=question)
    print(f"Generated Prompt:\n{prompt}")
    return prompt

In [44]:
# Build the basic retrieval prompt for the buyer's query (None if nothing relevant was found).
result = semantic_search(user_query, BASIC_PROMPT_TEMPLATE)

Generated Prompt:

Based on the following context:

Beautiful 4-bedroom, 3-bathroom home located in the desirable neighborhood of Sunnyvale. This spacious property features a modern kitchen, luxurious bathrooms, and a large backyard perfect for entertaining. Enjoy the convenience of nearby shopping centers, parks, and top-rated schools.
----
Charming 4-bedroom, 3-bathroom townhouse in the historic neighborhood of Georgetown. This elegant home features original details, modern updates, and a private courtyard perfect for outdoor entertaining. Experience the rich history and culture of Georgetown with easy access to boutique shops, cafes,
----
Modern 3-bedroom, 3-bathroom condo in the vibrant neighborhood of South Beach. This sleek unit features an open floor plan, designer finishes, and panoramic views of the bay. Experience luxury living with access to trendy restaurants, nightlife, and cultural attractions.

---

Answer the following question: A comfortable three-bedroom house with a 

In [49]:
# Send the retrieval prompt as a single user message.
# NOTE: `result` is None when semantic_search found no relevant match.
llm_response = openai.ChatCompletion.create(
    messages=[{"role": "user", "content": result}],
    temperature=0.0,
    model=MODEL,
)

In [50]:
# Show the text of the first choice returned for the basic prompt
print(llm_response.choices[0].message.content)

The third option, a modern 3-bedroom, 3-bathroom condo in the vibrant neighborhood of South Beach, may not be the best fit for someone looking for a comfortable three-bedroom house with a spacious kitchen and a cozy living room. The first option, a beautiful 4-bedroom, 3-bathroom home located in the desirable neighborhood of Sunnyvale, would likely be a better match for those specific criteria.


### AUGMENTING SEARCH USING SYSTEM PROMPT

In [51]:
# Persona and task for the system message of the augmented request.
# NOTE(review): "a three Real Estates" is a typo in the prompt text; it is kept
# verbatim here so the text sent to the model does not change.
system_prompt = (
    "\n"
    "You are LLMRealtor, a highly experienced Real Estate Agent who has sold hundreds of properties.\n"
    "You are polite, warm and friendly.\n"
    "You will be given a three Real Estates that are the closest to a buyer's preferences.\n"
    "Your task is to positively convince them that these three properties are the best for them and give them proper reasons to buy one of these properties. \n"
    "**Ultimately, suggest the best property according to the *buyer's preferences* provided.**\n"
)

In [54]:
# User-message template for the augmented request: retrieved listings, then the buyer's own words.
AUGMENTED_PROMPT = (
    "\n"
    "REAL ESTATES CLOSEST TO THE BUYER'S PREFERENCES:\n"
    "\n"
    "{context}\n"
    "\n"
    "---\n"
    "\n"
    "BUYER'S ORIGINAL PREFERENCE: {question}\n"
)

In [55]:
# Rebuild the prompt for the same query, this time with the augmented template.
result = semantic_search(user_query, AUGMENTED_PROMPT)

Generated Prompt:

REAL ESTATES CLOSEST TO THE BUYER'S PREFERENCES:

Beautiful 4-bedroom, 3-bathroom home located in the desirable neighborhood of Sunnyvale. This spacious property features a modern kitchen, luxurious bathrooms, and a large backyard perfect for entertaining. Enjoy the convenience of nearby shopping centers, parks, and top-rated schools.
----
Charming 4-bedroom, 3-bathroom townhouse in the historic neighborhood of Georgetown. This elegant home features original details, modern updates, and a private courtyard perfect for outdoor entertaining. Experience the rich history and culture of Georgetown with easy access to boutique shops, cafes,
----
Modern 3-bedroom, 3-bathroom condo in the vibrant neighborhood of South Beach. This sleek unit features an open floor plan, designer finishes, and panoramic views of the bay. Experience luxury living with access to trendy restaurants, nightlife, and cultural attractions.

---

BUYER'S ORIGINAL PREFERENCE: A comfortable three-bedroo

In [56]:
# Two-message request: the realtor persona as system message, the retrieval prompt as user message.
# NOTE: `result` is None when semantic_search found no relevant match.
llm_response = openai.ChatCompletion.create(
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": result},
    ],
    temperature=0.7,
    model=MODEL,
)

In [57]:
# Show the text of the first choice returned for the augmented (system-prompted) request
print(llm_response.choices[0].message.content)

Hello! It's a pleasure to assist you in finding your perfect home. Based on your preferences for a comfortable three-bedroom house with a spacious kitchen and a cozy living room, I have selected three properties that closely match what you are looking for.

1. The beautiful 4-bedroom, 3-bathroom home in Sunnyvale offers a spacious kitchen and a cozy living room, perfect for relaxing with your loved ones. The large backyard provides a great space for entertaining and enjoying the outdoors.

2. The charming 4-bedroom, 3-bathroom townhouse in Georgetown also features a spacious kitchen and a cozy living room. The private courtyard adds a touch of outdoor charm, ideal for intimate gatherings and relaxation.

3. The modern 3-bedroom, 3-bathroom condo in South Beach may have one less bedroom, but it boasts a spacious open floor plan with a designer kitchen and a comfortable living area. The panoramic views of the bay add a unique touch to the property.

Considering your preference for a comf