In [1]:
# Install pandas
!pip install --quiet pandas

# STEP 1: SETUP OPENAI API KEY AND BASE

In [2]:
# All import statements used in the Step
import openai
import os

In [3]:
# SECURITY: never hardcode the API key in the notebook -- the file and its
# saved outputs get shared and committed. The key is taken from the environment
# when it is already set; otherwise it is requested interactively without being
# echoed. Any key that was previously committed in this cell should be treated
# as compromised and rotated.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [4]:
# Hand the credentials stored in the environment over to the OpenAI client.
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_base = os.environ.get("OPENAI_API_BASE")

In [5]:
# GPT model used throughout the project
# (passed as the `model` argument of every openai.ChatCompletion.create call).
MODEL = "gpt-3.5-turbo"

# STEP 2: CREATE THE REAL ESTATE LISTINGS FOR THE CSV

In [6]:
# All import statements used in this Step
from pydantic import BaseModel, Field, NonNegativeInt
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

In [7]:
# Instruction sent to the model, followed by one sample listing that shows
# which fields every generated listing should contain.
INSTRUCTION = "Generate a CSV file with at least 10 real estate listing."
SAMPLE_LISTING = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

In [8]:
# Schema of a single generated listing. Every field carries a human-readable
# description; the count/size/price fields are typed as NonNegativeInt.
# NOTE(review): documented with `#` comments on purpose -- a class docstring may
# end up in the schema that the output parser shows to the model; confirm
# before adding one.
class RealEstates(BaseModel):
    neighborhood: str = Field(description="The neighborhood in which the property is located.")
    price: NonNegativeInt = Field(description="The price of the property in USD.")
    bedrooms: NonNegativeInt = Field(description="The number of bedrooms in the property.")
    bathrooms: NonNegativeInt = Field(description="The number of bathrooms in the property.")
    house_size: NonNegativeInt = Field(description="The size of the property in sqft.")
    description: str = Field(description="A brief description of the house describing all of its pros.")
    neighborhood_description: str = Field(description="A brief description of the neighborhood describing all of its pros.")

In [9]:
# Top-level container the parser validates against: a list of RealEstates
# entries under the key "listings".
class Listings(BaseModel):
    listings: List[RealEstates] = Field(description="A list containing Real Estate details.")

In [10]:
# Parser bound to the Listings schema; it supplies the format instructions for
# the prompt and later parses the model's reply.
parser = PydanticOutputParser(pydantic_object=Listings)

In [11]:
# Prompt layout: instruction, sample listing, then the parser's format instructions.
# NOTE(review): the bound method parser.get_format_instructions is passed
# uncalled; presumably PromptTemplate invokes callable partials at format
# time -- confirm.
prompt_template = PromptTemplate(
    template="{instruction}\n{sample}\n{format_instructions}",
    input_variables=["instruction", "sample"],
    partial_variables={"format_instructions": parser.get_format_instructions}
)

print(prompt_template)

input_variables=['instruction', 'sample'] partial_variables={'format_instructions': <bound method PydanticOutputParser.get_format_instructions of PydanticOutputParser(pydantic_object=<class '__main__.Listings'>)>} template='{instruction}\n{sample}\n{format_instructions}'


In [12]:
# Fill the template with the instruction and the sample listing; the result is
# the full text sent to the model as the user message.
initial_query = prompt_template.format(instruction=INSTRUCTION, sample=SAMPLE_LISTING)
print(initial_query)

Generate a CSV file with at least 10 real estate listing.

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bik

In [13]:
# Ask the model for the listings with a single user message.
llm_response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.0,
    messages=[{"role": "user", "content": initial_query}],
)

In [14]:
# Text of the first (and only requested) completion choice.
response_content = llm_response.choices[0].message.content
print(response_content)

{
  "listings": [
    {
      "neighborhood": "Green Oaks",
      "price": 800000,
      "bedrooms": 3,
      "bathrooms": 2,
      "house_size": 2000,
      "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",
      "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public 

# STEP 3: CREATE THE CSV FROM THE LISTINGS

In [15]:
# All import statements used in this step
import pandas as pd

In [16]:
# Parse the model's reply with the parser built on the Listings schema.
parsed_content = parser.parse(response_content)

In [17]:
# The individual RealEstates entries held by the parsed Listings object.
listings = parsed_content.listings

In [18]:
# Turn each parsed listing into a plain dict, then tabulate them.
listing_records = [listing.dict() for listing in listings]
df = pd.DataFrame(listing_records)
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Sunnyvale,950000,4,3,2500,"Beautiful 4-bedroom, 3-bathroom home located i...","Sunnyvale is known for its top-rated schools, ..."
2,Willow Glen,1200000,5,4,3500,"Luxurious 5-bedroom, 4-bathroom estate in the ...",Willow Glen is known for its tree-lined street...
3,Los Gatos,1800000,4,3,3000,"Stunning 4-bedroom, 3-bathroom home in the exc...","Los Gatos is known for its upscale shops, gour..."
4,Palo Alto,2500000,6,5,4000,"Modern 6-bedroom, 5-bathroom home in the prest...","Palo Alto is known for its top-rated schools, ..."


In [19]:
# Write the listings without the positional index: otherwise the index is
# stored as an unnamed first column and comes back as a spurious "Unnamed: 0"
# column when the file is read again.
df.to_csv("Real_Estates.csv", index=False)

# STEP 4: CREATE THE CHROMA DB VECTOR DATABASE

In [20]:
# All import statements used in this step
import pandas as pd
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [28]:
import shutil
import os

# The cleanup that used to live in this cell referenced CHROMA_PATH before the
# cell that defines it, so a fresh "Restart & Run All" failed with NameError.
# The identical cleanup already runs right before the vector store is built,
# so only the imports that later cleanup relies on are kept here.

In [21]:
# Reload the listings from the CSV file (rebinds `df`).
df = pd.read_csv("Real_Estates.csv")

In [22]:
# Directory (relative to the working directory) used as the Chroma persist directory.
CHROMA_PATH = "content/chroma"

In [23]:
# Embedding model shared by index creation and by semantic_search.
embedding_function = OpenAIEmbeddings()

In [24]:
# One Document per CSV row: the listing description is the text and the row
# index (as a string) is stored as the "id" metadata.
documents = [
    Document(page_content=row["description"], metadata={"id": str(index)})
    for index, row in df.iterrows()
]
print(f"Successfully created {len(documents)} documents from CSV.")

Successfully created 10 documents from CSV.


In [25]:
# Splitter configuration: chunks of up to 300 units with a 100-unit overlap,
# where length is measured with len() (i.e. characters of the text).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True
)

In [26]:
# Split every listing description into chunks; these chunks are what gets indexed.
chunks = text_splitter.split_documents(documents)
print(f"Successfully created {len(chunks)} chunks from {len(documents)} documents.")

Successfully created 20 chunks from 10 documents.


In [29]:
# Remove any previously persisted store so the database below is built from scratch.
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

In [30]:
# Embed the chunks and build the Chroma store under CHROMA_PATH.
db = Chroma.from_documents(documents=chunks, embedding=embedding_function, persist_directory=CHROMA_PATH)

In [31]:
# persist() presumably flushes the store to its persist_directory -- confirm
# against the installed Chroma/LangChain version.
db.persist()
print(f"Successfully created ChromaDB Vector Database at {CHROMA_PATH}")

Successfully created ChromaDB Vector Database at content/chroma


# STEP 5: SEMANTIC SEARCH USING USER INPUT

In [32]:
# All Import statements used in this step
from langchain.prompts import ChatPromptTemplate

In [33]:
# Free-text buyer preference typed by the user; used as the search query below.
user_query = input("Describe your dream house: ")

Describe your dream house: A comfortable three-bedroom house with a spacious kitchen and a cozy living room.


In [34]:
# Minimal prompt: retrieved context first, then the user's text as the
# question. Placeholders: {context} and {question}.
BASIC_PROMPT_TEMPLATE = """
Based on the following context:

{context}

---

Answer the following question: {question}
"""

In [35]:
def semantic_search(question, template, k=3, min_relevance=0.7):
    """Build an LLM prompt from the stored chunks most similar to ``question``.

    The persisted Chroma store at ``CHROMA_PATH`` is opened with the shared
    ``embedding_function`` and queried for the ``k`` closest chunks.

    Args:
        question: Free-text description of what the buyer is looking for.
        template: Prompt template with ``{context}`` and ``{question}``
            placeholders.
        k: Number of chunks to retrieve. Defaults to 3.
        min_relevance: Minimum relevance score the first returned match must
            reach. Defaults to 0.7.

    Returns:
        The formatted prompt string, or ``None`` (after printing a notice) when
        nothing was retrieved or the first match scores below ``min_relevance``.
    """
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=embedding_function
    )

    matches = db.similarity_search_with_relevance_scores(query=question, k=k)

    # Only the first match is compared with the threshold (results are
    # presumably ordered best-first -- confirm against the Chroma docs).
    if not matches or matches[0][1] < min_relevance:
        print("Unable to find relevant matches!")
        return None

    context_text = "\n----\n".join(doc.page_content for doc, _score in matches)
    prompt_template = PromptTemplate(
        template=template,
        input_variables=["context", "question"]
    )
    prompt = prompt_template.format(context=context_text, question=question)
    print(f"Generated Prompt:\n{prompt}")
    return prompt

In [36]:
# Build the basic prompt for the user's query; semantic_search returns None
# when it finds no sufficiently relevant match.
result = semantic_search(question=user_query, template=BASIC_PROMPT_TEMPLATE)

Generated Prompt:

Based on the following context:

With a spacious layout and state-of-the-art features, this home is perfect for those who appreciate contemporary luxury.
----
BBQ area. With timeless design and modern amenities, this home offers a perfect blend of comfort and sophistication.
----
With plenty of space for work and play, this home is perfect for growing families or those who love to entertain.

---

Answer the following question: A comfortable three-bedroom house with a spacious kitchen and a cozy living room.



In [37]:
# semantic_search returns None when it finds no relevant match. Stop here with
# a clear message instead of sending None as the message content.
if result is None:
    raise ValueError("No relevant listings were found for the query; cannot build an LLM request.")

llm_response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.0,
    messages=[
        {
            "role": "user",
            "content": result
        }
    ]
)

In [38]:
# Show the model's answer to the basic prompt.
print(llm_response.choices[0].message.content)

This home would be perfect for those who value comfort and functionality in their living space.


### AUGMENTING SEARCH USING SYSTEM PROMPT

In [39]:
# System message that sets the assistant's persona (a friendly real-estate
# agent) and its task: recommend among the retrieved properties.
system_prompt = """
You are LLMRealtor, a highly experienced Real Estate Agent who has sold hundreds of properties.
You are polite, warm and friendly.
You will be given a three Real Estates that are the closest to a buyer's preferences.
Your task is to positively convince them that these three properties are the best for them and give them proper reasons to buy one of these properties. 
**Ultimately, suggest the best property according to the *buyer's preferences* provided.**
"""

In [40]:
# User-message template paired with system_prompt: retrieved listings first,
# then the buyer's original text. Placeholders: {context} and {question}.
AUGMENTED_PROMPT = """
REAL ESTATES CLOSEST TO THE BUYER'S PREFERENCES:

{context}

---

BUYER'S ORIGINAL PREFERENCE: {question}
"""

In [41]:
# Same retrieval as before, formatted with the augmented template (rebinds `result`).
result = semantic_search(question=user_query, template=AUGMENTED_PROMPT)

Generated Prompt:

REAL ESTATES CLOSEST TO THE BUYER'S PREFERENCES:

With a spacious layout and state-of-the-art features, this home is perfect for those who appreciate contemporary luxury.
----
BBQ area. With timeless design and modern amenities, this home offers a perfect blend of comfort and sophistication.
----
With plenty of space for work and play, this home is perfect for growing families or those who love to entertain.

---

BUYER'S ORIGINAL PREFERENCE: A comfortable three-bedroom house with a spacious kitchen and a cozy living room.



In [42]:
# semantic_search returns None when it finds no relevant match. Stop here with
# a clear message instead of sending None as the message content.
if result is None:
    raise ValueError("No relevant listings were found for the query; cannot build an LLM request.")

# System message sets the realtor persona; the user message carries the
# retrieved listings and the buyer's preference.
llm_response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.7,
    messages=[
        {
            "role": "system",
            "content": system_prompt
        },
        {
            "role": "user",
            "content": result
        }
    ]
)

In [43]:
# Show the realtor-persona recommendation returned by the model.
print(llm_response.choices[0].message.content)

Hello! I hope you're having a great day. I've found three wonderful properties that closely match your preferences for a comfortable three-bedroom house with a spacious kitchen and a cozy living room. Let me introduce you to the options:

1. The first property boasts a spacious layout and state-of-the-art features, perfect for those who appreciate contemporary luxury. The large kitchen is ideal for preparing meals and entertaining guests, while the cozy living room provides a warm and inviting space to relax and unwind after a long day.

2. The second property features a BBQ area and offers a perfect blend of comfort and sophistication with its timeless design and modern amenities. The three bedrooms provide ample space for your family or guests, and the spacious kitchen is sure to inspire your culinary creations.

3. The third property is great for growing families or those who love to entertain, with plenty of space for work and play. The three bedrooms offer versatility for your nee