# TASK 0: INITIALIZE THE LLM CLIENT

In [25]:
import openai

openai.api_key = ""
openai.api_base = "https://openai.vocareum.com/v1"

# Define the LLM Model
MODEL = "gpt-3.5-turbo"

# TASK 1: CREATE THE CSV

In [19]:
# All import statements used in this task
from pydantic import BaseModel, Field, NonNegativeInt
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
import pandas as pd

In [3]:
# Example Instruction and Sample
INSTRUCTION = "Generate a CSV file with at least 10 real estate listing."
SAMPLE_LISTING = \
"""
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

In [5]:
class RealEstate(BaseModel):
    """The structure of the Real Estates"""
    neighborhood: str = Field(description="The neighborhood in which the property is located.")
    price: NonNegativeInt = Field(description="The price of the property in USD.")
    bedrooms: NonNegativeInt = Field(description="The number of bedrooms in the property.")
    bathrooms: NonNegativeInt = Field(description="The number of bathrooms in the property.")
    house_size: NonNegativeInt = Field(description="The size of the property in sqft.")
    description: str = Field(description="A brief description of the property highlighting all its pros.")
    neighborhood_description: str = Field(description="A brief description of the neighborhood highlighting all its pros.")

In [7]:
class Listings(BaseModel):
    """The list containing real estates"""
    listings: List[RealEstate] = Field(description="A list containing Real Estates.")

In [9]:
# Initialize the parser
parser = PydanticOutputParser(pydantic_object=Listings)

In [11]:
# Create the prompt template
prompt_template = PromptTemplate(
    template="{instruction}\n{sample}\n{format_instructions}",
    input_variables=["instruction", "sample"],
    partial_variables={"format_instructions": parser.get_format_instructions}
)

print(prompt_template)

input_variables=['instruction', 'sample'] partial_variables={'format_instructions': <bound method PydanticOutputParser.get_format_instructions of PydanticOutputParser(pydantic_object=<class '__main__.Listings'>)>} template='{instruction}\n{sample}\n{format_instructions}'


In [12]:
# Create the query to generate the CSV table
query_to_generate_csv = prompt_template.format(
    instruction=INSTRUCTION,
    sample=SAMPLE_LISTING
)

print(query_to_generate_csv)

Generate a CSV file with at least 10 real estate listing.

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bik

In [14]:
# LLM Response
llm_response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0.0,
    messages=[
        {
            "role": "user",
            "content": query_to_generate_csv
        }
    ]
)

In [18]:
resp_content = llm_response.choices[0].message.content
parsed_resp = parser.parse(resp_content)
print(parsed_resp)

listings=[RealEstate(neighborhood='Green Oaks', price=800000, bedrooms=3, bathrooms=2, house_size=2000, description='Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.', neighborhood_description='Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.'), RealEstat

In [20]:
listings = parsed_resp.listings
df = pd.DataFrame([listing.dict() for listing in listings])
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Sunnyvale,950000,4,3,2500,"Beautiful 4-bedroom, 3-bathroom home located i...",Sunnyvale is known for its family-friendly atm...
2,Downtown Los Angeles,1200000,2,2,1800,"Luxurious 2-bedroom, 2-bathroom condo in the h...",Downtown Los Angeles is a vibrant urban neighb...
3,Brooklyn Heights,1500000,5,4,3000,"Stunning 5-bedroom, 4-bathroom brownstone town...",Brooklyn Heights is a picturesque neighborhood...
4,Pacific Palisades,3500000,6,5,4000,"Magnificent 6-bedroom, 5-bathroom estate in th...",Pacific Palisades is a sought-after neighborho...


In [21]:
df.to_csv("Real_Estate.csv")

# TASK 2: CREATE THE VECTOR DATABASE USING TF-IDF

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [5]:
df = pd.read_csv("Real_Estate.csv")

In [6]:
def row_to_doc(row):
    return " ".join([
        f"""
        \nNeighborhood: {row["neighborhood"]},
        \nPrice: {row["price"]},
        \nBedrooms: {row["bedrooms"]},
        \nBathrooms: {row["bathrooms"]},
        \nHouse Size: {row["house_size"]}
        \nDescription: {row["description"]}
        \nNeighborhood Description: {row["neighborhood_description"]}
        """
    ])

In [7]:
docs_from_csv = []
for _, row in df.iterrows():
    docs_from_csv.append(row_to_doc(row))
    
print(docs_from_csv[0])


        
Neighborhood: Green Oaks,
        
Price: 800000,
        
Bedrooms: 3,
        
Bathrooms: 2,
        
House Size: 2000
        
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.
        
Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike

In [8]:
vectorizer = TfidfVectorizer(stop_words="english")
tfidf = vectorizer.fit_transform(docs_from_csv)

# TASK 3: TAKE USER PREFERENCES

In [9]:
from typing import Optional

In [10]:
class Prefs:
    def __init__(self, budget, bedrooms, bathrooms, min_area, preferences):
        self.budget: Optional[int] = budget or None
        self.bedrooms: Optional[int] = bedrooms or None
        self.bathrooms: Optional[int] = bathrooms or None
        self.min_area: Optional[int] = min_area or None
        self.preferences: Optional[int] = preferences or ""

In [11]:
print("Let us know your preferences!")
budget = int(input("What's your budget? "))
bedrooms = int(input("How many bedrooms do you want in the property? "))
bathrooms = int(input("How many bathrooms do you want in the property? "))
min_area = int(input("How large of a property do you want? "))
preferences = input("Any preferences such as neighborhood? ")

Let us know your preferences!
What's your budget? 1000000
How many bedrooms do you want in the property? 2
How many bathrooms do you want in the property? 2
How large of a property do you want? 2500
Any preferences such as neighborhood? 


In [14]:
prefs = Prefs(
    budget=budget,
    bedrooms=bedrooms,
    bathrooms=bathrooms,
    min_area=min_area,
    preferences=preferences
)

# TASK 4: SEMANTIC SEARCH

In [15]:
pref_query = " ".join([
    f"${prefs.budget}" if prefs.budget else "",
    f"{prefs.bedrooms} bedrooms" if prefs.bedrooms else "",
    f"{prefs.bathrooms} bathrooms" if prefs.bathrooms else "",
    f"{prefs.min_area} sqft" if prefs.min_area else "",
    prefs.preferences
])

print(pref_query)

$1000000 2 bedrooms 2 bathrooms 2500 sqft 


In [17]:
query_vec = vectorizer.transform([pref_query])

In [18]:
scores = cosine_similarity(query_vec, tfidf)[0]

In [19]:
df["similarity_score"] = scores
ranked_df = df.sort_values(by="similarity_score", ascending=False)
ranked_df.head()

Unnamed: 0.1,Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description,similarity_score
1,1,Sunnyvale,950000,4,3,2500,"Beautiful 4-bedroom, 3-bathroom home located i...",Sunnyvale is known for its family-friendly atm...,0.14451
9,9,Lake Tahoe,1000000,3,3,1800,"Rustic 3-bedroom, 3-bathroom cabin in the scen...",Lake Tahoe is a year-round outdoor playground ...,0.093849
7,7,Tribeca,3000000,4,3,2800,"Chic 4-bedroom, 3-bathroom loft in the trendy ...",Tribeca is a fashionable neighborhood known fo...,0.026558
2,2,Downtown Los Angeles,1200000,2,2,1800,"Luxurious 2-bedroom, 2-bathroom condo in the h...",Downtown Los Angeles is a vibrant urban neighb...,0.024677
6,6,Beverly Hills,5000000,7,6,6000,"Exquisite 7-bedroom, 6-bathroom mansion in the...","Beverly Hills is synonymous with luxury, glamo...",0.024479


# TASK 5: FILTERING

In [21]:
filtered = []

for _, row in ranked_df.iterrows():
    is_ok = True
    
    price, bedrooms, bathrooms, house_size = int(row["price"]), int(row["bedrooms"]), int(row["bathrooms"]), int(row["house_size"])
    
    if prefs.budget and prefs.budget < price:
        is_ok = False
    if prefs.bedrooms and prefs.bedrooms > bedrooms:
        is_ok = False
    if prefs.bathrooms and prefs.bathrooms > bathrooms:
        is_ok = False
    if prefs.min_area and prefs.min_area > house_size:
        is_ok = False
    
    if is_ok:
        filtered.append(row)
        
if not filtered:
    print("No Matches Found!")

# TASK 6: PERSONALIZED RECOMMENDATION USING LLM

### Not using System Prompt

In [23]:
final_prompt = ""

if filtered:
    best = filtered[0].drop("similarity_score").to_dict()
    
    final_prompt = f"""
    Here are the details of a property that is the closest to a buyer's preferences.
    
    {best}
    
    ---
    
    You are LLMRealtor, an experienced Real Estate Agent. 
    Your task is to convince a buyer that this property is the best for them.
    Use a warm and polite tone.
    Lay out in details all the positive features of this property.
    """
    
print(final_prompt)


    Here are the details of a property that is the closest to a buyer's preferences.
    
    {'Unnamed: 0': 1, 'neighborhood': 'Sunnyvale', 'price': 950000, 'bedrooms': 4, 'bathrooms': 3, 'house_size': 2500, 'description': 'Beautiful 4-bedroom, 3-bathroom home located in the desirable neighborhood of Sunnyvale. This spacious property features a modern kitchen, luxurious bathrooms, and a large backyard perfect for entertaining. Enjoy the convenience of nearby shopping centers, parks, and top-rated schools.', 'neighborhood_description': 'Sunnyvale is known for its family-friendly atmosphere, excellent schools, and easy access to Silicon Valley tech companies. Residents enjoy a variety of dining options, outdoor activities, and community events throughout the year.'}
    
    ---
    
    You are LLMRealtor, an experienced Real Estate Agent. 
    Your task is to convince a buyer that this property is the best for them.
    Use a warm and polite tone.
    Lay out in details all the posit

In [26]:
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    temperature=0.7,
    messages=[
        {
            "role": "user",
            "content": final_prompt
        }
    ]
)

In [27]:
print(response.choices[0].message.content)

Hello there,

I am thrilled to present to you a property that perfectly aligns with your preferences. This beautiful 4-bedroom, 3-bathroom home in Sunnyvale is truly a gem. 

First and foremost, the modern kitchen and luxurious bathrooms are sure to impress. The spacious layout of 2500 square feet provides ample room for your family to live comfortably. The large backyard is perfect for hosting gatherings and creating lasting memories with your loved ones.

Not only does this property offer fantastic features, but its location in Sunnyvale is highly desirable. The neighborhood is known for its family-friendly atmosphere, top-rated schools, and easy access to Silicon Valley tech companies. You will have the convenience of nearby shopping centers, parks, and a variety of dining options at your fingertips.

I truly believe that this property checks off all the boxes on your wish list. It offers both a beautiful home and a vibrant community to become a part of. I would be delighted to show

### Using a System Prompt

In [28]:
system_prompt = """
You are LLMRealtor, a highly experienced Real Estate Agent who has sold over hundred real estates.
You are warm, polite and very convincing.
You will be given the best matching Real Estate according to a buyer's preference.
Your task will be to convince the buyer that this property is the best for them.
"""

In [29]:
final_prompt = ""

if filtered:
    best = filtered[0].drop("similarity_score").to_dict()
    
    final_prompt = f"""
    Here are the details of a property closest to a buyer's preferences:
    
    {best}
    
    ---
    
    Convince them to buy the property.
    """
    
print(final_prompt)


    Here are the details of a property closest to a buyer's preferences:
    
    {'Unnamed: 0': 1, 'neighborhood': 'Sunnyvale', 'price': 950000, 'bedrooms': 4, 'bathrooms': 3, 'house_size': 2500, 'description': 'Beautiful 4-bedroom, 3-bathroom home located in the desirable neighborhood of Sunnyvale. This spacious property features a modern kitchen, luxurious bathrooms, and a large backyard perfect for entertaining. Enjoy the convenience of nearby shopping centers, parks, and top-rated schools.', 'neighborhood_description': 'Sunnyvale is known for its family-friendly atmosphere, excellent schools, and easy access to Silicon Valley tech companies. Residents enjoy a variety of dining options, outdoor activities, and community events throughout the year.'}
    
    ---
    
    Convince them to buy the property.
    


In [34]:
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    temperature=0.7,
    messages=[
        {
            "role": "system",
            "content": system_prompt
        },
        {
            "role": "user",
            "content": final_prompt
        }
    ]
)

In [35]:
print(response.choices[0].message.content)

Hello there! I am delighted to present to you a truly remarkable property that perfectly aligns with your preferences. 

Located in the highly sought-after neighborhood of Sunnyvale, this beautiful 4-bedroom, 3-bathroom home is an absolute gem. With a spacious house size of 2500 square feet, this property offers ample space for you and your family to live comfortably and create lasting memories.

Imagine yourself in this modern kitchen, preparing delicious meals for your loved ones, or unwinding in the luxurious bathrooms after a long day. The large backyard is ideal for hosting gatherings with friends and family, creating a space where cherished moments are made.

Sunnyvale is renowned for its family-friendly atmosphere, excellent schools, and close proximity to Silicon Valley tech companies, providing you with both a vibrant community and great career opportunities. Additionally, the nearby shopping centers, parks, and top-rated schools offer convenience and quality of life that is t