# Vanilla vector search

In [27]:
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
from dotenv import load_dotenv
from datasets import load_dataset
import custom_utils
load_dotenv()



True

## Data Loading

In [2]:
# NOTE: Make sure you have an Hugging Face token (HF_TOKEN) in your development environemnt (.env file)
# NOTE: https://huggingface.co/datasets/MongoDB/airbnb_embeddings
# NOTE: This dataset contains several records with datapoint representing an airbnb listing.
# NOTE: This dataset contains text and image embeddings, but this lessons only uses the text embeddings


dataset = load_dataset("MongoDB/airbnb_embeddings", streaming=True, split="train")
dataset = dataset.take(100)

# Convert the dataset to a pandas dataframe
dataset_df = pd.DataFrame(dataset)
dataset_df.head()

Unnamed: 0,_id,listing_url,name,summary,space,description,neighborhood_overview,notes,transit,access,...,images,host,address,availability,review_scores,reviews,weekly_price,monthly_price,text_embeddings,image_embeddings
0,10006546,https://www.airbnb.com/rooms/10006546,Ribeira Charming Duplex,Fantastic duplex apartment with three bedrooms...,Privileged views of the Douro River and Ribeir...,Fantastic duplex apartment with three bedrooms...,"In the neighborhood of the river, you can find...",Lose yourself in the narrow streets and stairc...,Transport: • Metro station and S. Bento railwa...,We are always available to help guests. The ho...,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '51399391', 'host_url': 'https://w...","{'street': 'Porto, Porto, Portugal', 'suburb':...","{'availability_30': 28, 'availability_60': 47,...","{'review_scores_accuracy': 9, 'review_scores_c...","[{'_id': '58663741', 'date': 2016-01-03 05:00:...",,,"[0.0123710884, -0.0180913936, -0.016843712, -0...","[-0.1302358955, 0.1534578055, 0.0199299306, -0..."
1,10021707,https://www.airbnb.com/rooms/10021707,Private Room in Bushwick,Here exists a very cozy room for rent in a sha...,,Here exists a very cozy room for rent in a sha...,,,,,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '11275734', 'host_url': 'https://w...","{'street': 'Brooklyn, NY, United States', 'sub...","{'availability_30': 0, 'availability_60': 0, '...","{'review_scores_accuracy': 10, 'review_scores_...","[{'_id': '61050713', 'date': 2016-01-31 05:00:...",,,"[0.0153845912, -0.0348115042, -0.0093448907, 0...","[0.0340401195, 0.1742489338, -0.1572628617, 0...."
2,1001265,https://www.airbnb.com/rooms/1001265,Ocean View Waikiki Marina w/prkg,A short distance from Honolulu's billion dolla...,Great studio located on Ala Moana across the s...,A short distance from Honolulu's billion dolla...,You can breath ocean as well as aloha.,,Honolulu does have a very good air conditioned...,"Pool, hot tub and tennis",...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '5448114', 'host_url': 'https://ww...","{'street': 'Honolulu, HI, United States', 'sub...","{'availability_30': 16, 'availability_60': 46,...","{'review_scores_accuracy': 9, 'review_scores_c...","[{'_id': '4765259', 'date': 2013-05-24 04:00:0...",650.0,2150.0,"[-0.0400562622, -0.0405789167, 0.000644172, 0....","[-0.1640156209, 0.1256971657, 0.6594450474, -0..."
3,10009999,https://www.airbnb.com/rooms/10009999,Horto flat with small garden,One bedroom + sofa-bed in quiet and bucolic ne...,Lovely one bedroom + sofa-bed in the living ro...,One bedroom + sofa-bed in quiet and bucolic ne...,This charming ground floor flat is located in ...,"There´s a table in the living room now, that d...","Easy access to transport (bus, taxi, car) and ...",,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '1282196', 'host_url': 'https://ww...","{'street': 'Rio de Janeiro, Rio de Janeiro, Br...","{'availability_30': 0, 'availability_60': 0, '...","{'review_scores_accuracy': None, 'review_score...",[],1492.0,4849.0,"[-0.063234821, 0.0017937823, -0.0243996996, -0...","[-0.1292964518, 0.037789464, 0.2443587631, 0.0..."
4,10047964,https://www.airbnb.com/rooms/10047964,Charming Flat in Downtown Moda,Fully furnished 3+1 flat decorated with vintag...,The apartment is composed of 1 big bedroom wit...,Fully furnished 3+1 flat decorated with vintag...,With its diversity Moda- Kadikoy is one of the...,,,,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '1241644', 'host_url': 'https://ww...","{'street': 'Kadıköy, İstanbul, Turkey', 'subur...","{'availability_30': 27, 'availability_60': 57,...","{'review_scores_accuracy': 10, 'review_scores_...","[{'_id': '68162172', 'date': 2016-04-02 04:00:...",,,"[0.023723349, 0.0064210771, -0.0339970738, -0....","[-0.1006749049, 0.4022984803, -0.1821258366, 0..."


In [3]:
print("Columns:", dataset_df.columns)

Columns: Index(['_id', 'listing_url', 'name', 'summary', 'space', 'description',
       'neighborhood_overview', 'notes', 'transit', 'access', 'interaction',
       'house_rules', 'property_type', 'room_type', 'bed_type',
       'minimum_nights', 'maximum_nights', 'cancellation_policy',
       'last_scraped', 'calendar_last_scraped', 'first_review', 'last_review',
       'accommodates', 'bedrooms', 'beds', 'number_of_reviews', 'bathrooms',
       'amenities', 'price', 'security_deposit', 'cleaning_fee',
       'extra_people', 'guests_included', 'images', 'host', 'address',
       'availability', 'review_scores', 'reviews', 'weekly_price',
       'monthly_price', 'text_embeddings', 'image_embeddings'],
      dtype='object')


## Document Modelling

In [4]:
from typing import List, Optional
from pydantic import BaseModel, ValidationError
from datetime import datetime

In [5]:
class Host(BaseModel):
    host_id: str
    host_url: str
    host_name: str
    host_location: str
    host_about: str
    host_response_time: Optional[str] = None
    host_thumbnail_url: str
    host_picture_url: str
    host_response_rate: Optional[int] = None
    host_is_superhost: bool
    host_has_profile_pic: bool
    host_identity_verified: bool

In [6]:
class Location(BaseModel):
    type: str
    coordinates: List[float]
    is_location_exact: bool

class Address(BaseModel):
    street: str
    government_area: str
    market: str
    country: str
    country_code: str
    location: Location

In [7]:
class Review(BaseModel):
    _id: str
    date: Optional[datetime] = None
    listing_id: str
    reviewer_id: str
    reviewer_name: Optional[str] = None
    comments: Optional[str] = None

In [8]:
class Listing(BaseModel):
    _id: int
    listing_url: str
    name: str
    summary: str
    space: str
    description: str
    neighborhood_overview: Optional[str] = None
    notes: Optional[str] = None
    transit: Optional[str] = None
    access: str
    interaction: Optional[str] = None
    house_rules: str
    property_type: str
    room_type: str
    bed_type: str
    minimum_nights: int
    maximum_nights: int
    cancellation_policy: str
    last_scraped: Optional[datetime] = None
    calendar_last_scraped: Optional[datetime] = None
    first_review: Optional[datetime] = None
    last_review: Optional[datetime] = None
    accommodates: int
    bedrooms: Optional[float] = 0
    beds: Optional[float] = 0
    number_of_reviews: int
    bathrooms: Optional[float] = 0
    amenities: List[str]
    price: int
    security_deposit: Optional[float] = None
    cleaning_fee: Optional[float] = None
    extra_people: int
    guests_included: int
    images: dict
    host: Host
    address: Address
    availability: dict
    review_scores: dict
    reviews: List[Review]
    text_embeddings: List[float]


In [9]:
records = dataset_df.to_dict(orient='records')

In [10]:
# To handle catch `NaT` values
for record in records:
    for key, value in record.items():
        # Check if the value is list-like; if so, process each element
        if isinstance(value, list):
            processed_list = [None if pd.isnull(v) else v for v in value]
            record[key] = processed_list
        # For scalar values,continue as before
        else:
            if pd.isnull(value):
                record[key] = None

In [11]:
try:
    # Convert each dictionary to a Movie instance
    listings = [Listing(**record).model_dump() for record in records]
    # Get an overview of a single datapoint
    print(listings[0].keys())
except ValidationError as e:
    print(e)

dict_keys(['listing_url', 'name', 'summary', 'space', 'description', 'neighborhood_overview', 'notes', 'transit', 'access', 'interaction', 'house_rules', 'property_type', 'room_type', 'bed_type', 'minimum_nights', 'maximum_nights', 'cancellation_policy', 'last_scraped', 'calendar_last_scraped', 'first_review', 'last_review', 'accommodates', 'bedrooms', 'beds', 'number_of_reviews', 'bathrooms', 'amenities', 'price', 'security_deposit', 'cleaning_fee', 'extra_people', 'guests_included', 'images', 'host', 'address', 'availability', 'review_scores', 'reviews', 'text_embeddings'])


## Database Creation and Connection

In [13]:
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel

database_name = "airbnb_dataset"
collection_name = "listings_reviews"

def get_mongo_client(mongo_uri: str) -> MongoClient:
    """Establish connection to the MongoDB Atlas cluster."""
    try:
        # Remove the trailing comma that's creating a tuple
        client = MongoClient(mongo_uri)
        # Test the connection
        client.admin.command('ping')
        print("Connected to MongoDB Atlas successfully")
        return client
    except Exception as e:
        print(f"Failed to connect to MongoDB Atlas: {e}")
        raise

# Your MongoDB Atlas connection string should look like this:
# MONGO_URI = "mongodb+srv://<username>:<password>@<cluster-url>/?retryWrites=true&w=majority"
MONGO_URI = os.environ.get("MONGO_URI")
mongo_client = get_mongo_client(mongo_uri=MONGO_URI)

# Now you can get your database and collection
db = mongo_client[database_name]  # Alternative syntax to get_database()
collection = db[collection_name]  # Alternative syntax to get_collection()

Connected to MongoDB Atlas successfully


In [14]:
# Delete any existing records in the collection
collection.delete_many({})

DeleteResult({'n': 100, 'electionId': ObjectId('7fffffff000000000000001b'), 'opTime': {'ts': Timestamp(1731655634, 91), 't': 27}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1731655634, 100), 'signature': {'hash': b'\xe7\xdcJ\xce\xd1\xc1\x80J\xe2z\xf1H\x86\x0f6\xdc\xfe\x9dW\x03', 'keyId': 7400109933816971265}}, 'operationTime': Timestamp(1731655634, 91)}, acknowledged=True)

## Data Ingestion

In [15]:
# The ingestion process might take a few minutes
collection.insert_many(listings)
print(f"Inserted {collection.count_documents({})} records into the collection")

Inserted 100 records into the collection


In [16]:
## Vector Search Index definition

text_embedding_field_name = "text_embeddings"
vector_search_index_name_text = "vector_index_text"

vector_search_index_model = SearchIndexModel(
    definition={
        "mappings": { # describes how fields in the database documents are indexed and stored
            "dynamic": True, # automatically index new fields that appear in the document
            "fields": { # properties of the fields that will be indexed.
                text_embedding_field_name: { 
                    "dimensions": 1536, # size of the vector.
                    "similarity": "dotProduct", # algorithm used to compute the similarity between vectors
                    "type": "knnVector",
                }
            },
        }
    },
    name=vector_search_index_name_text, # identifier for the vector search index
)

In [17]:
# Check if the index already exists
index_exists = False
for index in collection.list_indexes():
    print(index)
    if index['name'] == vector_search_index_name_text:
        index_exists = True
        break

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])


In [18]:
import time

# Create the index if it doesn't exist
if not index_exists:
    try:
        result = collection.create_search_index(model=vector_search_index_model)
        print("Creating index...")
        time.sleep(20)
        print("Index created succesfully:", result)
        print("Wait a few minutes before search with index to ensure it's ready")
    except Exception as e:
        print(f"Error creating vector search index: {str(e)}")
else:
    print(f"Index '{vector_search_index_name_text}' already exists")


Error creating vector search index: Duplicate Index, full error: {'ok': 0.0, 'errmsg': 'Duplicate Index', 'code': 68, 'codeName': 'IndexAlreadyExists', '$clusterTime': {'clusterTime': Timestamp(1731655649, 2), 'signature': {'hash': b'\xd3O\xa7o\x92\xca\xa1\x89\xd1-\x1c\x85\x08[\x9d\x93^\xf0hc', 'keyId': 7400109933816971265}}, 'operationTime': Timestamp(1731655649, 2)}


In [19]:
import openai

def get_embedding(text):
    """Generate an embedding for the given text using OpenAI's API."""

    # Check for valid input
    if not text or not isinstance(text, str):
        return None

    try:
        # Call OpenAI API to get the embedding
        embedding = openai.embeddings.create(
            input=text,
            model="text-embedding-3-small", dimensions=1536).data[0].embedding
        return embedding
    except Exception as e:
        print(f"Error in get_embedding: {e}")
        return None

## Compose Vector Search Query

In [20]:
def vector_search(user_query, db, collection, vector_index="vector_index_text"):
    """
    Perform a vector search in the MongoDB collection based on the user query.
    """
    # Generate embedding for the user query
    query_embedding = get_embedding(user_query)

    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    # Define the vector search stage
    vector_search_stage = {
        "$vectorSearch": {
            "index": vector_index,
            "queryVector": query_embedding,
            "path": text_embedding_field_name,
            "numCandidates": 150,
            "limit": 20
        }
    }

    # Define the aggregate pipeline with the vector search stage
    pipeline = [vector_search_stage]

    try:
        # Execute the search and time it
        import time
        start_time = time.time()
        results = list(collection.aggregate(pipeline))
        end_time = time.time()
        
        print(f"Total search time: {(end_time - start_time)*1000:.2f} milliseconds")
        
        return results
    except Exception as e:
        print(f"Error during vector search: {e}")
        return []

## Handle user query

In [21]:
class SearchResultItem(BaseModel):
    name: str
    accommodates: Optional[int] = None
    address: Address
    summary: Optional[str] = None
    description: Optional[str] = None
    neighborhood_overview: Optional[str] = None
    notes: Optional[str] = None

In [22]:
from IPython.display import display, HTML

def handle_user_query(query, db, collection):
    # Assuming vector_search returns a list of dictionaries with keys 'title' and 'plot'
    get_knowledge = vector_search(query, db, collection)

    # Check if there are any results
    if not get_knowledge:
        return "No results found.", "No source information available."
        
     # Convert search results into a list of SearchResultItem models
    search_results_models = [
        SearchResultItem(**result)
        for result in get_knowledge
    ]

    # Convert search results into a DataFrame for better rendering in Jupyter
    search_results_df = pd.DataFrame([item.dict() for item in search_results_models])

    # Generate system response using OpenAI's completion
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system", 
                "content": "You are a airbnb listing recommendation system."},
            {
                "role": "user", 
                "content": f"Answer this user query: {query} with the following context:\n{search_results_df}"
            }
        ]
    )

    system_response = completion.choices[0].message.content

    # Print User Question, System Response, and Source Information
    print(f"- User Question:\n{query}\n")
    print(f"- System Response:\n{system_response}\n")

    # Display the DataFrame as an HTML table
    display(HTML(search_results_df.to_html()))

    # Return structured response and source info as a string
    return system_response

In [23]:
def vector_search(user_query, db, collection, vector_index="vector_index_text"):
    """
    Perform a vector search in the MongoDB collection based on the user query.
    """
    # Generate embedding for the user query
    query_embedding = get_embedding(user_query)

    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    # Define the vector search stage
    vector_search_stage = {
        "$vectorSearch": {
            "index": vector_index,
            "queryVector": query_embedding,
            "path": text_embedding_field_name,
            "numCandidates": 150,
            "limit": 20
        }
    }

    # Define the aggregate pipeline with the vector search stage
    pipeline = [vector_search_stage]

    try:
        # Execute the search and time it
        import time
        start_time = time.time()
        results = list(collection.aggregate(pipeline))
        end_time = time.time()
        
        print(f"Total search time: {(end_time - start_time)*1000:.2f} milliseconds")
        
        return results
    except Exception as e:
        print(f"Error during vector search: {e}")
        return []

In [24]:
query = """
I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection.
"""
handle_user_query(query, db, collection)

Total search time: 1936.09 milliseconds
- User Question:

I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection.


- System Response:
Based on your desire for a warm and friendly place that is also close to restaurants, I recommend the **Cozy house at Beyoğlu**.

**Reason for recommendation**: Located in the heart of Beyoğlu, İstanbul, this listing is in a vibrant and dynamic neighborhood known for its rich cultural life and numerous dining options. The host creates a welcoming atmosphere, ensuring a friendly stay, while you’ll have easy access to a variety of local restaurants. This combination of hospitality and convenient location makes it an ideal choice for your stay!



Unnamed: 0,name,accommodates,address,summary,description,neighborhood_overview,notes
0,Cozy house at Beyoğlu,2,"{'street': 'Beyoğlu, İstanbul, Turkey', 'government_area': 'Beyoglu', 'market': 'Istanbul', 'country': 'Turkey', 'country_code': 'TR', 'location': {'type': 'Point', 'coordinates': [28.95825, 41.03777], 'is_location_exact': False}}","Hello dear Guests, wellcome to istanbul. My House is 2+1 and at second floor. 1 privite room is for my international guests. House is Very close to Taksim Square. You can Walk in 30 minutes or you can take a bus. The bus stop is only 100 m from home. You can go Taksim, Eminönü, Karaköy, Kadıköy, Beyazıt, Sultanahmet easily from home. I have 1 bed, two people can sleep together. Second person should pay extra. You can use kitchen, bathroom, free Wifi, dishwasher, washing machine, Ironing.","Hello dear Guests, wellcome to istanbul. My House is 2+1 and at second floor. 1 privite room is for my international guests. House is Very close to Taksim Square. You can Walk in 30 minutes or you can take a bus. The bus stop is only 100 m from home. You can go Taksim, Eminönü, Karaköy, Kadıköy, Beyazıt, Sultanahmet easily from home. I have 1 bed, two people can sleep together. Second person should pay extra. You can use kitchen, bathroom, free Wifi, dishwasher, washing machine, Ironing. Safe, quite, big house, wiev, Central, near the bus stop. Kitchen, bathroom, room, sitting room, balcony, washing machine, dishwasher, Ironing. You can ask any questions with phone, (Hidden by Airbnb) or email. + (Phone number hidden by Airbnb) is my (Hidden by Airbnb) number. (Email hidden by Airbnb) is my email adress Beyoğlu / Centre of İstanbul It calls Hasköy area, near the Golden Horn Its very closed to bus stop and Balat, Taksim, Pierre Loti, Eminönü, Sultan Ahmet, Kadıköy Just enjoy your hol","Beyoğlu / Centre of İstanbul It calls Hasköy area, near the Golden Horn",Just enjoy your holiday
1,Downtown Oporto Inn (room cleaning),2,"{'street': 'Porto, Porto, Portugal', 'government_area': 'Cedofeita, Ildefonso, Sé, Miragaia, Nicolau, Vitória', 'market': 'Porto', 'country': 'Portugal', 'country_code': 'PT', 'location': {'type': 'Point', 'coordinates': [-8.60867, 41.1543], 'is_location_exact': False}}","Tradicional building, with high ceilings next to City Hall or Trindade Subway station, at a short walking distance from the historic center of this beautiful city. R It is the property of a book novel writer.","Tradicional building, with high ceilings next to City Hall or Trindade Subway station, at a short walking distance from the historic center of this beautiful city. R It is the property of a book novel writer. Cozy, located near the most interesting points of the city to provide a nice stay, with a low budget. Has a gift shop to buy handicraft, books and other gifts, It is a Inn with groundfloor and 1st floor, with access to a small patio at groundfloor. You may reach us by e-mail - (Email hidden by Airbnb) or (Hidden by Airbnb) (Website hidden by Airbnb) Exciting, urban and dinamic, stay with us, near the center, and enjoy a unique stay! Next to Trindade subway station, and also to Faria Guimarães subway station. Near Praca da República, where there are several bus stop. No private parking.","Exciting, urban and dinamic, stay with us, near the center, and enjoy a unique stay!",No private parking.
2,Banyan Bungalow,2,"{'street': 'Waialua, HI, United States', 'government_area': 'North Shore Oahu', 'market': 'Oahu', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-158.1602, 21.57561], 'is_location_exact': False}}",The place to be on the north shore is where you can be steps from the ocean and watch the stars at night. Our 2 acre property (with tropical greenhouses) hosts a quiet cottage with private driveway/private access.,"The place to be on the north shore is where you can be steps from the ocean and watch the stars at night. Our 2 acre property (with tropical greenhouses) hosts a quiet cottage with private driveway/private access. Big, open space with lots of natural light. The cottage is clean and quiet - perfect for a good night's sleep. Meals can be easily prepared in the small kitchen. Microwave, hot plate, toaster, blender, coffee maker, full size fridge are available. Private driveway to access the property, parking on site. While we live on the property and will try to greet you at your arrival, quiet our busy schedules prevent this from happening. We are more than happy to recommend activities, dining options, directions, etc. This desirable neighborhood is comprised of other vacation rentals, local families, public beach access, and even a campground. Roosters and hens have made their home here as well. Many native birds can be seen and their many sweet sounds can be enjoyed. While you","This desirable neighborhood is comprised of other vacation rentals, local families, public beach access, and even a campground. Roosters and hens have made their home here as well. Many native birds can be seen and their many sweet sounds can be enjoyed.",
3,Homely Room in 5-Star New Condo@MTR,2,"{'street': 'Mongkok, Kowloon, Hong Kong', 'government_area': 'Yau Tsim Mong', 'market': 'Hong Kong', 'country': 'Hong Kong', 'country_code': 'HK', 'location': {'type': 'Point', 'coordinates': [114.17094, 22.32074], 'is_location_exact': False}}","Located in Mongkok, close to everything. 2min walk to both Mongkok and Mongkok East station. Gym, sauna and swimming pool (in summer) are available in the clubhouse. You'll have a private double room. Washroom and kitchen are shared with host. We are family of 3, my husband, 1y old son and me. The guest bedroom can accommodate two people, the 3rd person has to sleep on the couch (3'x6') in the living room.","Located in Mongkok, close to everything. 2min walk to both Mongkok and Mongkok East station. Gym, sauna and swimming pool (in summer) are available in the clubhouse. You'll have a private double room. Washroom and kitchen are shared with host. We are family of 3, my husband, 1y old son and me. The guest bedroom can accommodate two people, the 3rd person has to sleep on the couch (3'x6') in the living room. You will stay with my son, my husband and me. We couple love travelling very much and have been to more than 35 countries in the past few years. We like to share our travel tips and photos with everyone. There is a luxury clubhouse in my building, with gym and swimming pool. The building is newly built and it's the most luxury one in Mongkok area. Wifi, gym, swimming pool (in summer), kitchen, shower room, living room, dining room, TV My husband and I will stay with you. So we would like to chat with you anytime when we are at home. Our first son was born in May 2016. Many rest",Many restaurants and shops nearby.,"Just feel as home. We will give you all assistance. The 3rd guest is allowed to sleep on the sofa in the living room, and it's subject to an extra charge HK$250 per night."
4,Cheerful new renovated central apt,8,"{'street': 'Beyoğlu, İstanbul, Turkey', 'government_area': 'Beyoglu', 'market': 'Istanbul', 'country': 'Turkey', 'country_code': 'TR', 'location': {'type': 'Point', 'coordinates': [28.97477, 41.03735], 'is_location_exact': False}}","The full equipped apartment located in the heritage district of Istanbul, colorful Tarlabaşı. If you are looking for a place where you really want to taste the chaos with harmony like a real Istanbuller you are very welcome to stay in my apartment.","The full equipped apartment located in the heritage district of Istanbul, colorful Tarlabaşı. If you are looking for a place where you really want to taste the chaos with harmony like a real Istanbuller you are very welcome to stay in my apartment. Hi there! My name is Aybike. I love to travel, to discover new places and to meet new people. I will be glad to hosting you in Istanbul at my place. My apartment is newly renovated, clean, cosy, comfortable, large enough for 8 people and is situated literally at the heart of Istanbul. Apartment has one of the unique examples of turn-of-the-century Levantine architecture in Turkey: slim, four-storey bow-fronted homes that huddle along winding, narrow streets. Located in a street as it was used to be; the ground floors often served as stores or workshops. More likes to come to your posts in Instagram ! As a traveller my wish is to make you feel at home; drink your morning coffee while listening to the sound of Istanbul then take your map, j","Great location will allow you to explore and enjoy Taksim, Pera,Galata , Şişhane and Cihangir. (great walking distance for all the events), Easily accessible to subway, tram, ferries. The neighbourhood is friendly and diverse. It is only 3 minutes by walk to the Galatasaray Square which is located approximately at the center of the Istiklal Avenue. Istiklal Avenue is located in the historic Beyoğlu (Pera) district, it is an elegant pedestrian street, 1.4 kilometers long, which houses boutiques, music stores, bookstores, art galleries, cinemas, theatres, libraries, cafés, pubs, night clubs with live music, historical patisseries, chocolateries and restaurants. You should definately walk through the local bazaar on Sunday right beside the apartment. You can find anything you need with cheap prices; fruits, vegetables, nuts, fish, meat, dairy products! Local bazaars will be very helpful to understand the culture of Turkish people as well. There are couple of restaurants close by where yo","From/To Airports: There are several ways to get from the airport to the apartment, but the most convenient manner is to take “HAVATAŞ"" shuttle to Taksim Square departing every 30 minutes from the airport (from both airports- Atatürk and Sabiha Gökçen). As you may be unfamiliar with the area, I am happy to come and pick you up in front of Galatasaray Highschool (on Istiklal Street) which is 10 minutes walk from Taksim Square where you will get off. I can always advise you cheaper public transport options if you ask for. Useful information: You can rent the apartment/room for (a) day(s), week, month or longer periods of time. There is various supermarkets conveniently situated a block away from the apartment on the way to Istiklal street, also a small kiosk right next to the apartment and a laundry in 100 meters distance."
5,Sydney Hyde Park City Apartment (checkin from 6am),2,"{'street': 'Darlinghurst, NSW, Australia', 'government_area': 'Sydney', 'market': 'Sydney', 'country': 'Australia', 'country_code': 'AU', 'location': {'type': 'Point', 'coordinates': [151.21346, -33.87603], 'is_location_exact': False}}","Our city apartment is a bright, comfortable 1 bedroom with 24hr front-desk access. It is conveniently located directly across from Hyde Park and within walking distance of delightful cafes, restaurants, parks, major city attractions and public transport, including Museum Station which has a direct connection to Sydney Airport.","Our city apartment is a bright, comfortable 1 bedroom with 24hr front-desk access. It is conveniently located directly across from Hyde Park and within walking distance of delightful cafes, restaurants, parks, major city attractions and public transport, including Museum Station which has a direct connection to Sydney Airport. SPACE Comfortable 1 bedroom which has a queen-sized bed, is air conditioned and has a self-contained kitchen, including: fridge, dishwasher, oven, cooktop, microwave. SECURITY Secure 24hour front-desk access with full access to the rooftop area which has stunning views of Hydepark and the city, and includes: pool, sauna and gym. Laundry facilities, which are located on the mezzanine level and has coin operated washing machines and dryers. MY AVAILABILITY I am easily contactable at anytime. If I am unable to welcome you personally I will leave the keys to the apartment and any further information marked for your attention at the front desk. AREA Located withi","AREA Located within a vibrant and contemporary part of the city that is seeped in rich history and within walking distance of many attractions including: Hyde Park, St Mary's Cathedral, NSW Art Gallery, The Domaine, The Sydney Museum and less than a 30 minute walk to The Sydney Opera House, Sydney Harbour Bridge, Darling Harbour, Chinese Gardens, Wildlife Parks and Aquarium. SYDNEY PRIVÉ If you wish discover why Sydney is such an extraordinary place then I would highly recommend Sydney Privé, which is an exclusive concierge service that specialises in providing personalised tours and luxury experiences for the most discerning traveller. To truly experience Sydney, one must explore the local hideouts, and there is no better place to do this than in Darlinghurst. Darlinghurst is the heartbeat of the city! Checkout our airbnb Guidebook for some of our favourite hideouts. The Guidebook will provide you with website links and actual location.",IMPORTANT: Our apartment is privately owned and serviced. It is not part of the hotel that is operated from within the building. Internet: Our internet connection is wifi and dedicated to our apartment. So there is no sharing with other guests and no need to pay additional fees for internet usage.
6,"Cozy Nest, heart of the Plateau",1,"{'street': 'Montreal, QC, Canada', 'government_area': 'Le Plateau-Mont-Royal', 'market': 'Montreal', 'country': 'Canada', 'country_code': 'CA', 'location': {'type': 'Point', 'coordinates': [-73.58774, 45.52028], 'is_location_exact': True}}","Come spend a few days (or weeks) in our beautiful and cozy apartment situated in the heart of Montreal's most happening neighborhood! You will share the space with me as, maybe another guest and probably a cute cat or kitten (I sometimes foster cute little furry balls). LGBTQ friendly :) Looking forward to meeting you!","Come spend a few days (or weeks) in our beautiful and cozy apartment situated in the heart of Montreal's most happening neighborhood! You will share the space with me as, maybe another guest and probably a cute cat or kitten (I sometimes foster cute little furry balls). LGBTQ friendly :) Looking forward to meeting you! The spacious double room comes fully furnished with a double bed, closet space as well as a desk + High speed internet + heating + sheets and towels + direct access to a washer/dryer. Our apartment is a ground floor with a cute backyard and is situated right between two of Montreal's most happening neighborhoods (The Plateau and Mile End) as well as two main streets: Saint-Laurent and Mont-Royal. 5 minute walking distance from the beautiful Mont-Royal 10 minutes direct bus ride from downtown Steps away from bixi stands (Local bicycle sharing program). Our neighborhood is bustling with grocery shops, cafés, bars, restaurants and parks, all in walking distance! Please fee","We live right between two of Montreal's most happening neighborhoods. At the foot of the Mont-Royal and right next to Saint-Laurent, Mont-Royal and St-Denis streets. Our street is however quiet and peaceful.","Since we are on the ground floor, access in and out of the house is possible for folks who may have a limited mobility, however there is 1 step to get through the front door and the bathroom is unfortunately a bit too narrow to accomodate a wheelchair. Let me know if you have questions regarding specifics and I'll be able to assist!"
7,A bedroom far away from home,2,"{'street': 'Queens, NY, United States', 'government_area': 'Briarwood', 'market': 'New York', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-73.82257, 40.71485], 'is_location_exact': True}}","NOTES: BEFORE BOOKING, PLEASE KNOW THAT WE ARE LOCATED ON THE 3RD FLOOR OF A WALK UP BUILDING (3 SHORT FLIGHT OF (Website hidden by Airbnb) IF YOU HAVE TROUBLE CLIMBING STAIRS, BE ADVISED, THANKS! THE CHECK-IN TIME IS STRICTLY BETWEEN 1PM-10PM.. AND CHECK OUT IS 11AM THIS LISTING IS ONLY FOR A BEDROOM AND NOT THE ENTIRE APARTMENT.","NOTES: BEFORE BOOKING, PLEASE KNOW THAT WE ARE LOCATED ON THE 3RD FLOOR OF A WALK UP BUILDING (3 SHORT FLIGHT OF (Website hidden by Airbnb) IF YOU HAVE TROUBLE CLIMBING STAIRS, BE ADVISED, THANKS! THE CHECK-IN TIME IS STRICTLY BETWEEN 1PM-10PM.. AND CHECK OUT IS 11AM THIS LISTING IS ONLY FOR A BEDROOM AND NOT THE ENTIRE APARTMENT. our place is a good sized apartment in a very quiet neighborhood. the bedroom is clean and cozy with a queen sized bed, so you can get all the rest you need. you can use the living room and kitchen. we also have a piano, so if you know how to play,you are free to do so! one of us will try to help you in your needs through out your stay. our neighborhood is basically peaceful, there a lot of children because the school bus picks them up by the corner. but all in all, its a quiet and friendly place to live. subway is about 5-8 minutes away by foot. there is street parking.","our neighborhood is basically peaceful, there a lot of children because the school bus picks them up by the corner. but all in all, its a quiet and friendly place to live.",
8,Modern Spacious 1 Bedroom Loft,4,"{'street': 'Montréal, Québec, Canada', 'government_area': 'Le Plateau-Mont-Royal', 'market': 'Montreal', 'country': 'Canada', 'country_code': 'CA', 'location': {'type': 'Point', 'coordinates': [-73.59111, 45.51889], 'is_location_exact': True}}","Prime location, amazing lighting and no annoying neighbours. Good place to rent if you want a relaxing time in Montreal.","Prime location, amazing lighting and no annoying neighbours. Good place to rent if you want a relaxing time in Montreal. Lot's of plants and lights. Really great modern bathroom that you will love showering in :) and a kitchen equipped with everything you need to cook a great meal. (we live here)",,
9,Surry Hills Studio - Your Perfect Base in Sydney,2,"{'street': 'Surry Hills, NSW, Australia', 'government_area': 'Sydney', 'market': 'Sydney', 'country': 'Australia', 'country_code': 'AU', 'location': {'type': 'Point', 'coordinates': [151.21554, -33.88029], 'is_location_exact': True}}","This spacious, light filled studio has everything you need to enjoy Sydney and is the perfect base for exploring. The harbour is an easy walk, Bondi a short bus trip away.","This spacious, light filled studio has everything you need to enjoy Sydney and is the perfect base for exploring. The harbour is an easy walk, Bondi a short bus trip away. Comfortable studio with a great layout. The bathroom has a full size bath tub and shower. Complete set of kitchen utensils etc for cooking and a full size fridge. The 3 seater sofa and 42' TV mean you can comfortably relax after a day at the beach or exploring. Large lounge room windows provide harbour views. You have full use of the entire studio and complete privacy during your stay. You have complete privacy during your stay. Surry Hills is a vibrant eclectic community and an amazing place to live, and this studio is in the centre of it all. A fantastic range of small bars and restaurants are on your doorstep, along with Sydney's best breakfasts for the next morning. Central to public transport, busses to Bondi Beach and the city are 50 metres from the front door. Travel Card 'Opal' travel cards work on all Sydne","Surry Hills is a vibrant eclectic community and an amazing place to live, and this studio is in the centre of it all. A fantastic range of small bars and restaurants are on your doorstep, along with Sydney's best breakfasts for the next morning. Central to public transport, busses to Bondi Beach and the city are 50 metres from the front door.","WiFi, Apple TV with Netflix App (for use with your own iTunes / Netflix account), 42' TV, Sound Dock"


'Based on your desire for a warm and friendly place that is also close to restaurants, I recommend the **Cozy house at Beyoğlu**.\n\n**Reason for recommendation**: Located in the heart of Beyoğlu, İstanbul, this listing is in a vibrant and dynamic neighborhood known for its rich cultural life and numerous dining options. The host creates a welcoming atmosphere, ensuring a friendly stay, while you’ll have easy access to a variety of local restaurants. This combination of hospitality and convenient location makes it an ideal choice for your stay!'

# Filtering with Metadata

## Load data

In [1]:
import custom_utils
from datasets import load_dataset
import pandas as pd

dataset = load_dataset("MongoDB/airbnb_embeddings", streaming=True, split="train")
dataset = dataset.take(100)
# Convert the dataset to a pandas dataframe
dataset_df = pd.DataFrame(dataset)
dataset_df.head(5)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,_id,listing_url,name,summary,space,description,neighborhood_overview,notes,transit,access,...,images,host,address,availability,review_scores,reviews,weekly_price,monthly_price,text_embeddings,image_embeddings
0,10006546,https://www.airbnb.com/rooms/10006546,Ribeira Charming Duplex,Fantastic duplex apartment with three bedrooms...,Privileged views of the Douro River and Ribeir...,Fantastic duplex apartment with three bedrooms...,"In the neighborhood of the river, you can find...",Lose yourself in the narrow streets and stairc...,Transport: • Metro station and S. Bento railwa...,We are always available to help guests. The ho...,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '51399391', 'host_url': 'https://w...","{'street': 'Porto, Porto, Portugal', 'suburb':...","{'availability_30': 28, 'availability_60': 47,...","{'review_scores_accuracy': 9, 'review_scores_c...","[{'_id': '58663741', 'date': 2016-01-03 05:00:...",,,"[0.0123710884, -0.0180913936, -0.016843712, -0...","[-0.1302358955, 0.1534578055, 0.0199299306, -0..."
1,10021707,https://www.airbnb.com/rooms/10021707,Private Room in Bushwick,Here exists a very cozy room for rent in a sha...,,Here exists a very cozy room for rent in a sha...,,,,,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '11275734', 'host_url': 'https://w...","{'street': 'Brooklyn, NY, United States', 'sub...","{'availability_30': 0, 'availability_60': 0, '...","{'review_scores_accuracy': 10, 'review_scores_...","[{'_id': '61050713', 'date': 2016-01-31 05:00:...",,,"[0.0153845912, -0.0348115042, -0.0093448907, 0...","[0.0340401195, 0.1742489338, -0.1572628617, 0...."
2,1001265,https://www.airbnb.com/rooms/1001265,Ocean View Waikiki Marina w/prkg,A short distance from Honolulu's billion dolla...,Great studio located on Ala Moana across the s...,A short distance from Honolulu's billion dolla...,You can breath ocean as well as aloha.,,Honolulu does have a very good air conditioned...,"Pool, hot tub and tennis",...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '5448114', 'host_url': 'https://ww...","{'street': 'Honolulu, HI, United States', 'sub...","{'availability_30': 16, 'availability_60': 46,...","{'review_scores_accuracy': 9, 'review_scores_c...","[{'_id': '4765259', 'date': 2013-05-24 04:00:0...",650.0,2150.0,"[-0.0400562622, -0.0405789167, 0.000644172, 0....","[-0.1640156209, 0.1256971657, 0.6594450474, -0..."
3,10009999,https://www.airbnb.com/rooms/10009999,Horto flat with small garden,One bedroom + sofa-bed in quiet and bucolic ne...,Lovely one bedroom + sofa-bed in the living ro...,One bedroom + sofa-bed in quiet and bucolic ne...,This charming ground floor flat is located in ...,"There´s a table in the living room now, that d...","Easy access to transport (bus, taxi, car) and ...",,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '1282196', 'host_url': 'https://ww...","{'street': 'Rio de Janeiro, Rio de Janeiro, Br...","{'availability_30': 0, 'availability_60': 0, '...","{'review_scores_accuracy': None, 'review_score...",[],1492.0,4849.0,"[-0.063234821, 0.0017937823, -0.0243996996, -0...","[-0.1292964518, 0.037789464, 0.2443587631, 0.0..."
4,10047964,https://www.airbnb.com/rooms/10047964,Charming Flat in Downtown Moda,Fully furnished 3+1 flat decorated with vintag...,The apartment is composed of 1 big bedroom wit...,Fully furnished 3+1 flat decorated with vintag...,With its diversity Moda- Kadikoy is one of the...,,,,...,"{'thumbnail_url': '', 'medium_url': '', 'pictu...","{'host_id': '1241644', 'host_url': 'https://ww...","{'street': 'Kadıköy, İstanbul, Turkey', 'subur...","{'availability_30': 27, 'availability_60': 57,...","{'review_scores_accuracy': 10, 'review_scores_...","[{'_id': '68162172', 'date': 2016-04-02 04:00:...",,,"[0.023723349, 0.0064210771, -0.0339970738, -0....","[-0.1006749049, 0.4022984803, -0.1821258366, 0..."


## Process data

In [2]:
listings = custom_utils.process_records(dataset_df)

## Connect to database

In [3]:
db, collection = custom_utils.connect_to_database()

Connection to MongoDB successful


In [42]:
collection.delete_many({})

DeleteResult({'n': 100, 'electionId': ObjectId('7fffffff000000000000001b'), 'opTime': {'ts': Timestamp(1730882596, 91), 't': 27}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1730882596, 100), 'signature': {'hash': b"\xf3hG\r\x17\x93bL'\xd7\xcf\x93\xa2\x01\xa6\xc9g\xf3\xaa\xf4", 'keyId': 7400109933816971265}}, 'operationTime': Timestamp(1730882596, 91)}, acknowledged=True)

## Data ingestion

In [43]:
collection.insert_many(listings)
print(f"Inserted {collection.count_documents({})} records into the collection")

Inserted 100 records into the collection


## Vector Search Index definition

In [4]:
custom_utils.setup_vector_search_index(db, collection)

TypeError: 'Collection' object is not iterable

In [33]:
def vector_search(user_query, db, collection, additional_stages=[], vector_index="vector_index_text"):
    """
    Perform a vector search in the MongoDB collection based on the user query.

    Args:
    user_query (str): The user's query string.
    db (MongoClient.database): The database object.
    collection (MongoCollection): The MongoDB collection to search.
    additional_stages (list): Additional aggregation stages to include in the pipeline.

    Returns:
    list: A list of matching documents.
    """
    # Generate embedding for the user query
    query_embedding = custom_utils.get_embedding(user_query)

    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    # Define the vector search stage
    vector_search_stage = {
        "$vectorSearch": {
            "index": vector_index,
            "queryVector": query_embedding,
            "path": "text_embeddings",
            "numCandidates": 150,
            "limit": 20
        }
    }

    # Define the aggregate pipeline with the vector search stage and additional stages
    pipeline = [vector_search_stage] + additional_stages

    try:
        # Execute the search and time it
        import time
        start_time = time.time()
        
        # Execute search
        results = list(collection.aggregate(pipeline))
        
        # Try to get detailed execution stats if available
        try:
            explain_result = db.command(
                'explain',
                {
                    'aggregate': collection.name,
                    'pipeline': pipeline,
                    'cursor': {}
                },
                verbosity='executionStats'
            )
            if 'stages' in explain_result and explain_result['stages']:
                vector_stage = explain_result['stages'][0].get('$vectorSearch', {})
                if 'explain' in vector_stage and 'collectStats' in vector_stage['explain']:
                    db_time = vector_stage['explain']['collectStats'].get('millisElapsed', 0)
                    print(f"Database execution time: {db_time} milliseconds")
        except Exception as explain_error:
            pass  # Silently handle explain errors
        
        # Always show total execution time
        end_time = time.time()
        total_time = (end_time - start_time) * 1000
        print(f"Total execution time: {total_time:.2f} milliseconds")
        
        return results
        
    except Exception as e:
        print(f"Error during vector search: {e}")
        return []

## Handling User Query

In [34]:
from pydantic import BaseModel
from typing import Optional

class SearchResultItem(BaseModel):
    name: str
    accommodates: Optional[int] = None
    bedrooms: Optional[int] = None
    address: custom_utils.Address
    space: str = None

In [35]:
from IPython.display import display, HTML

def handle_user_query(query, db, collection, stages=[], vector_index="vector_index_text"):
    # Assuming vector_search returns a list of dictionaries with keys 'title' and 'plot'
    get_knowledge = vector_search(query, db, collection, stages, vector_index)

    # Check if there are any results
    if not get_knowledge:
        return "No results found.", "No source information available."

    # Convert search results into a list of SearchResultItem models
    search_results_models = [
        SearchResultItem(**result)
        for result in get_knowledge
    ]

    # Convert search results into a DataFrame for better rendering in Jupyter
    search_results_df = pd.DataFrame([item.dict() for item in search_results_models])

    # Generate system response using OpenAI's completion
    completion = custom_utils.openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system", 
                "content": "You are a airbnb listing recommendation system."},
            {
                "role": "user", 
                "content": f"Answer this user query: {query} with the following context:\n{search_results_df}"
            }
        ]
    )

    system_response = completion.choices[0].message.content

    # Print User Question, System Response, and Source Information
    print(f"- User Question:\n{query}\n")
    print(f"- System Response:\n{system_response}\n")

    # Display the DataFrame as an HTML table
    display(HTML(search_results_df.to_html()))

    # Return structured response and source info as a string
    return system_response

## Adding Post filtering to vector search

In [36]:
import re
# Specifying the metadata field to limit documents on
search_path = "address.country"

# Create a match stage
match_stage = {
    "$match": {
       search_path: re.compile(r"United States"),
       "accommodates": { "$gt": 1, "$lt": 5}
    }
}

additional_stages = [match_stage]

In [37]:
query = """
I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection"
"""
handle_user_query(query, db, collection, additional_stages)

Database execution time: 0 milliseconds
Total execution time: 945.82 milliseconds
- User Question:

I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection"


- System Response:
I recommend the **Banyan Bungalow** located in Waialua, HI. This charming place is perfect for a warm and friendly atmosphere, ideal for relaxation and enjoyment. 

**Reason for recommendation**: The Banyan Bungalow not only accommodates up to 2 guests but also offers a lovely open space that is filled with natural light, creating a welcoming ambiance. Plus, being in Hawaii, you are surrounded by beautiful weather and a friendly local culture. While it may not specify exact restaurant proximities, Hawaii is known for its vibrant dining scene, and you should find various eateries nearby that showcase local cuisine. This balance of comfort, warmth, and accessibility to dining makes it a wonderful choi

Unnamed: 0,name,accommodates,bedrooms,address,space
0,Banyan Bungalow,2,0,"{'street': 'Waialua, HI, United States', 'government_area': 'North Shore Oahu', 'market': 'Oahu', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-158.1602, 21.57561], 'is_location_exact': False}}","Big, open space with lots of natural light. The cottage is clean and quiet - perfect for a good night's sleep. Meals can be easily prepared in the small kitchen. Microwave, hot plate, toaster, blender, coffee maker, full size fridge are available."
1,A bedroom far away from home,2,1,"{'street': 'Queens, NY, United States', 'government_area': 'Briarwood', 'market': 'New York', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-73.82257, 40.71485], 'is_location_exact': True}}","our place is a good sized apartment in a very quiet neighborhood. the bedroom is clean and cozy with a queen sized bed, so you can get all the rest you need."
2,Easy 1 Bedroom in Chelsea,2,1,"{'street': 'New York, NY, United States', 'government_area': 'Chelsea', 'market': 'New York', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-74.00074, 40.74577], 'is_location_exact': True}}","*I listed this place late so can be flexible for you. I'm quick to respond - contact me and let's discuss whether we can make something work for you asap* Sorry about the lack of pictures. Will link you to Streeteasy when you reply. There is a small (regular for NYC) bedroom, a living room with couches and a TV (with apple tv but only basic cable), a kitchen, and a bathroom. It is in a walk up building a couple of flights up. Need relatively quiet guests for this residential building - definitely not a place for your weekend party."
3,March 2019 availability! Oceanview on Sugar Beach!,4,1,"{'street': 'Kihei, HI, United States', 'government_area': 'Kihei-Makena', 'market': 'Maui', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-156.46881, 20.78621], 'is_location_exact': True}}","NIGHTLY RATE INCLUDES ALL TAXES! The Kealia Resort - Just a few steps from the warm sand of Sugar Beach, Maui's longest white-sand beach. With gorgeous views, this nicely updated, fourth floor, condo at the Kealia Resort beachfront complex is waiting for you! The six-mile stretch of beach, calm waters, and close proximity to town, make it the perfect vacation spot for honeymooners, families, and everyone in between! A beautiful and relaxing beachfront retreat. Treat yourself to long walks on the beach and spectacular evening sunsets. You'll enjoy fantastic whale watching from your balcony/lanai (in the winter months). Or grab your snorkel and swim with the green sea turtles. You'll love the warm and gentle ocean, perfect for swimming, kayaking or try windsurfing. Free WiFi high speed internet access. 42' flat screen TV, with cable. Lounge in the sun, and cool off in the oceanfront pool. Or grill some fresh seafood on the beachfront grills. Then take few steps to play on the beach."


'I recommend the **Banyan Bungalow** located in Waialua, HI. This charming place is perfect for a warm and friendly atmosphere, ideal for relaxation and enjoyment. \n\n**Reason for recommendation**: The Banyan Bungalow not only accommodates up to 2 guests but also offers a lovely open space that is filled with natural light, creating a welcoming ambiance. Plus, being in Hawaii, you are surrounded by beautiful weather and a friendly local culture. While it may not specify exact restaurant proximities, Hawaii is known for its vibrant dining scene, and you should find various eateries nearby that showcase local cuisine. This balance of comfort, warmth, and accessibility to dining makes it a wonderful choice for your stay.'

## Adding Pre-filtering to vector search

In [42]:
from pymongo.operations import SearchIndexModel
import time 

vector_index_with_filter = "vector_index_with_filter"

new_vector_search_index_model = SearchIndexModel(
    definition={
        "mappings": {
            "dynamic": True,
            "fields": {
                "text_embeddings": {
                    "dimensions": 1536,
                    "similarity": "cosine",
                    "type": "knnVector",
                },
                 "accommodates": {
                    "type": "number"
                },
                "bedrooms": {
                    "type": "number"
                },
            },
        }
    },
    name=vector_index_with_filter,
)

# Create the new index
try:
    result = collection.create_search_index(model=new_vector_search_index_model)
    print("Creating index...")
    time.sleep(20)  # Sleep for 20 seconds, adding sleep to ensure vector index has compeleted inital sync before utilization
    print("New index created successfully:", result)
except Exception as e:
    print(f"Error creating new vector search index: {str(e)}")

Creating index...
New index created successfully: vector_index_with_filter


In [43]:
def vector_search(user_query, db, collection, additional_stages=[], vector_index="vector_index_text"):
    query_embedding = custom_utils.get_embedding(user_query)
    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    vector_search_stage = {
        "$vectorSearch": {
            "index": vector_index,  # specifies the index to use for the search
            "queryVector": query_embedding,  # the vector representing the query
            "path": "text_embeddings",  # field in the documents containing the vectors to search against
            "numCandidates": 150,  # number of candidate matches to consider
            "limit": 20,  # return top 20 matches
            "filter": {
                "$and": [
                    {"accommodates": {"$gte": 2}}, 
                    {"bedrooms": {"$lte": 7}}
                ]
            },
        }
    }
    
    pipeline = [vector_search_stage] + additional_stages
    
    try:
        # Execute the search and time it
        import time
        start_time = time.time()
        
        # Execute search
        results = list(collection.aggregate(pipeline))
        
        # Try to get detailed execution stats if available
        try:
            explain_result = db.command(
                'explain',
                {
                    'aggregate': collection.name,
                    'pipeline': pipeline,
                    'cursor': {}
                },
                verbosity='executionStats'
            )
            if 'stages' in explain_result and explain_result['stages']:
                vector_stage = explain_result['stages'][0].get('$vectorSearch', {})
                if 'explain' in vector_stage and 'collectStats' in vector_stage['explain']:
                    db_time = vector_stage['explain']['collectStats'].get('millisElapsed', 0)
                    print(f"Database execution time: {db_time} milliseconds")
        except Exception as explain_error:
            pass  # Silently handle explain errors
        
        # Always show total execution time
        end_time = time.time()
        total_time = (end_time - start_time) * 1000
        print(f"Total execution time: {total_time:.2f} milliseconds")
        
        return results
        
    except Exception as e:
        print(f"Error during vector search: {e}")
        return []

In [44]:
query = """
I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection"
"""
handle_user_query(
    query, 
    db, 
    collection, 
    vector_index=vector_index_with_filter
)

Database execution time: 0 milliseconds
Total execution time: 2569.16 milliseconds
- User Question:

I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection"


- System Response:
I recommend the **Cheerful new renovated central apt**. 

This place can accommodate up to 8 guests and has 3 bedrooms, making it perfect for gatherings with family or friends. It's located in a vibrant area which means you'll have easy access to a variety of restaurants and local attractions. The description mentions a friendly and inviting atmosphere, which aligns well with your desire for a warm and welcoming space. Plus, being centrally located, you'll never be far from great dining options to explore during your stay.



Unnamed: 0,name,accommodates,bedrooms,address,space
0,Cozy house at Beyoğlu,2,1,"{'street': 'Beyoğlu, İstanbul, Turkey', 'government_area': 'Beyoglu', 'market': 'Istanbul', 'country': 'Turkey', 'country_code': 'TR', 'location': {'type': 'Point', 'coordinates': [28.95825, 41.03777], 'is_location_exact': False}}","Safe, quite, big house, wiev, Central, near the bus stop."
1,Downtown Oporto Inn (room cleaning),2,1,"{'street': 'Porto, Porto, Portugal', 'government_area': 'Cedofeita, Ildefonso, Sé, Miragaia, Nicolau, Vitória', 'market': 'Porto', 'country': 'Portugal', 'country_code': 'PT', 'location': {'type': 'Point', 'coordinates': [-8.60867, 41.1543], 'is_location_exact': False}}","Cozy, located near the most interesting points of the city to provide a nice stay, with a low budget. Has a gift shop to buy handicraft, books and other gifts,"
2,Banyan Bungalow,2,0,"{'street': 'Waialua, HI, United States', 'government_area': 'North Shore Oahu', 'market': 'Oahu', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-158.1602, 21.57561], 'is_location_exact': False}}","Big, open space with lots of natural light. The cottage is clean and quiet - perfect for a good night's sleep. Meals can be easily prepared in the small kitchen. Microwave, hot plate, toaster, blender, coffee maker, full size fridge are available."
3,Cheerful new renovated central apt,8,3,"{'street': 'Beyoğlu, İstanbul, Turkey', 'government_area': 'Beyoglu', 'market': 'Istanbul', 'country': 'Turkey', 'country_code': 'TR', 'location': {'type': 'Point', 'coordinates': [28.97477, 41.03735], 'is_location_exact': False}}","Hi there! My name is Aybike. I love to travel, to discover new places and to meet new people. I will be glad to hosting you in Istanbul at my place. My apartment is newly renovated, clean, cosy, comfortable, large enough for 8 people and is situated literally at the heart of Istanbul. Apartment has one of the unique examples of turn-of-the-century Levantine architecture in Turkey: slim, four-storey bow-fronted homes that huddle along winding, narrow streets. Located in a street as it was used to be; the ground floors often served as stores or workshops. More likes to come to your posts in Instagram ! As a traveller my wish is to make you feel at home; drink your morning coffee while listening to the sound of Istanbul then take your map, jump into the street with friendly neighborhood, and enjoy the city without running, walking, exploring, watching, reading and hearing. I know how it is important to be able to feel the city you are visiting. So if you are looking for a place where"
4,Homely Room in 5-Star New Condo@MTR,2,1,"{'street': 'Mongkok, Kowloon, Hong Kong', 'government_area': 'Yau Tsim Mong', 'market': 'Hong Kong', 'country': 'Hong Kong', 'country_code': 'HK', 'location': {'type': 'Point', 'coordinates': [114.17094, 22.32074], 'is_location_exact': False}}","You will stay with my son, my husband and me. We couple love travelling very much and have been to more than 35 countries in the past few years. We like to share our travel tips and photos with everyone. There is a luxury clubhouse in my building, with gym and swimming pool. The building is newly built and it's the most luxury one in Mongkok area."
5,Sydney Hyde Park City Apartment (checkin from 6am),2,1,"{'street': 'Darlinghurst, NSW, Australia', 'government_area': 'Sydney', 'market': 'Sydney', 'country': 'Australia', 'country_code': 'AU', 'location': {'type': 'Point', 'coordinates': [151.21346, -33.87603], 'is_location_exact': False}}","SPACE Comfortable 1 bedroom which has a queen-sized bed, is air conditioned and has a self-contained kitchen, including: fridge, dishwasher, oven, cooktop, microwave."
6,A bedroom far away from home,2,1,"{'street': 'Queens, NY, United States', 'government_area': 'Briarwood', 'market': 'New York', 'country': 'United States', 'country_code': 'US', 'location': {'type': 'Point', 'coordinates': [-73.82257, 40.71485], 'is_location_exact': True}}","our place is a good sized apartment in a very quiet neighborhood. the bedroom is clean and cozy with a queen sized bed, so you can get all the rest you need."
7,Surry Hills Studio - Your Perfect Base in Sydney,2,0,"{'street': 'Surry Hills, NSW, Australia', 'government_area': 'Sydney', 'market': 'Sydney', 'country': 'Australia', 'country_code': 'AU', 'location': {'type': 'Point', 'coordinates': [151.21554, -33.88029], 'is_location_exact': True}}",Comfortable studio with a great layout. The bathroom has a full size bath tub and shower. Complete set of kitchen utensils etc for cooking and a full size fridge. The 3 seater sofa and 42' TV mean you can comfortably relax after a day at the beach or exploring. Large lounge room windows provide harbour views.
8,Modern Spacious 1 Bedroom Loft,4,1,"{'street': 'Montréal, Québec, Canada', 'government_area': 'Le Plateau-Mont-Royal', 'market': 'Montreal', 'country': 'Canada', 'country_code': 'CA', 'location': {'type': 'Point', 'coordinates': [-73.59111, 45.51889], 'is_location_exact': True}}",Lot's of plants and lights. Really great modern bathroom that you will love showering in :) and a kitchen equipped with everything you need to cook a great meal. (we live here)
9,"Studio convenient to CBD, beaches, street parking.",5,1,"{'street': 'Balgowlah, NSW, Australia', 'government_area': 'Manly', 'market': 'Sydney', 'country': 'Australia', 'country_code': 'AU', 'location': {'type': 'Point', 'coordinates': [151.26108, -33.7975], 'is_location_exact': True}}","The room is a cozy and private basement studio with a bathroom and external, private entrance. There is a queen bed and a very comfortable king single. Two yellow chairs make out into twin beds. We have tried to be a comfortable and affordable base for Sydney travel by providing items that every traveler needs. We provide things like laundry soap, hair dryers, basic kitchen pantry items, a TV, etc. While we can sleep five (that is probably best for short one or two night stays.) BATHROOM. Shower, hairdryer, great water pressure. And if you forgot something (toothpaste, razor, etc.) just ask. KITCHENETTE: Has two induction hobs and George Foreman grill, dishwasher, toaster, tea kettle, fridge and freezer. BBQ and Balcony: For $2.00 per use you can use our outdoor BBQ and you are always welcome to eat on our covered balcony for free. (this is a shared space)."


"I recommend the **Cheerful new renovated central apt**. \n\nThis place can accommodate up to 8 guests and has 3 bedrooms, making it perfect for gatherings with family or friends. It's located in a vibrant area which means you'll have easy access to a variety of restaurants and local attractions. The description mentions a friendly and inviting atmosphere, which aligns well with your desire for a warm and welcoming space. Plus, being centrally located, you'll never be far from great dining options to explore during your stay."