In [None]:
!pip install requests
!pip install sentence-transformers faiss-cpu

In [1]:
import requests
import json
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

In [2]:
# Read the listings CSV into a DataFrame; later cells build descriptions from it.
listings_csv = './data/supervised_cleaned_airbnb_data.csv'
df = pd.read_csv(listings_csv)

In [9]:
# Baseline: query the model directly, without feeding it any data through RAG.

query = """
A 1-bedroom apartment in Amsterdam with room for 2 people, a max distance to the city center 5km and metro distance of 2,5km. Outside of weekends
Estimate a fair nightly price in USD for this property as if it were listed on Airbnb.
Only return the price as a number.
"""

# "stream": False asks for the whole completion in a single JSON body.
response = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "llama3",
        "prompt": query,
        "stream": False
    }
)
# Surface HTTP errors (4xx/5xx) here as requests.HTTPError instead of a less
# clear failure when the body is parsed and indexed below.
response.raise_for_status()

price = response.json()["response"]
print(f"Ollama estimated price: {price}")


Ollama estimated price: $120


In [11]:
# Prepare data using RAG

def row_to_description(row):
    """Render one listing row as a single-sentence text description.

    Args:
        row: Any object indexable by the keys 'bedrooms', 'City',
            'person_capacity', 'dist', 'metro_dist' and 'realSum'
            (for example a DataFrame row or a dict).

    Returns:
        str: The description, ending with the listing's nightly price.
    """
    # Values are interpolated with their default string formatting.
    listing_part = f"A {row['bedrooms']}-bedroom apartment in {row['City']} with room for {row['person_capacity']} people, "
    location_part = f"max distance to the city center is {row['dist']} km and metro distance is {row['metro_dist']} km. "
    price_part = f"Nightly price: ${row['realSum']}"
    return listing_part + location_part + price_part

# Turn every listing row into its text description.
description_series = df.apply(row_to_description, axis=1)
descriptions = description_series.tolist()

# Sentence-embedding model; the query cell reuses it to embed the query.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Encode all descriptions into a NumPy array of embeddings.
embeddings = embedder.encode(descriptions, convert_to_numpy=True)

# Build a flat L2 FAISS index sized to the embedding width and fill it
# with the listing embeddings.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print("Embedding done!")

In [13]:
# Queries and results
# "query" is the active variable used for the model: it is the one that gets embedded for
# retrieval and inserted into the final prompt below. Rename variables to test out different queries.
# We are using the first entry in our data set as a description. The realSum is 194.03.

# Original prompt with no focus on the 4 T's.
# Result = $137.5 || top_k = 10
# Result = $108.1 || top_k = 5
query = """
A 1-bedroom apartment in City 1 with room for 2 people, a distance to the city center 5km and metro distance of 2.5km. Outside of weekends
"""

# Using the 4 T's in our prompt.
# Result = $123 || top_k = 10
# Result = $304.65 || top_k = 5
query2 = """
Calculate a recommended price for a listing with these parameters.

A 1-bedroom apartment in Amsterdam with room for 2 people, a distance to the city center 5km and metro distance of 2.5km. Outside of weekends

The city values in the data set are transformed to numeric values. These are the original values: 1. Amsterdam, 2. Athen, 3. Barcelona, 4. Berlin, 5. Budapest, 6. Lisbon, 7. London, 8. Paris, 9. Rome, 10. Vienna

Only return the price as a number.
"""

# Original prompt with the city changed to Amsterdam instead of City 1 and less focus on the 4 T's.
# Result = $112.45 || top_k = 10
# Result = $122.25 || top_k = 5
query3 = """
A 1-bedroom apartment in Amsterdam with room for 2 people, a distance to the city center 5km and metro distance of 2.5km. Outside of weekends.

The city values in the data set are transformed to numeric values. These are the original values: 1. Amsterdam, 2. Athen, 3. Barcelona, 4. Berlin, 5. Budapest, 6. Lisbon, 7. London, 8. Paris, 9. Rome, 10. Vienna
"""

# Embed the active query with the same model that embedded the listings.
query_embedding = embedder.encode([query])

# Retrieve the top_k nearest listings (change top_k here to compare settings).
top_k = 5
distances, indices = index.search(np.array(query_embedding), top_k)

# Get matched listing descriptions.
# FAISS pads the result with label -1 when the index holds fewer than top_k
# vectors; skip those so -1 is not read as "last element of descriptions".
retrieved_examples = [descriptions[i] for i in indices[0] if i >= 0]

# Number the retrieved listings so they read as a list in the prompt.
context = "\n".join([f"{i+1}. {desc}" for i, desc in enumerate(retrieved_examples)])

final_prompt = f"""
Here are some similar Airbnb listings and their prices:

{context}

Now estimate the price for this listing:
{query}

Only return the estimated nightly price in USD as a number.
"""

# "stream": False asks for the whole completion in a single JSON body.
response = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "llama3",
        "prompt": final_prompt,
        "stream": False
    }
)
# Surface HTTP errors (4xx/5xx) here as requests.HTTPError instead of a less
# clear failure when the body is parsed and indexed below.
response.raise_for_status()

price = response.json()["response"]
print(f"Ollama (RAG-based) estimated price: {price}")


Ollama (RAG-based) estimated price: Based on the similarities between the given listings and this new listing, I would estimate the nightly price to be around $108.1.

Note that all the given listings have the same number of bedrooms (1.0), same maximum occupancy (2.0 people), and similar distances to the city center and metro station. The only significant difference is the distance from the city center, which is 5km in this new listing compared to a maximum of 2.7km in the given listings.

Given that the prices for the given listings do not seem to vary significantly with distance, I would assume that the price for this new listing would also be around $108.1, since it has similar characteristics to the given listings.
