In [12]:
from IPython.display import display, Markdown
import pandas as pd
import requests
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [13]:
# Load clustered Airbnb data from the user's GitHub project path
df = pd.read_csv("../data/clustered_airbnb.csv")

### LLaMA 3 Client  
This class provides a simple interface for interacting with a locally running LLaMA 3 model via Ollama's HTTP API. It sends structured prompts to the model and retrieves natural-language responses to assist in Airbnb listing recommendations.


In [15]:
class Llama3Client:
    def __init__(self, model="llama3", host="http://localhost:11434"):
        # Set the model name and API host address
        self.model = model
        self.api_url = f"{host}/api/chat"

    def ask(self, prompt: str) -> str:
        # Create the request payload for Ollama's chat endpoint
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False  # Disable streaming for full response in one block
        }
        try:
            # Send POST request to local Ollama server
            response = requests.post(self.api_url, json=payload)
            response.raise_for_status()
            # Extract and return the assistant's message content
            return response.json()['message']['content'].strip()
        except requests.RequestException as e:
            # Return error if something went wrong
            return f"Error: {e}"

### Format Listings for Prompt  
This function converts a filtered DataFrame of Airbnb listings into a readable, structured text block. The formatted output is embedded into the LLM prompt to help the model understand and compare the listings.


In [17]:
def format_listings(listings_df, nights):
    listings = []

    for _, row in listings_df.iterrows():
        # Calculate total cost for the user's stay
        total_price = row['realSum'] * nights

        # Create a string representation of the listing
        listing = (
            f"{row['City']} (Cluster {row['cluster']}) - €{row['realSum']}/night, "
            f"{row['bedrooms']} bedrooms, "
            f"{row['dist']}km to center, "
            f"{row['metro_dist']}km to metro, "
            f"{row['guest_satisfaction_overall']} guest satisfaction, "
            f"Total: €{total_price:.2f}, "
            f"Value Score: {row['value_score']:.1f}"
        )

        listings.append(listing)

    # Combine all listings into a single multi-line string
    return "\n".join(listings)

### Semantic Retriever  
This function uses TF-IDF vectorization and cosine similarity to create a simple semantic search engine. It identifies the listings that best match the user's natural language query by ranking them based on textual similarity.


In [19]:
def retrieve_similar_listings(df, user_query, top_k=5):
    # Copy the original DataFrame to avoid modifying it directly
    df = df.copy()

    # Combine listing features into a single text blob for semantic search
    df['search_blob'] = df.apply(
        lambda row: f"{row['City']} {row['room_type']} {row['bedrooms']} bedrooms "
                    f"{row['dist']}km from center {row['metro_dist']}km from metro "
                    f"€{row['realSum']}/night {row['guest_satisfaction_overall']} rating", axis=1
    )

    # Transform all listings into TF-IDF vectors
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(df['search_blob'])

    # Transform the user's query into a TF-IDF vector
    query_vector = vectorizer.transform([user_query])

    # Compute cosine similarity between the query and all listings
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Sort by similarity and return the top-k matches
    top_indices = similarities.argsort()[::-1][:top_k]
    return df.iloc[top_indices].copy()

### Value Score Calculation Function  
This function calculates a `value_score` for each Airbnb listing by combining key features such as price, proximity to the city center and metro, number of bedrooms, and guest satisfaction. The scores are normalized on a 0–100 scale, allowing the listings to be ranked by overall value for money.


In [21]:
def calculate_value_scores(df):
    # Work on a copy to avoid modifying the original DataFrame
    df = df.copy()

    # --- Invert features where lower values are better ---
    # Inverse price: cheaper is better
    df['inv_price'] = 1 / df['realSum']
    
    # Inverse distances (add 0.1 to avoid division by zero)
    df['inv_dist'] = 1 / (df['dist'] + 0.1)
    df['inv_metro'] = 1 / (df['metro_dist'] + 0.1)

    # --- Combine selected features into a scoring matrix ---
    # We use both direct (bedrooms, satisfaction) and inverse (price, distance) metrics
    scoring_data = pd.DataFrame({
        'price': df['inv_price'],                           # Favor cheaper listings
        'center': df['inv_dist'],                           # Favor closer to city center
        'metro': df['inv_metro'],                           # Favor closer to metro
        'bedrooms': df['bedrooms'],                         # Favor more rooms
        'satisfaction': df['guest_satisfaction_overall']    # Favor higher guest scores
    })
    
    # --- Normalize features using Min-Max scaling ---
    scaler = MinMaxScaler()
    normalized = scaler.fit_transform(scoring_data)

    # Average the normalized features to get a raw value score
    raw_score = normalized.mean(axis=1)

    # --- Final Value Score Scaling (0–100) ---
    max_score = raw_score.max()
    if pd.notna(max_score) and max_score != 0:
        df['value_score'] = (raw_score / max_score) * 100
    else:
        # Handle edge case: no variability or invalid score
        df['value_score'] = 0

    return df

### Recommended Best Listing Function
This function combines semantic search and value-based scoring to recommend the best Airbnb listing within a specific city, based on user preferences and budget. It uses a RAG (Retrieval-Augmented Generation) approach where filtered listings are passed to a local LLaMA 3 model for reasoning.

In [23]:
# --- Recommend Best Value (All Cities) using RAG-Powered Recommendation ---
def recommend_best_listing(df, user_query, budget, nights=5, top_k=5):
    # Step 1: Retrieve top-k semantically similar listings using the user's natural language query
    retrieved = retrieve_similar_listings(df, user_query, top_k=top_k)

    # Step 2: Add total price column (price per night * number of nights)
    retrieved['total_price'] = retrieved['realSum'] * nights

    # Step 3: Filter listings to include only those within the user's budget
    filtered = retrieved[retrieved['total_price'] <= budget]

    # Step 4: Return early if no listings match the criteria
    if filtered.empty:
        return "No listings found within your budget and preferences."

    # Step 5: Compute a custom value score based on price, distance, satisfaction, etc.
    filtered = calculate_value_scores(filtered)

    # Step 6: Format the filtered listings into text to be used in the LLM prompt
    context = format_listings(filtered, nights)

    # Step 7: Create a prompt that provides the listings and instructs the LLM to choose the best one
    prompt = f"""
You are a helpful, analytical travel assistant using semantic matching and listing data.

The user is looking for:
"{user_query}"

Their budget is €{budget} for {nights} nights.

Based on a semantic search, here are the top matching listings:

{context}

Pick the best match and explain your reasoning clearly and logically.

Respond in the following format:

<City> (Cluster <#>) - €<price>/night, <#> bedrooms, <x>km to center, <y>km to metro, <guest satisfaction>, Total: €<total>, Value Score: <score>

Here's why:
* Total price: ...
* Distance: ...
* Bedrooms: ...
* Satisfaction: ...
* Final recommendation: ...
"""

    # Step 8: Use the local LLaMA 3 model to generate a recommendation based on the prompt
    client = Llama3Client()
    return client.ask(prompt)

# --- Example Usage ---
user_query = "A quiet place near city center with good metro access and high ratings"
budget = 500
nights = 5
top_k = 10

# Generate and display recommendation using LLaMA 3
response = recommend_best_listing(df, user_query, budget, nights, top_k)
display(Markdown(response))


Based on the semantic search results, I recommend:

Athens (Cluster 0) - €92.8/night, 3 bedrooms, 2.2km to center, 0.4km to metro, 100 guest satisfaction, Total: €464.00, Value Score: 100.0

Here's why:

* Total price: The total price of €464.00 is well within the user's budget of €500 for 5 nights.
* Distance: The accommodation is only 2.2km to the city center and 0.4km to the metro, making it a convenient location with good public transportation access.
* Bedrooms: With 3 bedrooms, this option can comfortably accommodate a group or family, which might be an important consideration for some travelers.
* Satisfaction: The guest satisfaction rating is 100%, indicating that previous guests have been extremely satisfied with their stay.

Final recommendation:
I strongly recommend the Athens (Cluster 0) accommodation due to its exceptional value score of 100.0, excellent location, and high guest satisfaction rating. This option checks all the boxes for a quiet place near the city center with good metro access and high ratings, making it an ideal choice for the user's needs and budget.

### Recommend Best Listing in City Function
This function performs city-specific semantic retrieval and value-based scoring to recommend the best Airbnb listing that fits a user's preferences and budget. It applies a RAG (Retrieval-Augmented Generation) approach by passing the top filtered listings from a selected city into a locally hosted LLaMA 3 model for natural language reasoning and explanation.

In [25]:
# --- Recommend Best Listing in a Specific City using RAG-Powered Recommendation ---
def recommend_best_listing_in_city(df, city, user_query, budget, nights=5, top_k=5):
    # Step 1: Filter dataset by the specified city (case-insensitive)
    city_df = df[df['City'].str.lower() == city.lower()]
    if city_df.empty:
        return f"No listings found in {city.title()}."

    # Step 2: Perform semantic search on listings in this city using the user query
    # This uses TF-IDF to find listings that are textually similar to the query
    retrieved = retrieve_similar_listings(city_df, user_query, top_k=top_k)

    # Step 3: Compute total price for the given stay length and filter by budget
    retrieved['total_price'] = retrieved['realSum'] * nights
    filtered = retrieved[retrieved['total_price'] <= budget]

    if filtered.empty:
        return f"No listings found in {city.title()} matching your budget and preferences."

    # Step 4: Calculate a value score based on price, satisfaction, proximity, and other features
    filtered = calculate_value_scores(filtered)

    # Step 5: Format the filtered results into human-readable listing descriptions for the prompt
    context = format_listings(filtered, nights)

    # Step 6: Build the prompt to send to the LLaMA 3 model
    # This prompt contains user preferences, listings, and a specific output format
    prompt = f"""
You are a helpful, analytical travel assistant using semantic matching and listing data.

The user is looking for:
"{user_query}"

Their budget is €{budget} for {nights} nights, and they want to stay in {city.title()}.

Based on a semantic search within listings from this city, here are the top matches:

{context}

Pick the best match and explain your reasoning clearly and logically.

Respond in the following format:

<City> (Cluster <#>) - €<price>/night, <#> bedrooms, <x>km to center, <y>km to metro, <guest satisfaction>, Total: €<total>, Value Score: <score>

Here's why:
* Total price: ...
* Distance: ...
* Bedrooms: ...
* Satisfaction: ...
* Final recommendation: ...
"""

    # Step 7: Send the prompt to the local LLaMA 3 model and return its answer
    client = Llama3Client()
    return client.ask(prompt)

# --- Example Usage ---
response = recommend_best_listing_in_city(
    df,
    city="Berlin",
    user_query="quiet place near city center with good metro access and high ratings",
    budget=1000,
    nights=5,
    top_k=10
)

# Display the AI-generated recommendation in Markdown format
display(Markdown(response))

Berlin (Cluster 3) - €185.8/night, 1 bedrooms, 2.7km to center, 0.3km to metro, 100 guest satisfaction, Total: €929.00, Value Score: 92.7

Here's why:

* Total price: €929.00 is within the budget of €1000 for 5 nights.
* Distance: The apartment is only 2.7 km away from the city center and 0.3 km to metro, which meets the user's requirement for a quiet place with good metro access near the city center.
* Bedrooms: The apartment has one bedroom, meeting the user's requirement.
* Satisfaction: The guest satisfaction rating is 100%, indicating excellent reviews from previous guests.

Final recommendation:
I recommend Berlin (Cluster 3) - €185.8/night, 1 bedrooms, 2.7km to center, 0.3km to metro, 100 guest satisfaction, Total: €929.00, Value Score: 92.7. This apartment meets all the user's requirements and has excellent reviews from previous guests.

### Recommend Best Value Function (All Cities)
This function identifies the best overall Airbnb listing across all available cities by filtering based on user budget, computing value scores from multiple features (price, distance, satisfaction, etc.), and formatting the listings into a structured prompt. It sends this prompt to a locally hosted LLaMA 3 model, which returns a natural-language recommendation based on the top-ranked options.

In [27]:
# --- Recommend Best Value (All Cities) using to_string() Recommendation ---
def recommend_best_value(df, budget, nights=5, max_results=5):
    # Step 1: Calculate total cost for the user's stay
    df['total_price'] = df['realSum'] * nights

    # Step 2: Filter listings to only include those within the user's budget
    filtered = df[df['total_price'] <= budget]

    # Step 3: Sort listings by guest satisfaction and limit to top N results
    filtered = filtered.sort_values(by='guest_satisfaction_overall', ascending=False).head(max_results)

    # Step 4: If no listings are found after filtering, return a message
    if filtered.empty:
        return "No listings found within your budget."

    # Step 5: Calculate value scores for each listing using multiple features
    filtered = calculate_value_scores(filtered)

    # Step 6: Convert the filtered DataFrame to a plain-text table (no index)
    context = filtered.to_string(index=False)

    # Step 7: Construct a prompt to instruct the LLaMA 3 model to choose the best option
    prompt = f"""
You are a helpful, analytical travel assistant that uses logic and data to help users choose the best Airbnb listing.

Your task is to evaluate the Airbnb listings shown below and select the single best option for the user's travel needs.

The user has a total budget of €{budget} for {nights} nights. Below are the top Airbnb listings, already ranked by a calculated value score (out of 100):

{context}

Please consider the following criteria for your recommendation:
- Total price (should stay within budget)
- Distance to city center and metro
- Number of bedrooms
- Guest satisfaction score
- Cluster number

Return your response in the following format:

After analyzing the available listings, I would recommend the following option:

<Airbnb ID>, <City> (Cluster <#>) - €<price>/night, <#> bedrooms, <x>km to center, <y>km to metro, <guest satisfaction> guest satisfaction, Total: €<total price>, Value Score: <score>

Here's why:
* Value score: ...
* Total price: ...
* Distance to city center and metro: ...
* Number of bedrooms: ...
* Guest satisfaction: ...
* Cluster (if relevant): ...

Conclude with a confident recommendation summarizing why this is your top pick.
"""

    # Step 8: Query the local LLaMA 3 model with the structured prompt
    client = Llama3Client()
    return client.ask(prompt)

# --- Example Usage ---
budget = 500
nights = 5

# Generate and display the recommendation
response_1 = recommend_best_value(df, budget=budget, nights=nights)
display(Markdown(response_1))


After analyzing the available listings, I would recommend the following option:

2267, Athens (Cluster 2) - €69.60/night, 1 bedroom, 1.4km to center, 0.6km to metro, 91% guest satisfaction, Total: €348.00, Value Score: 69.6

Here's why:

* Value score: The listing has a value score of 69.6, which is relatively high compared to the other options.
* Total price: The total price of €348.00 falls within the user's budget of €500 for 5 nights (€69.60 per night).
* Distance to city center and metro: The listing is approximately 1.4km from the city center and 0.6km from the nearest metro station, which is a reasonable distance considering the value offered.
* Number of bedrooms: The property has one bedroom, which should be sufficient for most travelers.
* Guest satisfaction: The guest satisfaction rating is an impressive 91%, indicating that previous guests have had a positive experience at this property.

Overall, I believe this option provides the best combination of value, price, and amenities. While other listings may offer slightly better scores in individual categories, this one strikes a good balance across all criteria, making it my top pick for the user's travel needs.

### Recommend Best Listing in a Specific City Function
This function filters Airbnb listings by a specified city and budget, ranks them by guest satisfaction, calculates a composite value score, and sends the formatted top listings to a local LLaMA 3 model. The model then provides a personalized recommendation with reasoning based on multiple features like price, location, and guest satisfaction.

In [29]:
# --- Recommend Best Listing Within a Specific City using to_string() Recommendation ---
def recommend_in_city(df, city, budget, nights=5, max_results=5):
    # Step 1: Calculate total price for each listing based on the length of stay
    df['total_price'] = df['realSum'] * nights

    # Step 2: Filter listings by city (case-insensitive) and budget
    filtered = df[
        (df['City'].str.lower() == city.lower()) &
        (df['total_price'] <= budget)
    ]

    # Step 3: Sort listings by guest satisfaction and limit to the top results
    filtered = filtered.sort_values(by='guest_satisfaction_overall', ascending=False).head(max_results)

    # Step 4: Handle case where no listings are found
    if filtered.empty:
        return f"No listings found in {city.title()} within your budget."

    # Step 5: Compute value scores using normalized metrics (price, distance, satisfaction, etc.)
    filtered = calculate_value_scores(filtered)

    # Step 6: Convert the DataFrame to a text table for use in the LLM prompt
    context = filtered.to_string(index=False)

    # Step 7: Compose the LLM prompt with clear structure and evaluation criteria
    prompt = f"""
You are a helpful, analytical travel assistant using semantic matching and listing data.

The user is looking for:
"{user_query}"

Their budget is €{budget} for {nights} nights, and they want to stay in {city.title()}.

Based on a semantic search within listings from this city, here are the top matches:

{context}

Pick the best match and explain your reasoning clearly and logically.

Respond in the following format:

<City> (Cluster <#>) - €<price>/night, <#> bedrooms, <x>km to center, <y>km to metro, <guest satisfaction> guest satisfaction, Total: €<total>, Value Score: <score>

Here's why:
* Total price: ...
* Distance: ...
* Bedrooms: ...
* Satisfaction: ...
* Final recommendation: ...
"""

    # Step 8: Query the local LLaMA 3 model with the structured prompt
    client = Llama3Client()
    return client.ask(prompt)

# --- Example Usage ---

# Set user parameters
city = "Barcelona"
budget = 500
nights = 5

# Call the function to get the LLM's recommendation
response = recommend_in_city(df, city=city, budget=budget, nights=nights)

# Display the AI-generated recommendation using markdown formatting
display(Markdown("### Recommendation in City"))
display(Markdown(response))


### Recommendation in City

Barcelona (Cluster 3) - €98.05/night, 1 bedroom, 0.5km to center, 0.196078km to metro, 0.9 guest satisfaction, Total: €491.05, Value Score: 66.705206

Here's why:

* Total price: The total price of this option is within the user's budget of €500 for 5 nights.
* Distance: This listing is only 0.5km to city center, which meets the user's requirement for a quiet place near city center.
* Bedrooms: The property has 1 bedroom, which is suitable for the user since they didn't specify any specific number of bedrooms.
* Satisfaction: The guest satisfaction rating is 0.9 out of 1.0, which indicates that most guests have been satisfied with their stay at this property.
* Final recommendation: Based on these factors, I recommend this option as the best match for the user's requirements.

### Cluster Document Creation
Creates detailed text descriptions for each cluster by calculating key statistics like average prices, satisfaction scores, and locations. This helps us understand and compare the different property groups.

The clusters were created using K-means clustering on the property features. Each cluster represents a group of similar properties based on their characteristics like price, location, size, and ratings.

In [31]:
# Functions to analyze clusters
def create_cluster_documents(df):
    """Create text documents describing each cluster's characteristics."""
    cluster_docs = []
    cluster_ids = []
    
    # Get all unique clusters
    clusters = sorted(df['cluster'].unique())
    
    for cluster in clusters:
        cluster_data = df[df['cluster'] == cluster]
        
        # Calculate statistics for this cluster
        avg_price = cluster_data['realSum'].mean()
        avg_satisfaction = cluster_data['guest_satisfaction_overall'].mean()
        avg_dist = cluster_data['dist'].mean()
        avg_metro_dist = cluster_data['metro_dist'].mean()
        avg_bedrooms = cluster_data['bedrooms'].mean()
        
        # Room type distribution
        room_types = cluster_data['room_type'].value_counts(normalize=True).to_dict()
        
        # City distribution
        city_dist = cluster_data['City'].value_counts(normalize=True).to_dict()
        
        # Create a document describing this cluster
        doc = f"Cluster {cluster} characteristics:\n"
        doc += f"Average price: €{avg_price:.2f} per night\n"
        doc += f"Average guest satisfaction: {avg_satisfaction:.2f}/100\n"
        doc += f"Average distance to city center: {avg_dist:.2f}km\n"
        doc += f"Average distance to metro: {avg_metro_dist:.2f}km\n"
        doc += f"Average number of bedrooms: {avg_bedrooms:.2f}\n"
        
        doc += "Room type distribution:\n"
        for room_type, percentage in room_types.items():
            doc += f"- {room_type}: {percentage*100:.1f}%\n"
            
        doc += "City distribution:\n"
        for city, percentage in city_dist.items():
            doc += f"- {city}: {percentage*100:.1f}%\n"
        
        cluster_docs.append(doc)
        cluster_ids.append(cluster)
        
    return cluster_docs, cluster_ids

### Initialize Cluster Documents
Runs the function above to create text descriptions for each property cluster. These descriptions will be used for semantic search and analysis later.

Each document contains key statistics about the properties in that cluster, including average prices, satisfaction scores, distances, and distributions of room types and cities.

In [33]:
# Create the cluster documents and initialize the vectorizer
cluster_docs, cluster_ids = create_cluster_documents(df)

### Search Through Clusters
Uses text analysis with TF-IDF vectorization and cosine similarity to find clusters that best match what users are looking for in their ideal property.

The search compares the user's query against the detailed cluster descriptions we created above, finding the most semantically similar matches. This helps users discover property groups that align with their preferences, even if they don't use exact matching terms.

In [35]:
# Create a TF-IDF vectorizer for text search
vectorizer = TfidfVectorizer(stop_words='english')
document_vectors = vectorizer.fit_transform(cluster_docs)

def retrieve_relevant_docs(query, top_k=3):
    """Find the clusters that best match a search query."""
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, document_vectors).flatten()
    top_indices = similarities.argsort()[-top_k:][::-1]
    
    # Return both the documents and their cluster IDs
    return [cluster_docs[i] for i in top_indices], [cluster_ids[i] for i in top_indices], similarities[top_indices]

### Name the Clusters
Uses the Llama 3 model to generate intuitive, descriptive names for each cluster based on their distinctive characteristics. 

The naming process analyzes multiple feature groups (Distance, Accommodation, Quality, Price, and Superhost status) to identify what makes each cluster unique. The model then creates memorable names that highlight these key traits, making it easier for users to understand the different types of properties available.

For example, a cluster might be named "Urban Luxury Oasis" if it contains high-end properties close to city centers with excellent ratings.

In [37]:
# Function to name clusters based on key traits
def name_clusters(df):
    """Give each cluster a descriptive name based on its main features."""
    # Get cluster documents
    cluster_docs, cluster_ids = create_cluster_documents(df)
    # Define feature groups for cluster analysis
    feature_groups = {
        'Distance': ['dist', 'metro_dist'],
        'Accommodation': ['bedrooms', 'person_capacity'], 
        'Quality': ['cleanliness_rating', 'guest_satisfaction_overall'],
        'Price': ['realSum'],
        'Superhost': ['host_is_superhost_bool']
    }
    
    # Create descriptive names for each cluster
    client = Llama3Client()
    
    for cluster_id in cluster_ids:
        cluster_data = df[df['cluster'] == cluster_id]
        
        # Calculate feature group averages
        group_stats = {}
        for group_name, features in feature_groups.items():
            # Calculate mean of standardized values for features in group
            feature_means = []
            for feature in features:
                if feature in cluster_data.columns:
                    mean = cluster_data[feature].mean()
                    std = df[feature].std()  # Using full dataset std
                    feature_means.append((mean - df[feature].mean()) / std)
            if feature_means:
                group_stats[group_name] = np.mean(feature_means)
        
        # Sort groups by absolute z-score to find most distinctive traits
        distinctive_groups = sorted(group_stats.items(), key=lambda x: abs(x[1]), reverse=True)
        top_groups = [g[0] for g in distinctive_groups[:3]]
        
        # Get the cluster document
        doc = cluster_docs[cluster_ids.index(cluster_id)]
        
        prompt = f"""[Task]
        Create a concise, memorable name for an Airbnb property cluster that captures its key characteristics, unique to the specific cluster.
        
        [Target]
        - Primary: Potential Airbnb guests looking for specific property types
        - Secondary: Property managers seeking to position their listings
        
        [Tone]
        Professional yet approachable, using clear and appealing language suitable for property listings

        [Trait]
        You are a branding-savvy data analyst with experience in real estate and hospitality. 
        You understand how to translate statistical insights into marketable, intuitive names that resonate with both guests and hosts.
        
        [Technical Details]
        1. Output Format: Return ONLY a cluster name following this exact pattern:
           "Descriptive Name ({', '.join(top_groups)})"
           Example: "Urban Luxury Oasis (Distance: 2.1 km to center, Quality: 4.8 rating, Price: $250 per night)"
        
        2. Key Data Points:
           - Distinctive feature groups: {', '.join(top_groups)}
           - Cluster characteristics:
             {doc}
        
        3. Requirements:
           - Name must be immediately understandable
           - Include specific metrics for each feature group
           - Accurately reflect the data-driven grouping
        """
        
        cluster_name = client.ask(prompt)
        display(Markdown(f"**Cluster {cluster_id}:** {cluster_name}"))

# Generate cluster names
name_clusters(df)

**Cluster 0:** Based on the provided data, I suggest the following cluster name:

"Elegant Urban Retreats (Average Price: €436.14 per night, Satisfaction: 93.30/100, Distance: 2.79km to city center)"

This name captures the key characteristics of the cluster, including:

* Accommodation: "Elegant" and "Urban Retreats" convey a sense of luxury and comfort.
* Price: The specific average price is included to provide guests with a clear idea of what to expect.
* Superhost: The high guest satisfaction rating (93.30/100) implies that the hosts are reliable and trustworthy.

The name is concise, easy to understand, and accurately reflects the data-driven grouping.

**Cluster 1:** Based on the provided data, I would suggest the following cluster name:

"City Hub Luxe (Distance: 2.7 km avg., Quality: 4.8 rating, Mix of Entire Home/Apt & Private Rooms)"

This name meets all the requirements and accurately reflects the characteristics of the cluster:

* It's immediately understandable, using clear and concise language.
* It includes specific metrics for each feature group:
	+ Distance: 2.7 km avg., which is close to the actual average distance of 3.37km to city center.
	+ Quality: 4.8 rating, which corresponds to the high guest satisfaction rate of 71.66/100.
* It accurately reflects the data-driven grouping by highlighting the mix of entire home/apt and private room options.

Overall, this name provides a concise and memorable representation of the cluster's key characteristics, making it an attractive choice for both potential Airbnb guests and property managers.

**Cluster 2:** Based on the provided data, I recommend the following cluster name:

"City Center Haven (Superhost: 97.09%, Quality: 4.8 rating, Distance: 2.33 km)"

This name captures the key characteristics of the cluster:

* "City Center Haven" emphasizes the proximity to city centers, which is a distinctive feature group.
* The specific metrics for each feature group are included:
	+ "Superhost: 97.09%" highlights the high guest satisfaction rating and the presence of Superhosts in this cluster.
	+ "Quality: 4.8 rating" reflects the average quality rating of the properties within the cluster.
	+ "Distance: 2.33 km" provides a clear indication of the distance to city centers, which is an important factor for potential guests.

This name is concise, memorable, and accurately reflects the data-driven grouping.

**Cluster 3:** Based on the provided data, I suggest the following concise and memorable name for the Airbnb property cluster:

"Central Charm Cottages (Price: €193.89/night, Quality: 94.04%, Distance: 3.45km to city center)"

This name captures the essence of the cluster by highlighting its central location, quality ratings, and price point. The use of "Charm Cottages" also evokes a sense of coziness and appeal, making it more likely to resonate with potential guests.

Here's a breakdown of how the name meets the requirements:

* Immediately understandable: The name is straightforward and easy to comprehend, providing a clear idea of what the cluster offers.
* Includes specific metrics for each feature group:
	+ Price: €193.89/night (average price per night)
	+ Quality: 94.04% (average guest satisfaction rating)
	+ Distance: 3.45km to city center (average distance from the property to the city center)
* Accurately reflects the data-driven grouping: The name accurately represents the cluster's characteristics, highlighting its central location and quality ratings while also providing a sense of what guests can expect in terms of price.

Overall, "Central Charm Cottages" is a memorable and informative name that effectively positions the Airbnb property cluster for potential guests and property managers alike.

**Cluster 4:** Based on the provided data, I recommend the following cluster name:

"City Chic Suites (Superhosts: 21/8, Entire Homes: 98.2%, Distance: 2.69 km to city center, €263.25 per night)"

Here's a breakdown of how this name meets the requirements:

1. **Immediate understandability**: The name is straightforward and clearly communicates the type of properties within the cluster (City Chic Suites).
2. **Specific metrics for each feature group**:
	* Superhosts: 21/8 indicates that 21 out of every 8 properties are managed by Superhosts, providing a sense of trust and reliability.
	* Entire Homes: 98.2% highlights the dominant type of accommodation within the cluster (entire homes or apartments).
	* Distance: 2.69 km to city center provides a clear indication of the proximity to the city center.
	* €263.25 per night reflects the average price point for properties in this cluster.
3. **Accurate reflection of data-driven grouping**: The name captures the essence of the cluster, which appears to be focused on upscale accommodations in European cities (indicated by the high presence of City Chic Suites and Superhosts).

This name is both memorable and informative, making it an effective tool for attracting potential guests and property managers alike.

**Cluster 5:** Based on the provided data, I recommend the following cluster name:

"City Haven (Distance: 9.23 km to city center, Price: €187.58 per night, Accommodation: 63% Private Rooms & 36% Entire Homes)"

This name accurately reflects the average distance to the city center, average price per night, and accommodation type distribution of Cluster 5. The use of "City Haven" as the descriptive name emphasizes the cluster's proximity to urban areas while also conveying a sense of comfort and relaxation.

Here's a breakdown of the name:

* "Descriptive Name": City Haven
* "(Distance)": 9.23 km to city center, indicating the average distance to the city center.
* "(Price)": €187.58 per night, showing the average price per night for properties in this cluster.
* "(Accommodation)": 63% Private Rooms & 36% Entire Homes, highlighting the most common types of accommodations offered within the cluster.

Overall, this name provides a clear and concise representation of the key characteristics that define Cluster 5, making it easily understandable for both guests and hosts.