In [None]:
from IPython.display import display, Markdown
import pandas as pd
import requests

In [None]:
# Read the clustered Airbnb CSV (path is relative to the current working directory)
DATA_PATH = "../ML-Exam/data/clustered_airbnb.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Define the LLaMA 3 client
class Llama3Client:
    """Small HTTP client that sends a single-turn chat request to ``{host}/api/chat``.

    The default host/port and payload shape look like a local Ollama server --
    NOTE(review): confirm against the deployment this notebook targets.

    Args:
        model: Model name sent in the request payload.
        host: Base URL of the server; trailing slashes are ignored.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``. Without
            a timeout a stalled server would block the notebook indefinitely.
    """

    def __init__(self, model="llama3", host="http://localhost:11434", timeout=300):
        self.model = model
        # Strip trailing slashes so "http://host/" does not become "http://host//api/chat".
        self.api_url = f"{host.rstrip('/')}/api/chat"
        self.timeout = timeout

    def ask(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Text placed in the ``content`` of the one user message.

        Returns:
            The stripped ``message.content`` of the JSON response, or a string
            starting with ``"Error: "`` when the request fails or the response
            does not have the expected shape. This method does not raise for
            those cases.
        """
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False
        }
        try:
            response = requests.post(self.api_url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            return response.json()['message']['content'].strip()
        except requests.RequestException as e:
            return f"Error: {e}"
        except (KeyError, TypeError, ValueError, AttributeError) as e:
            # A 2xx reply without message.content (or with non-JSON / non-string
            # content) is reported the same way as a transport failure.
            return f"Error: unexpected response format ({e!r})"

# Render listings as prompt-ready text, one line per listing
def format_listings(listings_df, nights):
    """Build a newline-separated text summary of the given listings.

    Args:
        listings_df: DataFrame providing the columns 'City', 'cluster',
            'realSum', 'bedrooms', 'dist', 'metro_dist' and
            'guest_satisfaction_overall'.
        nights: Multiplier applied to 'realSum' to get the total shown per line.

    Returns:
        One line per row, joined with newlines; an empty string when there
        are no rows.
    """
    def _describe(entry):
        # Total for the whole stay, shown with two decimals at the end of the line.
        stay_cost = entry['realSum'] * nights
        pieces = [
            f"{entry['City']} (Cluster {entry['cluster']}) - €{entry['realSum']}/night, ",
            f"{entry['bedrooms']} bedrooms, ",
            f"{entry['dist']}km to center, ",
            f"{entry['metro_dist']}km to metro, ",
            f"{entry['guest_satisfaction_overall']} guest satisfaction, ",
            f"Total: €{stay_cost:.2f}",
        ]
        return "".join(pieces)

    return "\n".join(_describe(entry) for _, entry in listings_df.iterrows())

In [None]:
# 1. Function for best value (all cities) using format_listings
def recommend_best_value(df, budget, nights=5, max_results=5, client=None):
    """Ask the LLM which listing, across all cities, is the best pick within budget.

    Listings whose 'realSum' * nights exceeds ``budget`` are dropped; the
    remainder is ordered by 'guest_satisfaction_overall' (highest first) and
    the top ``max_results`` rows are rendered with ``format_listings`` and
    embedded in the prompt.

    Note: a later cell in this notebook defines a function with the same
    name (table-based prompt); whichever cell ran last is the one in effect.

    Args:
        df: Listings DataFrame; it is not modified by this function.
        budget: Maximum total price for the whole stay.
        nights: Number of nights used to compute the total price.
        max_results: Maximum number of listings included in the prompt.
        client: Optional object with an ``ask(prompt) -> str`` method. When
            omitted, a default ``Llama3Client()`` is created.

    Returns:
        The string returned by ``client.ask``, or a fixed "no listings"
        message when nothing fits the budget (the client is not called then).
    """
    # Compute the stay cost on the fly rather than writing a 'total_price'
    # column into the caller's DataFrame.
    within_budget = df['realSum'] * nights <= budget
    filtered = (
        df[within_budget]
        .sort_values(by='guest_satisfaction_overall', ascending=False)
        .head(max_results)
    )

    if filtered.empty:
        return "No listings found within your budget."

    context = format_listings(filtered, nights)
    prompt = f"""
You are a travel assistant helping users choose the best Airbnb option from clustered data.

The user has a budget of €{budget} for {nights} nights. Below are some available listings:

{context}

For each listing, consider:
- Total price
- Distance to city center and metro
- Number of bedrooms
- Guest satisfaction
- Cluster number

Which listing would you recommend and why? Include all these aspects in your answer.
"""

    if client is None:
        client = Llama3Client()
    return client.ask(prompt)

# Example inputs: a 5-night stay with a total budget of 500
nights, budget = 5, 500

response_1 = recommend_best_value(df, budget=budget, nights=nights)

In [None]:
# Render the heading, then the model's answer, as Markdown
display(
    Markdown("### Recommendation (Best Value Overall):"),
    Markdown(response_1),
)

In [None]:
# 2. Function for a specific city using format_listings
def recommend_in_city(df, city, budget, nights=5, max_results=5, client=None):
    """Ask the LLM which listing in ``city`` is the best pick within budget.

    Rows are kept when 'City' matches ``city`` case-insensitively and
    'realSum' * nights does not exceed ``budget``. The remainder is ordered by
    'guest_satisfaction_overall' (highest first) and the top ``max_results``
    rows are rendered with ``format_listings`` and embedded in the prompt.

    Note: a later cell in this notebook defines a function with the same
    name (table-based prompt); whichever cell ran last is the one in effect.

    Args:
        df: Listings DataFrame; it is not modified by this function.
        city: City name to match against the 'City' column.
        budget: Maximum total price for the whole stay.
        nights: Number of nights used to compute the total price.
        max_results: Maximum number of listings included in the prompt.
        client: Optional object with an ``ask(prompt) -> str`` method. When
            omitted, a default ``Llama3Client()`` is created.

    Returns:
        The string returned by ``client.ask``, or a "no listings" message
        naming the city when nothing matches (the client is not called then).
    """
    # Compute the stay cost on the fly rather than writing a 'total_price'
    # column into the caller's DataFrame.
    in_city = df['City'].str.lower() == city.lower()
    within_budget = df['realSum'] * nights <= budget
    filtered = (
        df[in_city & within_budget]
        .sort_values(by='guest_satisfaction_overall', ascending=False)
        .head(max_results)
    )

    if filtered.empty:
        return f"No listings found in {city.title()} within your budget."

    context = format_listings(filtered, nights)
    prompt = f"""
You are a travel assistant helping users choose the best Airbnb option in {city.title()} from clustered data.

The user has a budget of €{budget} for {nights} nights. Below are some available listings in {city.title()}:

{context}

For each listing, consider:
- Total price
- Distance to city center and metro
- Number of bedrooms
- Guest satisfaction
- Cluster number

Based on value, distance, and cluster, which listing would you recommend and why?
"""
    if client is None:
        client = Llama3Client()
    return client.ask(prompt)

# Example inputs (swap in input() calls or notebook widgets as needed)
nights, budget, city = 5, 500, "Berlin"

response_2 = recommend_in_city(df, city, budget, nights=nights)

# Heading first, then the model's answer, both rendered as Markdown
display(
    Markdown(f"### Recommendation (City: {city.title()}):"),
    Markdown(response_2),
)


In [None]:
# 1. Function for best value (all cities) using .to_string()
def recommend_best_value(df, budget, nights=5, max_results=5, client=None):
    """Ask the LLM for the best listing within budget, passing the rows as a text table.

    A 'total_price' column ('realSum' * nights) is computed on a copy of
    ``df``. Rows over ``budget`` are dropped, the remainder is ordered by
    'guest_satisfaction_overall' (highest first), and the top ``max_results``
    rows -- all columns, including 'total_price' -- are embedded in the
    prompt via ``DataFrame.to_string(index=False)``.

    Note: this redefines the ``recommend_best_value`` from an earlier cell of
    this notebook; after this cell runs, this version is the one in effect.

    Args:
        df: Listings DataFrame; it is not modified by this function.
        budget: Maximum total price for the whole stay.
        nights: Number of nights used to compute the total price.
        max_results: Maximum number of listings included in the prompt.
        client: Optional object with an ``ask(prompt) -> str`` method. When
            omitted, a default ``Llama3Client()`` is created.

    Returns:
        The string returned by ``client.ask``, or a fixed "no listings"
        message when nothing fits the budget (the client is not called then).
    """
    # assign() returns a new frame, so the caller's DataFrame does not gain
    # (or have overwritten) a 'total_price' column as a side effect.
    priced = df.assign(total_price=df['realSum'] * nights)
    filtered = (
        priced[priced['total_price'] <= budget]
        .sort_values(by='guest_satisfaction_overall', ascending=False)
        .head(max_results)
    )

    if filtered.empty:
        return "No listings found within your budget."

    context = filtered.to_string(index=False)

    prompt = f"""
You are a travel assistant helping users choose the best Airbnb option from clustered data.

The user has a budget of €{budget} for {nights} nights. Below are some available listings as a table:

{context}

Please evaluate each listing and recommend the best one. Be sure to consider:
- Total price
- Distance to city center and metro (dist & metro_dist)
- Number of bedrooms (bedrooms)
- Guest Satisfaction (guest_satisfaction_overall)
- Cluster

Explain your reasoning clearly using all available details and remember to mention which city the airbnb is located in.
"""
    if client is None:
        client = Llama3Client()
    return client.ask(prompt)

In [None]:
# Query the table-based recommender (budget 500, 5 nights) and render the reply
response_1 = recommend_best_value(df, nights=5, budget=500)
display(Markdown(response_1))


In [None]:
# 2. Function for a specific city using .to_string()
def recommend_in_city(df, city, budget, nights=5, max_results=5, client=None):
    """Ask the LLM for the best listing in ``city`` within budget, passing rows as a text table.

    A 'total_price' column ('realSum' * nights) is computed on a copy of
    ``df``. Rows are kept when 'City' matches ``city`` case-insensitively and
    'total_price' does not exceed ``budget``. The remainder is ordered by
    'guest_satisfaction_overall' (highest first) and the top ``max_results``
    rows -- all columns, including 'total_price' -- are embedded in the
    prompt via ``DataFrame.to_string(index=False)``.

    Note: this redefines the ``recommend_in_city`` from an earlier cell of
    this notebook; after this cell runs, this version is the one in effect.

    Args:
        df: Listings DataFrame; it is not modified by this function.
        city: City name to match against the 'City' column.
        budget: Maximum total price for the whole stay.
        nights: Number of nights used to compute the total price.
        max_results: Maximum number of listings included in the prompt.
        client: Optional object with an ``ask(prompt) -> str`` method. When
            omitted, a default ``Llama3Client()`` is created.

    Returns:
        The string returned by ``client.ask``, or a "no listings" message
        naming the city when nothing matches (the client is not called then).
    """
    # assign() returns a new frame, so the caller's DataFrame does not gain
    # (or have overwritten) a 'total_price' column as a side effect.
    priced = df.assign(total_price=df['realSum'] * nights)
    in_city = priced['City'].str.lower() == city.lower()
    within_budget = priced['total_price'] <= budget
    filtered = (
        priced[in_city & within_budget]
        .sort_values(by='guest_satisfaction_overall', ascending=False)
        .head(max_results)
    )

    if filtered.empty:
        return f"No listings found in {city.title()} within your budget."

    context = filtered.to_string(index=False)

    prompt = f"""
You are a travel assistant helping users choose the best Airbnb option in {city.title()}.

The user has a budget of €{budget} for {nights} nights. Below are the available listings in table format:

{context}

Please evaluate the listings and recommend the best one. Include reasoning based on:
- Total cost
- Distance to attractions
- Bedrooms
- Guest satisfaction
- Cluster number
"""
    if client is None:
        client = Llama3Client()
    return client.ask(prompt)