# In [2]:
import requests
import pandas as pd
import plotly.express as px
from bs4 import BeautifulSoup
from datetime import date
from dash import Dash, html, dcc, callback, Output, Input

# In [3]:
# HTTP headers passed to requests.get() by the scraper; carries a desktop
# Chrome User-Agent string.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/119.0.0.0 Safari/537.36'
    ),
}

# Create the Dash application object; its layout is assigned further down and
# it is started from the __main__ guard at the end of the file.
app = Dash(__name__)

# Search endpoint (Traditional Chinese locale) queried by plotter().
_SEARCH_URL = "https://www.booking.com/searchresults.zh-tw.html"

# Seconds to wait for each search page before giving up instead of hanging.
_REQUEST_TIMEOUT = 10

# Columns of the scraped table; passed to DataFrame so they exist even when
# no hotel could be parsed.
_HOTEL_COLUMNS = ["name", "location", "price", "rating", "distance", "comment"]


def _parse_price(text):
    """Return the integer amount from a price label such as ``'TWD 1,234'``.

    Only decimal digits are kept, so the result does not depend on the length
    of the currency prefix or on thousands separators. Assumes the label shows
    a whole-number amount.

    Raises:
        ValueError: If ``text`` contains no digits.
    """
    digits = "".join(ch for ch in text if ch.isdecimal())
    return int(digits)  # int("") raises ValueError for digit-less labels


def _parse_distance_km(text):
    """Convert a label like ``'距中心 1.2 公里'`` or ``'距中心 300 公尺'`` to kilometres.

    Raises:
        ValueError: If the remaining text is not a number.
    """
    # A trailing "尺" marks metres ("公尺"); anything else is treated as km.
    in_metres = text.endswith("尺")
    number = text.replace("距中心 ", "").replace(" 公里", "").replace(" 公尺", "")
    distance = float(number)
    return distance / 1000 if in_metres else distance


@callback(
    Output("scatter_plot", "figure"),
    Input("location_selecter", "value"),
    Input("date_selecter", "start_date"),
    Input("date_selecter", "end_date"),
)
def plotter(value, start_date, end_date):
    """Scrape hotel search results and return a price/distance scatter figure.

    Five result pages are requested (offsets 0, 25, 50, 75, 100). Every
    property card that exposes a title, address, price, rating, distance and
    review label contributes one point; cards with a missing or unparsable
    field are skipped.

    Args:
        value: Destination text typed by the user.
        start_date: Check-in date, sent as the ``checkin`` query parameter.
        end_date: Check-out date, sent as the ``checkout`` query parameter.

    Returns:
        A Plotly Express scatter figure with price on x, distance (km) on y,
        coloured by rating. If nothing could be scraped, the figure is built
        from an empty table instead of raising.

    Raises:
        requests.RequestException: If a search page cannot be fetched,
            including when it exceeds the request timeout.
    """
    # Passing the query as ``params`` lets requests percent-encode the user's
    # text, so characters such as "&" or "#" cannot corrupt the query string.
    base_params = {
        "ss": value,
        "ssne": value,
        "ssne_untouched": value,
        "label": "gen173nr-1BCAEoggI46AdIM1gEaOcBiAEBmAEwuAEHyAEN2AEB6AEBiAIBqAIDuALGzPOqBsACAdICJDhmYTAwOTYzLTNmODktNGU3Ny05NTQ2LWU0ZGNhZmY0M2Y4YdgCBeACAQ",
        "sid": "1fa01af5e49730258126b119da65c366",
        "aid": 304142,
        "lang": "zh-tw",
        "sb": 1,
        "src_elem": "sb",
        "dest_type": "city",
        "checkin": start_date,
        "checkout": end_date,
        "group_adults": 1,
        "no_rooms": 1,
        "group_children": 0,
    }

    hotels_lst = []

    for offset in range(0, 125, 25):
        response = requests.get(
            _SEARCH_URL,
            params={**base_params, "offset": offset},
            headers=header,
            timeout=_REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        for hotel in soup.find_all('div', {'data-testid': 'property-card'}):
            name_raw = hotel.find('div', {'data-testid': 'title'})
            location_raw = hotel.find('span', {'data-testid': 'address'})
            price_raw = hotel.find('span', {'data-testid': 'price-and-discounted-price'})
            rating_raw = hotel.find('div', {'class': 'a3b8729ab1 d86cee9b25'})
            distance_raw = hotel.find('span', {'data-testid': 'distance'})
            comment_raw = hotel.find('div', {'class': 'a3b8729ab1 e6208ee469 cb2cbb3ccb'})

            raw_fields = (name_raw, location_raw, price_raw,
                          rating_raw, distance_raw, comment_raw)
            if any(tag is None for tag in raw_fields):
                continue  # incomplete card: cannot be plotted

            try:
                record = {
                    'name': name_raw.text.strip(),
                    'location': location_raw.text.strip(),
                    'price': _parse_price(price_raw.text.strip()),
                    'rating': float(rating_raw.text.strip()),
                    'distance': _parse_distance_km(distance_raw.text.strip()),
                    'comment': comment_raw.text.strip(),
                }
            except ValueError:
                # One card with unexpected text must not abort the whole plot.
                continue
            hotels_lst.append(record)

    # Explicit columns keep px.scatter from failing on a column-less frame
    # when no hotel was scraped.
    hotels_df = pd.DataFrame(hotels_lst, columns=_HOTEL_COLUMNS)
    fig = px.scatter(
        hotels_df,
        x="price",
        y="distance",
        color="rating",
        hover_name="name",
        hover_data=["location", "comment"],
    )
    return fig

# Page layout: a destination text box, a check-in/check-out range picker and
# the scatter plot that the `plotter` callback fills in.
app.layout = html.Div([
    html.H1(
        # debounce=True: only send the value on Enter / loss of focus, so the
        # scraping callback is not re-run for every keystroke.
        dcc.Input(id="location_selecter", value="倫敦", type="text", debounce=True),
        style={'textAlign': 'center'},
    ),
    html.H2(
        dcc.DatePickerRange(
            id="date_selecter",
            min_date_allowed=date(2023, 12, 1),
            max_date_allowed=date(2025, 2, 28),
            initial_visible_month=date(2023, 12, 3),
            start_date=date(2023, 12, 3),
            end_date=date(2023, 12, 9),
        ),
        style={'textAlign': 'center'},
    ),
    # No figure is computed here: Dash invokes the callback with the initial
    # input values when the page loads, so scraping while the module is being
    # imported would only repeat the same requests and delay start-up.
    dcc.Graph(id="scatter_plot"),
])

# Run the app only when this file is executed as a script, not when it is
# imported; debug=True is passed through to app.run().
if __name__ == '__main__':
    app.run(debug=True)