# In [None]:
import re
from datetime import date

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from bs4 import BeautifulSoup
from dash import Dash, dcc, html, Input, Output, callback, State
from IPython.display import HTML

# Dash application instance; the single-page layout is attached below.
app = Dash(__name__)

app.layout = html.Div(
    children=[
        # Page title.
        html.Div(
            children='Hotel Data Dashboard',
            className='row',
            style={'textAlign': 'center', 'color': 'black', 'fontSize': 30},
        ),
        # Search controls: city text box, stay date range and submit button.
        html.Div(
            children=[
                "城市: ",
                dcc.Input(id='search', type='text', value='台北'),
                " 日期： ",
                dcc.DatePickerRange(
                    id='date-picker',
                    min_date_allowed=date(1995, 8, 5),
                    max_date_allowed=date(2025, 12, 30),
                    initial_visible_month=date(2023, 11, 24),
                    start_date=date(2023, 11, 24),
                    end_date=date(2023, 11, 25),
                ),
                html.Button(
                    children='搜尋!',
                    id='submit-button-state',
                    n_clicks=0,
                ),
            ],
            style={'textAlign': 'center'},
        ),
        html.Br(),
        # Target of the `update_graph` callback's figure output.
        dcc.Graph(id='graph'),
    ],
)


# Booking.com search endpoint; query parameters are passed separately so that
# `requests` URL-encodes user input instead of it being pasted into the URL.
_SEARCH_URL = "https://www.booking.com/searchresults.zh-tw.html"
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}
_REQUEST_TIMEOUT = 10  # seconds; prevents a stalled connection from hanging the callback
_PAGE_SIZE = 25        # step of the `offset` query parameter between pages
_MAX_PAGES = 21        # hard cap on the number of result pages requested
_TARGET_ROWS = 100     # stop paging once this many distinct hotels were collected
_MISSING_RATING = -1.0  # sentinel plotted for hotels without a usable review score
_COLUMNS = ["Name", "Location", "Price", "Rating", "Distance", "Comment"]

# Optional non-numeric prefix (currency code/symbol) followed by one number.
_PRICE_RE = re.compile(r"^\D*(\d+(?:\.\d+)?)\s*$")
# Review score at the very start of the text, e.g. "8.5..." or "10...".
_RATING_RE = re.compile(r"\d{1,2}(?:\.\d)?")
# Last number in the text, followed only by the unit label.
_DISTANCE_RE = re.compile(r"(\d+(?:\.\d+)?)\D*$")


def _extract_hotel(card):
    """Return the raw text fields of one property card, or None if any is absent.

    The result is a tuple of stripped strings ordered as
    (name, location, price, rating, distance, comment).
    """
    elements = (
        card.find('div', {'class': 'f6431b446c a15b38c233'}),
        card.select_one('[data-testid="address"]'),
        card.select_one('[data-testid="price-and-discounted-price"]'),
        card.select_one('[data-testid="review-score"]'),
        card.select_one('[data-testid="distance"]'),
        card.find('div', {'class': 'a3b8729ab1 e6208ee469 cb2cbb3ccb'}),
    )
    if any(element is None for element in elements):
        return None
    return tuple(element.text.strip() for element in elements)


def _fetch_hotel_rows(search, start_date, end_date):
    """Scrape search-result pages and return distinct raw hotel rows.

    Pages are requested in order until at least ``_TARGET_ROWS`` distinct rows
    were collected, ``_MAX_PAGES`` pages were requested, or a page contains no
    property cards. Duplicates are dropped, keeping the first occurrence.
    """
    params = {
        "label": "gen173nr-1FCAQoggJCDXNlYXJjaF_lj7DljJdIMFgEaOcBiAEBmAEwuAEXyAEM2AEB6AEB-AEGiAIBqAIDuALywuOqBsACAdICJGZkYmNjYWNlLWE3MDktNGE2Yy1hNjIyLWQxN2FlNTNjMzM0MdgCBeACAQ",
        "aid": "3976",
        "ss": search,
        "checkin": start_date,
        "lang": "zh-tw",
        "sb": "1",
        "src_elem": "sb",
        "checkout": end_date,
        "offset": 0,
    }
    rows = []
    seen = set()
    for page in range(_MAX_PAGES):
        if len(rows) >= _TARGET_ROWS:
            break
        params["offset"] = page * _PAGE_SIZE
        response = requests.get(
            _SEARCH_URL,
            params=params,
            headers=_REQUEST_HEADERS,
            timeout=_REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, 'html.parser')
        cards = soup.find_all('div', {'data-testid': 'property-card'})
        if not cards:
            # Nothing listed on this page, so further offsets are pointless.
            break
        for card in cards:
            row = _extract_hotel(card)
            if row is not None and row not in seen:
                seen.add(row)
                rows.append(row)
    return rows


def _parse_price(text):
    """Return the price in *text* as a float, or None if it is not recognisable.

    Thousands separators are removed and any leading non-digit prefix
    (such as a currency code) is ignored.
    """
    match = _PRICE_RE.match(text.replace(",", ""))
    return float(match.group(1)) if match else None


def _parse_rating(text):
    """Return the leading review score in *text*, or -1.0 when there is none."""
    match = _RATING_RE.match(text)
    return float(match.group(0)) if match else _MISSING_RATING


def _parse_distance_m(text):
    """Return the distance in *text* as whole metres, or None if unparsable.

    A value whose text ends in '公里' (kilometres) is multiplied by 1000; any
    other unit label is taken to already be metres.
    """
    match = _DISTANCE_RE.search(text.replace(",", ""))
    if match is None:
        return None
    value = float(match.group(1))
    if text.endswith('公里'):
        return int(1000 * value)
    return int(value)


def _clean_row(row):
    """Convert one raw row to typed values, or return None to drop the row.

    A row is dropped when its price or distance cannot be parsed, since both
    are plot axes. An empty comment becomes the string "None".
    """
    name, location, price_text, rating_text, distance_text, comment = row
    price = _parse_price(price_text)
    distance = _parse_distance_m(distance_text)
    if price is None or distance is None:
        return None
    return [
        name,
        location,
        price,
        _parse_rating(rating_text),
        distance,
        comment or "None",
    ]


@callback(
    Output(component_id='graph', component_property='figure'),
    Input('submit-button-state', 'n_clicks'),
    State(component_id='search', component_property='value'),
    State('date-picker', 'start_date'),
    State('date-picker', 'end_date')
)
def update_graph(clicks, search, startDate, endDate):
    """Scrape hotels for the requested city and dates and plot price vs. distance.

    Args:
        clicks: Click count of the submit button; only used as the trigger.
        search: Destination text sent as the ``ss`` query parameter.
        startDate: Check-in date sent as the ``checkin`` query parameter.
        endDate: Check-out date sent as the ``checkout`` query parameter.

    Returns:
        A scatter figure with "Price" on x, "Distance" (metres) on y, coloured
        by "Rating". When no hotel could be scraped, an empty figure with the
        same axis titles is returned instead of raising.

    Raises:
        requests.RequestException: If a page request fails or times out.
    """
    raw_rows = _fetch_hotel_rows(search, startDate, endDate)
    cleaned = [row for row in map(_clean_row, raw_rows) if row is not None]

    if not cleaned:
        # Keep the graph usable (and labelled) when the search yields nothing.
        fig = go.Figure()
        fig.update_layout(xaxis_title="Price", yaxis_title="Distance")
        return fig

    # Naming the columns at construction time also works for any row count.
    cooked = pd.DataFrame(cleaned, columns=_COLUMNS)
    fig = px.scatter(
        cooked,
        x="Price",
        y="Distance",
        hover_data=["Name", "Price", "Rating"],
        color="Rating",
    )
    return fig
# Start the Dash server on port 8051 only when this file is executed directly.
if __name__ == "__main__":
    app.run(port=8051)