## Demo

In [1]:
import time
import random as rd
import pandas as pd
import numpy as np
from tqdm import tqdm

import requests # 페이지를 요청하는 기능 
from bs4 import BeautifulSoup # 파이썬으로 HTML을 다루는 기능
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
# driver = webdriver.Chrome(ChromeDriverManager().install())

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

user_agent = "Mozilla/5.0 (Linux; Android 9; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.83 Mobile Safari/537.36"
headers = {
    "User-Agent": user_agent
}

In [2]:
def setRestaurantPages(url: str):
    url = url   
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # 200번대 코드가 아니면 코드가 멈춤
    print("Response successful!")
    html = response.text
    soup = BeautifulSoup(html, "html.parser")

    restraurant_pages = []
    total_restraurants = int(soup.find_all('span', class_="b")[1].text)
    main_pages = total_restraurants//30 if total_restraurants%30==0 else total_restraurants//30+1
    for i in tqdm(range(main_pages-1), desc="Set restaurant pages"):
        if i > 0:
            current_url = url[:48] + f'oa{i*30}' + url[47:]
            # print(f'current_url: {current_url}')
            response = requests.get(current_url, headers=headers)
            response.raise_for_status()
            html = response.text
            soup = BeautifulSoup(html, "html.parser")

        for restraurant_page in soup.find_all('div', class_="biGQs _P fiohW alXOW NwcxK GzNcM ytVPx UTQMg RnEEZ ngXxk"):
            restraurant_pages.append("https://www.tripadvisor.com/" + restraurant_page.find('a')['href'])
            # print(restraurant_page.find('a')['href'])
        time.sleep
        # print(f'Total restraurant pages: {len(restraurant_pages)}')
        
    print(f'Total restraurant pages: {len(restraurant_pages)}')
    
    return restraurant_pages

In [3]:
def createDataset():
    dataset = pd.DataFrame({
        "name": [],
        "category": [],
        "description": [],
        "time": [],
        "URL": [],
        "priceLow": [],
        "priceHigh": [],
        "reviews": [],
        "adress": [],
        "rating": []
    })
    
    return dataset

In [9]:
def setWebdriver():
    options = Options()
    options.add_argument('user-agent=' + user_agent)
    options.add_argument('headless') #headless모드 브라우저가 뜨지 않고 실행됩니다.
    # options.add_argument('--window-size= x, y') #실행되는 브라우저 크기를 지정할 수 있습니다.
    # options.add_argument('--start-maximized') #브라우저가 최대화된 상태로 실행됩니다.
    # options.add_argument('--start-fullscreen') #브라우저가 풀스크린 모드(F11)로 실행됩니다.
    # options.add_argument('--blink-settings=imagesEnabled=false') #브라우저에서 이미지 로딩을 하지 않습니다.
    options.add_argument('--mute-audio') #브라우저에 음소거 옵션을 적용합니다.
    options.add_argument('incognito') #시크릿 모드의 브라우저가 실행됩니다.
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)

    return driver

In [10]:
def buildDataset(dataset, pages: list):
    cnt = 0
    for restaurant_page in tqdm(pages, desc="Restaurants"):
        response = requests.get(restaurant_page, headers=headers)
        response.raise_for_status()
        html = response.text
        soup = BeautifulSoup(html, "html.parser")

        # url
        url = restaurant_page;    print(f'URL: {url}')
        
        # Using selenium
        driver.get(url)

        #  time
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'NehmB')))
            time.sleep(3)
            element = driver.find_element(By.CLASS_NAME, 'NehmB')
            element.click()

            time_list = [] 
            WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "RiEuX.f")))
            time_elements = driver.find_elements(By.CLASS_NAME, "RiEuX.f")
            for time_element in time_elements:
                time_list.append(time_element.text.replace('\n', ':'))
        except:
            time_list = np.NaN
        print(f'Times: {time_list}')

        # description
        try:
            WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.LINK_TEXT, "View all details")))
            element = driver.find_element(By.LINK_TEXT, "View all details")
            element.click()
            WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "#BODY_BLOCK_JQUERY_REFLOW > div.VZmgo.D.X0.X1.Za > div > div.TocEc._Z.S2.H2._f > div > div > div.kwVln > div > div:nth-child(1) > div > div.jmnaM")))
            element = driver.find_element(By.CSS_SELECTOR, "#BODY_BLOCK_JQUERY_REFLOW > div.VZmgo.D.X0.X1.Za > div > div.TocEc._Z.S2.H2._f > div > div > div.kwVln > div > div:nth-child(1) > div > div.jmnaM")
            description = element.text
        except:
            description = np.NaN
        print(f'Description: {description}\n')

        # name 
        try:
            name = soup.find('h1', class_="HjBfq").text;    print(f'Name: {name}')
        except:
            name = np.NaN

        # category
        category = ''
        for x in soup.find('span', class_="DsyBj DxyfE"):
            if '$' not in x.text:
                category += f'{x.text}, '
        category = category[:-2]
        print(f'Category: {category}')

        # adress
        adress = soup.find_all('a', class_="AYHFM")[1].text;   print(f'Adress: {adress}')

        # rating
        try:
            rating = soup.find('span', class_="ZDEqb").text;    print(f'Rating: {rating}')
        # rating = soup.select_one("#component_52 > div.hILIJ > div > div:nth-child(1) > div > div:nth-child(1) > div.QEQvp > span.ZDEqb"); print(f'Rating: {rating}')
        except:
            rating = np.NaN

        # price
        try:
            priceLow = soup.find('div', class_="SrqKb").text.split(' - ')[0] 
            priceHigh = soup.find('div', class_="SrqKb").text.split(' - ')[1]
        except:
            priceLow = np.NaN; priceHigh = np.NaN
        print (f'PriceLow: {priceLow}, PriceHigh: {priceHigh}')

        # reviews, only english reviews
        total_reviews = int(soup.find_all('span', class_="count")[0].text[1:-1].replace(',', ''));    print(f'Total_reviews: {total_reviews}')
        review_pages = total_reviews//15 if total_reviews%15 == 0 else total_reviews//15 + 1;    print(f'Review pages: {review_pages}')
        review_list = []
        for i in tqdm(range(0, review_pages), desc='Review crwaling...'):
            if i > 1:
                url_list = url.split('-Reviews-')
                current_url = url_list[0] + f'-Reviews-or{15*i}-' + url_list[1]
                # print(f'current_url: {current_url}')
                response = requests.get(current_url, headers=headers)
                response.raise_for_status()
                html = response.text
                soup = BeautifulSoup(html, "html.parser") 

            reviews = soup.find_all('p', class_="partial_entry");
            for review in reviews:
                review_list.append(review.text)
            #     print(review.text)
            # print(f'Reviews: {len(review_list)}')

            time.sleep(rd.uniform(0.1, 0.5))
        print(f'Reviews: {len(review_list)}')


        
        time.sleep(rd.uniform(0.5, 1.5))

        dataset.loc[cnt] = [name, category, description, time_list, url, priceLow, priceHigh, review_list, adress, rating]
        cnt += 1
        
        
    return dataset 

In [12]:
url = "https://www.tripadvisor.com/Restaurants-g298085-Da_Nang.html"
restraurant_pages = setRestaurantPages(url)

# create dataset
dataset = createDataset()

# set webdriver
driver = setWebdriver()

# start crawling
dataset = buildDataset(dataset, restraurant_pages[:10])

# save dataset
dataset.to_csv('test.csv')
print(f'Dataset build complete!\n')
dataset

Response successful!


Set restaurant pages: 100%|██████████| 49/49 [00:47<00:00,  1.03it/s]


Total restraurant pages: 1811


  driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
Restaurants:   0%|          | 0/10 [00:00<?, ?it/s]

URL: https://www.tripadvisor.com//Restaurant_Review-g25231262-d24161498-Reviews-Co_Ba_Ph_Bo-Hai_Chau_Da_Nang.html
Times: ['Sun:10:00 AM - 10:00 PM', 'Mon:10:00 AM - 10:00 PM', 'Tue:10:00 AM - 10:00 PM', 'Wed:10:00 AM - 10:00 PM', 'Thu:10:00 AM - 10:00 PM', 'Fri:10:00 AM - 10:00 PM', 'Sat:10:00 AM - 10:00 PM']
Description: nan

Name: Cô Ba Phở Bò
Category: Seafood, Asian, Vietnamese
Adress: 154 Bạch Đằng Hải Châu 1, Hai Chau, Da Nang 550000 Vietnam
Rating: 5.0 
PriceLow: nan, PriceHigh: nan
Total_reviews: 44
Review pages: 3


Review crwaling...: 100%|██████████| 3/3 [00:01<00:00,  1.68it/s]


Reviews: 44


Restaurants:  10%|█         | 1/10 [00:19<02:58, 19.78s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d24985405-Reviews-Cardi_Pizzeria_Bach_Dang-Da_Nang.html
Times: ['Sun:12:00 AM - 11:59 PM', 'Mon:12:00 AM - 11:59 PM', 'Tue:12:00 AM - 11:59 PM', 'Wed:12:00 AM - 11:59 PM', 'Thu:12:00 AM - 11:59 PM', 'Fri:12:00 AM - 11:59 PM', 'Sat:12:00 AM - 11:59 PM']
Description: Premium Steak & Pizza Certified Imported Steak, Fresh Italian Standard Pizza, Premium Wine Cellar Millionaire Han River view. We have a chain of 4 restaurants in Da Nang and Hoi An: 124 Bạch Đằng, Đà Nẵng 76 Đường 2/9, Đà Nẵng, 93 Võ Văn Kiệt, Đà Nẵng and 32 Thái Phiên, Hội An

Name: Cardi Pizzeria Bach Dang
Category: American, Steakhouse, Brew Pub
Adress: 124 Bạch Đằng, Hải Châu, Đà Nẵng, Da Nang Vietnam
Rating: 5.0 
PriceLow: nan, PriceHigh: nan
Total_reviews: 333
Review pages: 23


Review crwaling...: 100%|██████████| 23/23 [00:32<00:00,  1.42s/it]


Reviews: 333


Restaurants:  20%|██        | 2/10 [00:58<04:08, 31.10s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d7000524-Reviews-Ca_Chuon_Co_Vietnamese_Seafood_Restaurant-Da_Nang.html
Times: ['Sun:06:00 AM - 10:00 PM', 'Mon:06:00 AM - 10:00 PM', 'Tue:06:00 AM - 10:00 PM', 'Wed:06:00 AM - 10:00 PM', 'Thu:06:00 AM - 10:00 PM', 'Fri:06:00 AM - 10:00 PM', 'Sat:06:00 AM - 10:00 PM']
Description: Ca Chuon Co, ('flying fish') a modern stylish venue to enjoy the freshest seafood catch of the day. Sit back, relax and bask in the balmy sea breezes of the South East Sea. The Ca Chuon Co restaurant will deliver to you the most authentic traditional Vietnamese Food and the best taste of our local flavor to your dish.

Name: Ca Chuon Co Vietnamese - Seafood Restaurant.
Category: Cafe, International, Asian
Adress: 99 Vo Nguyen Giap Street Ngu Hanh Son District, Da Nang 550000 Vietnam
Rating: 5.0 
PriceLow: ₩12,953, PriceHigh: ₩38,860
Total_reviews: 227
Review pages: 16


Review crwaling...: 100%|██████████| 16/16 [00:23<00:00,  1.48s/it]


Reviews: 227


Restaurants:  30%|███       | 3/10 [01:28<03:34, 30.64s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d13236259-Reviews-Bistecca_Restaurant_Danang-Da_Nang.html
Times: ['Sun:06:00 AM - 10:00 PM', 'Mon:06:00 AM - 10:00 PM', 'Tue:06:00 AM - 10:00 PM', 'Wed:06:00 AM - 10:00 PM', 'Thu:06:00 AM - 10:00 PM', 'Fri:06:00 AM - 10:00 PM', 'Sat:06:00 AM - 10:00 PM']
Description: Bistecca – Italian restaurant offering the original home-made taste with a large selection of Italian and international wines, ensuring the right combination of flavours from plate to your glass. Moreover, we serve buffet breakfast with a wide range of food in a luxury ambiance, you can choose indoor or outdoor to enjoy breakfast

Name: Bistecca Restaurant Danang
Category: Italian, Southwestern, European
Adress: 20 Dong Da 7F New Orient Hotel, Thuan Phuoc Ward, Hai Chau District, Da Nang 550000 Vietnam
Rating: 5.0 
PriceLow: ₩3,886, PriceHigh: ₩64,767
Total_reviews: 77
Review pages: 6


Review crwaling...: 100%|██████████| 6/6 [00:07<00:00,  1.21s/it]


Reviews: 77


Restaurants:  40%|████      | 4/10 [01:42<02:23, 23.97s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d24082246-Reviews-3_Big_Nu_ng_L_u-Da_Nang.html
Times: ['Sun:11:00 AM - 11:00 PM', 'Mon:11:00 AM - 10:00 PM', 'Tue:11:00 AM - 10:00 PM', 'Wed:11:00 AM - 10:00 PM', 'Thu:11:00 AM - 10:00 PM', 'Fri:11:00 AM - 10:00 PM', 'Sat:11:00 AM - 10:00 PM']
Description: 3 BiG - Nướng & Lẩu Nhà hàng BBQ - Thịt nướng - Korean Buffet - Buffet Tobokki & Pizza BBQ Restaurant - Grilled Beef - Korean Buffet - Tobokki Buffet

Name: 3 Big - Nướng & Lẩu
Category: Pizza, Barbecue, Korean
Adress: 134 Phan Châu Trinh, Phước Ninh, Hải Châu, Đà Nẵng, Da Nang 550000 Vietnam
Rating: 5.0 
PriceLow: nan, PriceHigh: nan
Total_reviews: 13
Review pages: 1


Review crwaling...: 100%|██████████| 1/1 [00:00<00:00,  3.04it/s]


Reviews: 13


Restaurants:  50%|█████     | 5/10 [01:50<01:30, 18.02s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g15296807-d13810289-Reviews-Thia_G_Restaurant_Da_Nang-My_An_Da_Nang.html
Times: ['Sun:10:00 AM - 10:00 PM', 'Mon:10:00 AM - 10:00 PM', 'Tue:10:00 AM - 10:00 PM', 'Wed:10:00 AM - 10:00 PM', 'Thu:10:00 AM - 10:00 PM', 'Fri:10:00 AM - 10:00 PM', 'Sat:10:00 AM - 10:00 PM']
Description: nan

Name: Thìa Gỗ Restaurant Da Nang
Category: Vietnamese, Vegetarian Friendly
Adress: 53 Phan Thuc Duyen, My An, Da Nang 50507 Vietnam
Rating: 5.0 
PriceLow: ₩2,124, PriceHigh: ₩26,554
Total_reviews: 1840
Review pages: 123


Review crwaling...: 100%|██████████| 123/123 [03:20<00:00,  1.63s/it]


Reviews: 1840


Restaurants:  60%|██████    | 6/10 [05:28<05:44, 86.03s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d11616043-Reviews-Six_On_Six_Cafe-Da_Nang.html
Times: ['Sun:07:30 AM - 9:30 PM', 'Mon:07:30 AM - 9:30 PM', 'Tue:07:30 AM - 9:30 PM', 'Wed:07:30 AM - 9:30 PM', 'Thu:07:30 AM - 9:30 PM', 'Fri:07:30 AM - 9:30 PM', 'Sat:07:30 AM - 9:30 PM']
Description: Hi! We're a small, funky specialty cafe on the east side of Danang, sourcing the finest Vietnamese beans (100% arabica) and single origins from boutique farms in Da Lat. We offer cold brew, pour over, Aeropress, and all your favorite espresso-based drinks done just right. We also serve our own takes on traditional Vietnamese coffee styles. And if you need something more substantial, we offer a wide array of hearty, healthy western fare—everything from blue cheese omelets to BLETs and salads. We're open everyday from 8:00am to 5:00pm. Come give us a try, you won’t be disappointed.

Name: Six On Six Cafe
Category: American, Cafe, Fusion
Adress: 64 Bà Huyện Thanh Quan K14/11 Luu Quang

Review crwaling...: 100%|██████████| 15/15 [00:22<00:00,  1.51s/it]


Reviews: 214


Restaurants:  70%|███████   | 7/10 [05:58<03:23, 67.68s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d25073751-Reviews-La_Cabana_House_of_BBQ-Da_Nang.html
Times: ['Sun:4:00 PM - 11:00 PM', 'Mon:4:00 PM - 11:00 PM', 'Tue:4:00 PM - 11:00 PM', 'Wed:4:00 PM - 11:00 PM', 'Thu:4:00 PM - 11:00 PM', 'Fri:4:00 PM - 11:00 PM', 'Sat:4:00 PM - 11:00 PM']
Description: La Cabana is a Cozy Argentinian Steakhouse in Da Nang. Located right in front of Carp Jumping Dragon Gate by the Han River (near Dragon bridge), La Cabana has spacious space, comfortable seats, excellent air conditioner suitable for couple and group dining. Our sparkling balcony gives you an overall view of the Han river and Dragon bridge. La Cabana's signature includes perfect grilled Argentinian Steaks with a very reasonable price. Have yourself a memorable experience with soft music, excellent food, imported wine at La Cabana restaurant.

Name: La Cabana - House of BBQ
Category: Steakhouse, Grill, Diner
Adress: 505 D. Tran Hung Dao An Hai Trung, Son Tra, Da Nang 55000 Vie

Review crwaling...: 100%|██████████| 11/11 [00:14<00:00,  1.27s/it]


Reviews: 151


Restaurants:  80%|████████  | 8/10 [06:19<01:45, 52.95s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d3187207-Reviews-The_Golden_Dragon-Da_Nang.html
Times: ['Sun:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM', 'Mon:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM', 'Tue:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM', 'Wed:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM', 'Thu:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM', 'Fri:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM', 'Sat:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM']
Description: Tease your taste bud with authentic Cantonese cuisine in this Chinese style restaurant daily for lunch & dinner (except Monday). DIM SUM A-LA-CARTE MENU: Soup, Steamed/ Fried Dim sum, Steamed Rice Roll, Congee, Fried rice, Noodles, Dessert, etc. CHINESE A-LA-CARTE MENU: Roasted Duck, Suckling pig, BBQ pork glaze with honey, abalone soup, etc. ALL-YOU-CAN-EAT DIM SUM. CHINESE SET MENUS for group from 4 people. The famous must-try CHINESE BUFFET is available on SUNDAY for lunch & dinner.

Name: The Golden Dragon
Category: Chinese, International, Asian
Adress:

Review crwaling...: 100%|██████████| 8/8 [00:11<00:00,  1.42s/it]


Reviews: 115


Restaurants:  90%|█████████ | 9/10 [06:37<00:42, 42.08s/it]

URL: https://www.tripadvisor.com//Restaurant_Review-g298085-d25502987-Reviews-The_Gypsy_Rooftop_Kitchen_Bar-Da_Nang.html
Times: ['Sun:5:00 PM - 12:00 AM', 'Mon:5:00 PM - 12:00 AM', 'Tue:5:00 PM - 12:00 AM', 'Wed:5:00 PM - 12:00 AM', 'Thu:5:00 PM - 12:00 AM', 'Fri:5:00 PM - 12:00 AM', 'Sat:5:00 PM - 12:00 AM']
Description: THE GYPSY is perched on the rooftop of the stylish New Orient Hotel in Hai Chau district. The panoramic, wrap-around terraces offer 360-degree views of the stunning Da Nang skyline overlooking the city and the iconic river. The restaurant and lounge bar has been designed to the finest detail, wowing guests at every turn. THE GYPSY offers a modern dining experience, revisiting world-famous ingredients & recipes with a twist using different cooking techniques including a custom-made wood fire oven. Behind the Gypsy is a multicultural crew who brought their experiences from Cannes, Hong Kong, Shanghai to create an unforgettable dining experience for their guests.

Name: 

Review crwaling...: 100%|██████████| 9/9 [00:11<00:00,  1.29s/it]


Reviews: 132


Restaurants: 100%|██████████| 10/10 [06:56<00:00, 41.65s/it]

Dataset build complete!






Unnamed: 0,name,category,description,time,URL,priceLow,priceHigh,reviews,adress,rating
0,Cô Ba Phở Bò,"Seafood, Asian, Vietnamese",,"[Sun:10:00 AM - 10:00 PM, Mon:10:00 AM - 10:00...",https://www.tripadvisor.com//Restaurant_Review...,,,[Nice view and environment! \nFinally tasted t...,"154 Bạch Đằng Hải Châu 1, Hai Chau, Da Nang 55...",5.0
1,Cardi Pizzeria Bach Dang,"American, Steakhouse, Brew Pub",Premium Steak & Pizza Certified Imported Steak...,"[Sun:12:00 AM - 11:59 PM, Mon:12:00 AM - 11:59...",https://www.tripadvisor.com//Restaurant_Review...,,,[I came in for pizza because they look like th...,"124 Bạch Đằng, Hải Châu, Đà Nẵng, Da Nang Vietnam",5.0
2,Ca Chuon Co Vietnamese - Seafood Restaurant.,"Cafe, International, Asian","Ca Chuon Co, ('flying fish') a modern stylish ...","[Sun:06:00 AM - 10:00 PM, Mon:06:00 AM - 10:00...",https://www.tripadvisor.com//Restaurant_Review...,"₩12,953","₩38,860",[I was looking for some decent seafoods restau...,99 Vo Nguyen Giap Street Ngu Hanh Son District...,5.0
3,Bistecca Restaurant Danang,"Italian, Southwestern, European",Bistecca – Italian restaurant offering the ori...,"[Sun:06:00 AM - 10:00 PM, Mon:06:00 AM - 10:00...",https://www.tripadvisor.com//Restaurant_Review...,"₩3,886","₩64,767",[Beautiful dinner and great service! All meals...,"20 Dong Da 7F New Orient Hotel, Thuan Phuoc Wa...",5.0
4,3 Big - Nướng & Lẩu,"Pizza, Barbecue, Korean",3 BiG - Nướng & Lẩu Nhà hàng BBQ - Thịt nướng ...,"[Sun:11:00 AM - 11:00 PM, Mon:11:00 AM - 10:00...",https://www.tripadvisor.com//Restaurant_Review...,,,[Great service. Good price and good food. The ...,"134 Phan Châu Trinh, Phước Ninh, Hải Châu, Đà ...",5.0
5,Thìa Gỗ Restaurant Da Nang,"Vietnamese, Vegetarian Friendly",,"[Sun:10:00 AM - 10:00 PM, Mon:10:00 AM - 10:00...",https://www.tripadvisor.com//Restaurant_Review...,"₩2,124","₩26,554",[Quynh was our server and took great care of u...,"53 Phan Thuc Duyen, My An, Da Nang 50507 Vietnam",5.0
6,Six On Six Cafe,"American, Cafe, Fusion","Hi! We're a small, funky specialty cafe on the...","[Sun:07:30 AM - 9:30 PM, Mon:07:30 AM - 9:30 P...",https://www.tripadvisor.com//Restaurant_Review...,"₩2,591","₩6,477",[My favorite western breakfast restaurant in a...,"64 Bà Huyện Thanh Quan K14/11 Luu Quang Thuan,...",4.5
7,La Cabana - House of BBQ,"Steakhouse, Grill, Diner",La Cabana is a Cozy Argentinian Steakhouse in ...,"[Sun:4:00 PM - 11:00 PM, Mon:4:00 PM - 11:00 P...",https://www.tripadvisor.com//Restaurant_Review...,"₩9,067","₩38,860",[Booked table after reading reviews and was no...,"505 D. Tran Hung Dao An Hai Trung, Son Tra, Da...",5.0
8,The Golden Dragon,"Chinese, International, Asian",Tease your taste bud with authentic Cantonese ...,"[Sun:11:30 AM - 2:00 PM:5:30 PM - 9:30 PM, Mon...",https://www.tripadvisor.com//Restaurant_Review...,,,[Employee is inattentive. Very bad experiences...,"2nd Floor, Grand Mercure Danang Hotel, Lot A1,...",5.0
9,The Gypsy Rooftop Kitchen & Bar,"Bar, European",THE GYPSY is perched on the rooftop of the sty...,"[Sun:5:00 PM - 12:00 AM, Mon:5:00 PM - 12:00 A...",https://www.tripadvisor.com//Restaurant_Review...,"₩15,933","₩69,041",[If you are looking for a great place for the ...,"20 Dong Da 10th floor, New Orient Hotel, Da Na...",5.0


---
### Partial testing

In [16]:
# rating

url = "https://www.tripadvisor.com//Restaurant_Review-g298085-d24082246-Reviews-3_Big_Nu_ng_L_u-Da_Nang.html"
response = requests.get(url, headers=headers)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, "html.parser")
with open("output1.html", "w") as file:
    file.write(str(soup))   

# rating
rating = soup.find('span', class_="ZDEqb").text;    print(f'Rating: {rating}')
# rating = soup.select_one("#component_52 > div.hILIJ > div > div:nth-child(1) > div > div:nth-child(1) > div.QEQvp > span.ZDEqb"); 
# rating = soup.find('div', class_="QEQvp").text
print(f'Rating: {rating}')

Rating: 5.0 
Rating: 5.0 


In [11]:
# reviews, only english reviews

url = "https://www.tripadvisor.com//Restaurant_Review-g298085-d24082246-Reviews-3_Big_Nu_ng_L_u-Da_Nang.html"
response = requests.get(url, headers=headers)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, "html.parser")

total_reviews = int(soup.find_all('span', class_="count")[0].text[1:-1].replace(',', ''));    print(f'Total_reviews: {total_reviews}')
review_pages = total_reviews//15 if total_reviews%15 == 0 else total_reviews//15 + 1;    print(f'Review pages: {review_pages}')
review_list = []
for i in tqdm(range(0, review_pages), desc='Review crwaling...'):
    if i > 1:
        url_list = url.split('-Reviews-')
        current_url = url_list[0] + f'-Reviews-or{15*i}-' + url_list[1]
        # print(f'current_url: {current_url}')
        response = requests.get(current_url, headers=headers)
        response.raise_for_status()
        html = response.text
        soup = BeautifulSoup(html, "html.parser") 

    reviews = soup.find_all('p', class_="partial_entry");
    for review in reviews:
        review_list.append(review.text)
    #     print(review.text)
    # print(f'Reviews: {len(review_list)}')

    time.sleep(rd.uniform(0.1, 0.5))
print(f'Reviews: {len(review_list)}')

Total_reviews: 13
Review pages: 1


Review crwaling...: 100%|██████████| 1/1 [00:00<00:00,  2.37it/s]

Reviews: 13





In [13]:
# time test

driver = setWebdriver()
url = "https://www.tripadvisor.com/Restaurant_Review-g15296807-d13810289-Reviews-Thia_G_Restaurant_Da_Nang-My_An_Da_Nang.html"
response = driver.get(url)

try:
    WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#component_50 > div > div:nth-child(3) > span.DsyBj.YTODE > div > span.mMkhr")))
    element = driver.find_element(By.CSS_SELECTOR, "#component_50 > div > div:nth-child(3) > span.DsyBj.YTODE > div > span.mMkhr")
    time.sleep(1)
    element.click() 
except:
    times = np.NaN

time_list = [] 
time_elements = driver.find_elements(By.CLASS_NAME, "RiEuX.f")
for time_element in time_elements:
    time_list.append(time_element.text.replace('\n', ':'))
    print(time_element)
print(time_list)

  driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)


<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", element="67F080C407A4187949C41A7E8080B652_element_75")>
<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", element="67F080C407A4187949C41A7E8080B652_element_76")>
<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", element="67F080C407A4187949C41A7E8080B652_element_77")>
<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", element="67F080C407A4187949C41A7E8080B652_element_78")>
<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", element="67F080C407A4187949C41A7E8080B652_element_79")>
<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", element="67F080C407A4187949C41A7E8080B652_element_80")>
<selenium.webdriver.remote.webelement.WebElement (session="256f007f9ab2cad63c076291897ee137", elemen