Selenium을 이용한 Web-Scraping

환경 설정

1. 파이썬과 필요한 라이브러리: requests, beautifulsoup4, selenium
2. 크롬 드라이버 73버전 (https://sites.google.com/a/chromium.org/chromedriver/downloads)

목표
- 아고다(www.agoda.com)에서 특정 날짜, 특정 지역의 숙소를 가격이 낮은 순으로 스크래핑

In [1]:
# 1.1
# Fetch the Agoda landing page with a plain HTTP request and look for the
# search-box <input> in the returned HTML.
import requests
from bs4 import BeautifulSoup


# Without a timeout requests waits indefinitely for a server that never
# answers, which would leave this cell hanging.
res = requests.get("https://www.agoda.com/", timeout=10)
content = BeautifulSoup(res.content, 'html.parser')

# find_all is the documented BeautifulSoup 4 name; findAll is a legacy alias.
inputs = content.find_all('input', {'class': "SearchBoxTextEditor"})

# The recorded run prints [] here: the search box is not in the HTML that
# requests receives (presumably it is rendered client-side -- unconfirmed).
print(inputs)

[]


In [2]:
# 2.1
# Load the same landing page in a Chrome instance driven by Selenium and run
# the same search-box lookup against the browser's page source.
from selenium import webdriver

from time import sleep

driver = webdriver.Chrome('./chromedriver/chromedriver.exe')
try:
    driver.get('https://www.agoda.com/')
    # Fixed pause so the page can finish loading before the source is read.
    sleep(10)

    content = BeautifulSoup(driver.page_source, 'html.parser')
    # find_all is the documented BeautifulSoup 4 name; findAll is a legacy
    # alias.
    inputs = content.find_all('input', {'class': "SearchBoxTextEditor"})

    # The recorded run prints one matching <input> element here.
    print(inputs)
finally:
    # Always shut the browser down, even if loading or parsing raised;
    # otherwise the Chrome/chromedriver processes are left running.
    driver.quit()

[<input aria-label="도시, 지역, 숙소명, 관광 명소 등으로 검색" class="SearchBoxTextEditor SearchBoxTextEditor--autocomplete" data-selenium="textInput" placeholder="도시, 지역, 숙소명, 관광 명소 등으로 검색" tabindex="-1" type="text" value=""/>]


In [3]:
# 2.2
# Start a new Chrome session and open a pre-built Agoda search-result URL.
# The query string fixes the search: checkIn=2019-04-12, checkOut=2019-04-13,
# rooms=1, adults=2, children=0, currencyCode=KRW, and a URL-encoded Korean
# search term.
search_url = 'https://www.agoda.com/ko-kr/pages/agoda/default/DestinationSearchResult.aspx?asq=rp7c5epycLthZ0hHoORGnpufa9Vwpz6XltTHq4n%2B9gN3dKLJ2CSXy2MFQ4mXIPMkG8mkPiCChFumqFZwERSiKVCFcPsjdKRWeTMp2t6jQpMNYJ%2FcCWv%2F24SObsMFxXBeq%2F%2BbkS51iQs%2FzvQsTUxKZQ%2Foa8qaSyQrg61mdIzB86KPzSkK9fKrnjwxW6q6lPcIzPx3p8UWZS2iN1I3SW%2BqojVI1hfLTktOcN3QfCrx%2FY0%3D&city=16901&cid=1732639&tag=a85f57f6-f4b0-5117-39ff-f69b8b36db1a&gclid=Cj0KCQjwkIzlBRDzARIsABgXqV9474e-pU1st8ZdlFWySlL168eYxXpRblOaoy9bxbsHGkWD5Xwh6eUaAtLFEALw_wcB&tick=636898847383&isdym=true&searchterm=%EC%A0%9C%EC%A3%BC%EB%8F%84%EB%8F%84&txtuuid=62401b76-c320-499e-81f9-ce04e8f77881&languageId=9&userId=d668e3a1-5c83-493c-9b69-118608c89cab&sessionId=4fjgaxujjqkkm4i0l24hq2g2&pageTypeId=1&origin=KR&locale=ko-KR&aid=81837&currencyCode=KRW&htmlLanguage=ko-kr&cultureInfoName=ko-KR&ckuid=d668e3a1-5c83-493c-9b69-118608c89cab&prid=0&checkIn=2019-04-12&checkOut=2019-04-13&rooms=1&adults=2&children=0&priceCur=KRW&los=1&textToSearch=%EC%A0%9C%EC%A3%BC%EB%8F%84%EB%8F%84&productType=-1&travellerType=1&familyMode=off'

# This session stays open; the later cells keep using `driver`.
driver = webdriver.Chrome('./chromedriver/chromedriver.exe')
driver.get(search_url)

# Short fixed pause before the next cell interacts with the page.
sleep(2)

In [4]:
# 2.3
# Dismiss the calendar alert, then click the "sort by price" link.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Upper bound (seconds) for each element to become clickable. Looking the
# elements up immediately only works if the earlier fixed sleep happened to be
# long enough; an explicit wait polls until the element is actually ready and
# raises TimeoutException if it never is.
wait = WebDriverWait(driver, 10)

close_locator = (By.CLASS_NAME, 'CalendarAlertMessage__close')
wait.until(EC.element_to_be_clickable(close_locator)).click()
# Keep the original settle pause after closing the alert.
sleep(3)

sort_locator = (By.CSS_SELECTOR, "a[data-element-name='search-sort-price']")
wait.until(EC.element_to_be_clickable(sort_locator)).click()

In [5]:
# 2.4
# Scroll down with the SPACE key until the page height stops growing, then
# jump back to the top of the page.
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    for _ in range(15):
        # Build a fresh chain for every key press. On Selenium 3 a reused
        # ActionChains keeps its queued actions after perform(), so each
        # perform() would replay every SPACE queued so far instead of
        # sending exactly one.
        ActionChains(driver).send_keys(Keys.SPACE).perform()
        sleep(1)
    new_height = driver.execute_script("return document.body.scrollHeight")
    # No growth after a full round of key presses: treat loading as finished.
    if new_height == last_height:
        break
    last_height = new_height

print("loading complete")
driver.execute_script("window.scrollTo(0, 0);")
sleep(3)

loading complete


In [6]:
# 2.5
# Parse the fully scrolled page and collect the <li> result cards. A list as
# the class filter matches an element carrying either of the listed classes.
content = BeautifulSoup(driver.page_source, 'html.parser')
list_items = content.find_all(
    'li',
    attrs={'class': ["PropertyCardItem", "ssr-search-result"]},
)

class Hotel:
    """A scraped hotel listing: a name together with its price.

    Args:
        name: Hotel name to store on the instance.
        price: Price to store on the instance. Any value that can be
            formatted with ``str()`` is accepted.
    """

    def __init__(self, name, price):
        self.name = name
        self.price = price

    def __str__(self):
        """Return the listing as ``"<name> = <price>"``."""
        # str.format converts each field itself, so a non-string price
        # (e.g. an int) no longer raises TypeError as `+` concatenation did.
        return "{} = {}".format(self.name, self.price)

    def __repr__(self):
        """Return the same text as ``str()`` so lists of hotels print readably."""
        return str(self)
        
# Build a Hotel for every result card that has both a name heading and a
# price amount, print the collected list, then close the browser.
hotels = []
for item in list_items:
    name_tag = item.find('h3', class_="hotel-name")
    price_tag = item.find('span', class_="price-box__price__amount")

    # Cards missing either field are skipped.
    if name_tag is None or price_tag is None:
        continue

    hotels.append(Hotel(name_tag.text, price_tag.text))

print(hotels)
sleep(5)
driver.quit()

[애월의 프라이빗 하우스 (50m², 침실 2개, 프라이빗 욕실 1개) (Jeju  Island  Elvie  House l 2 Rooms l 3 Beds ) = 12,397, 씨엘 블루 호텔 (Hotel Ciel Blue) = 41,274, 바다와 자전거  (Sea and Bike) = 18,000, 오라성 모텔 (Orasung Motel) = 18,828, 민중각 게스트하우스 (Minjoonggak Guesthouse) = 20,284, 트윈스 호텔 (Twins Hotel Jeju) = 22,841, 호텔 올레세븐 (Hotel Olle Seven) = 23,513, 꼬닥꼬닥 게스트하우스  (Kodakkodak Guesthouse) = 23,517, 호텔 파인힐 (Hotel Pinehill) = 23,753, 호텔 가온 제이스테이 (Hotel Gaon J Stay) = 24,426, 플레이스 캠프 제주 (Playce Camp Jeju) = 24,570, 그라벨 호텔 (Grabel Hotel) = 40,991, 호텔 G (Hotel G) = 24,775, 성산의 프라이빗 하우스 (26m², 침실 1개, 프라이빗 욕실 1개) (brassica folwer pension) = 25,086, 제주 알프스호텔 (Jeju Alps Hotel) = 25,550, 제주 백록담 호텔 (jeju-brdhotel) = 25,865, 호텔 베스트원 (Hotel BestOne) = 26,102, 제주 마실 게스트하우스 (Masil Guesthouse Jeju) = 26,205, 참피온 호텔 (Champion Hotel) = 26,329, 뷰타워리조트 (View Tower Resort) = 26,417, 윈드힐 홀리데이 하우스 (Windhill Holiday House) = 26,532, 더 루케테 호텔 (The Lucete hotel) = 26,573, 힐링 팰리스 펜션 (Healing Palace Pension) = 26,735, Jeju Orastay Hotel = 27,156