Day 131

업무자동화(RPA)

웹 자동화

동적 페이지 스크롤

import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
s = Service('C:/webdriver/chromedriver.exe')

browser = webdriver.Chrome(service=s, options=options)

browser.get('https://shopping.naver.com/home/p/index.naver')

# '무선마우스' 입력
browser.find_element(By.XPATH, '//*[@id="autocompleteWrapper"]/input[1]').send_keys('무선마우스')

time.sleep(0.5)

# 검색 버튼 클릭
browser.find_element(By.XPATH, '//*[@id="autocompleteWrapper"]/a[2]').click()

# 스크롤
# 지정한 위치로 스크롤 내리기
browser.execute_script('window.scrollTo(0, 1440)')

# 화면 가장 아래로 스크롤 내리기
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')

# 동적 페이지에 대해서 마지막까지 스크롤 반복 수행
interval = 2 # 2초에 한번씩 스크롤 내리기

# 현재 문서 높이를 가져와서 저장
prev_height = browser.execute_script('return document.body.scrollHeight')

# 반복 수행
while True:
    # 스크롤을 화면 가장 아래로 내림
    browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')

    # 페이지 로딩 대기
    time.sleep(interval)

    # 현재 문서 높이 가져와서 저장
    curr_height = browser.execute_script('return document.body.scrollHeight')
    if curr_height == prev_height: # 직전 높이와 같으면, 높이 변화가 없으면
        break # 반복문 탈출 (모든 스크롤 동작 완료)

    prev_height = curr_height

time.sleep(3)

browser.quit()

execute_script는 주어진 javascript를 실행시켜 준다.

특정 영역 스크롤

import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
s = Service('C:/webdriver/chromedriver.exe')

browser = webdriver.Chrome(service=s, options=options)
browser.get('https://www.w3schools.com/html/default.asp')
browser.maximize_window()

time.sleep(2)

# 특정 영역 스크롤
elem = browser.find_element(By.XPATH, '//*[@id="leftmenuinnerinner"]/a[61]')

# 방법 1 : ActionChain
actions = ActionChains(browser)
actions.move_to_element(elem).perform()

# 방법 2 : 좌표 정보 이용
xy = elem.location_once_scrolled_into_view # 함수가 아니므로 () 쓰지 말 것, element의 좌표 정보 제공
print('type : ', type(xy))
print('value : ', xy)

elem.click()

time.sleep(3)
browser.quit()

type : <class 'dict'>
value : {'x': 0, 'y': 752}

이미 웹 페이지에 존재한다면 꼭 눈에 보여야 클릭할 수 있는 것이 아니어서 스크롤을 내리지 않고 클릭할 수 있다.

파일 다운로드

import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_experimental_option('prefs', {'download.default_directory':r'C:\Users\HJK\Desktop\study\Python_workspace\RPA\web_automation'}) # 다운로드 경로 설정
s = Service('C:/webdriver/chromedriver.exe')

browser = webdriver.Chrome(service=s, options=options)
browser.get('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml5_a_download')

browser.switch_to.frame('iframeResult')

# 다운로드 링크 클릭
elem = browser.find_element(By.XPATH, '/html/body/p[2]/a')
elem.click()

time.sleep(2)
browser.quit()

위의 예제는 그림을 클릭하면 다운로드가 시작되는 예제인 것을 참고해야 한다.

페이지 로딩 대기

import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec

options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
s = Service('C:/webdriver/chromedriver.exe')

browser = webdriver.Chrome(service=s, options=options)
browser.maximize_window()
browser.get('https://flight.naver.com/')

time.sleep(0.5)

# 가는 날 클릭
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[4]/div/div/div[2]/div[2]/button[1]').click()
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[9]/div[2]/div[1]/div[2]/div/div[2]/table/tbody/tr[4]/td[5]/button/b').click()

# 오는 날 클릭
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[9]/div[2]/div[1]/div[2]/div/div[3]/table/tbody/tr[2]/td[3]/button/b').click()

# 제주도 클릭
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[4]/div/div/div[2]/div[1]/button[2]').click()
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[9]/div[2]/section/section/button[1]').click()
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[9]/div[2]/section/section/div/button[2]').click()

# 항공권 검색 클릭
browser.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[4]/div/div/button').click()

# 로딩 대기
try:
    # 첫 번째 결과를 가져와서 출력
    elem = WebDriverWait(browser, 10).until(ec.presence_of_element_located((By.XPATH, '//*[@id="__next"]/div/div[1]/div[4]/div/div[2]/div[2]')))
    print(elem.text)
except:
    print('Failed')

time.sleep(5)
browser.quit()

제주항공
1%적립이벤트혜택
06:00GMP
07:05CJU
01시간 05분
일반석편도 80,500원~
네이버페이 결제시 1%+1,500원 적립편도 78,195원~

중요한 부분은 try except 구문 부분이다.

elem = WebDriverWait(browser, 10).until(ec.presence_of_element_located((By.XPATH, '//*[@id="__next"]/div/div[1]/div[4]/div/div[2]/div[2]')))

이 부분을 해석하자면 10초간 기다리는데, presence_of_element_located() 안의 요소가 웹 페이지 상에 존재하면 10초를 다 기다리지 않고 다음으로 넘어간다.

안에 '()'가 하나 더 있는 것은 튜플로 넘겨주기 위함이다.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

220223.md

220223.md

Day 131

업무자동화(RPA)

웹 자동화

동적 페이지 스크롤

특정 영역 스크롤

파일 다운로드

페이지 로딩 대기

Files

220223.md

Latest commit

History

220223.md

File metadata and controls

Day 131

업무자동화(RPA)

웹 자동화

동적 페이지 스크롤

특정 영역 스크롤

파일 다운로드

페이지 로딩 대기