# Selenium

Selenium 是一个自动化测试工具，支持多种浏览器，主要用来解决 JavaScript 渲染的问题。

本章内容：
- [基本使用](#基本使用)
- [声明浏览器对象](#声明浏览器对象)
- [访问页面](#访问页面)
- [查找元素](#查找元素)
    - [单个元素](#单个元素)
    - [多个元素](#多个元素)

## 基本使用

In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Basic usage: open Baidu, submit a search, wait for the results, then
# print the resulting URL and the session cookies.
browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")
    # Locate the search box. find_element(By.ID, ...) replaces the
    # find_element_by_id helper, which was removed in Selenium 4.3.
    search_box = browser.find_element(By.ID, 'kw')
    # Type the query and submit it with the Enter key.
    search_box.send_keys('English speeking' + Keys.ENTER)
    # 10 is a timeout, not a fixed sleep: the wait polls for up to 10 seconds ...
    wait = WebDriverWait(browser, 10)
    # ... until an element with id 'content_left' is present in the DOM.
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)    # URL of the results page
    print(browser.get_cookies())  # cookies of the current session
    # print(browser.page_source)  # uncomment to dump the rendered HTML
finally:
    # quit() ends the WebDriver session and shuts the driver down;
    # close() would only close the current window.
    browser.quit()

https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=English%20speeking&rsv_pq=cf299b8a006d756a&rsv_t=a13fD6g4fnCt0fOzBd1mVq%2FBu8l2wTZnSL1Y1pCA9TIwZZdwnM9a0jvUVrE&rqlang=cn&rsv_enter=1&rsv_dl=tb&rsv_sug3=16&rsv_sug2=0&inputT=159&rsv_sug4=159
[{'domain': 'baidu.com', 'httpOnly': False, 'secure': False, 'value': '1', 'name': 'PSINO', 'path': '/'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'secure': False, 'value': '1', 'name': 'BD_CK_SAM', 'path': '/'}, {'domain': 'baidu.com', 'httpOnly': False, 'secure': False, 'value': '1441_21083_29074_29522_29519_28518_29098_29567_28832_29220_26350', 'name': 'H_PS_PSSID', 'path': '/'}, {'domain': 'baidu.com', 'httpOnly': False, 'secure': False, 'value': '15DD077A47532A30579D7BDED3DBECF3', 'expiry': 3711934537.317591, 'name': 'BIDUPSID', 'path': '/'}, {'domain': 'baidu.com', 'httpOnly': False, 'secure': False, 'value': '0', 'name': 'delPer', 'path': '/'}, {'domain': 'baidu.com', 'httpOnly': False, 'secure': False, 'value': '156445

## 声明浏览器对象

In [None]:
from selenium import webdriver

# One WebDriver instance per supported browser. Each constructor needs the
# matching browser and driver to be available on this machine.
chrome = webdriver.Chrome()
firefox = webdriver.Firefox()  # the class is spelled "Firefox"; "Firfox" raises AttributeError
safari = webdriver.Safari()
edge = webdriver.Edge()
ie = webdriver.Ie()
# NOTE(review): Opera and PhantomJS drivers were dropped in Selenium 4.x;
# these two lines only work on older Selenium releases - confirm the
# installed version before running them.
opera = webdriver.Opera()
phantomjs = webdriver.PhantomJS()

## 访问页面

In [13]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Run Chrome in headless mode (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")

chrome = webdriver.Chrome(options=chrome_options)
try:
    # Load the page, then report where we ended up and which cookies were set.
    chrome.get("https://www.taobao.com")
    print("浏览器已打开")
    print(chrome.current_url)
    print(chrome.get_cookies())
finally:
    # Always end the session, even if loading the page failed; quit() also
    # stops the driver process, which close() alone does not.
    chrome.quit()
print("浏览器已关闭")

浏览器已打开
https://www.taobao.com/
[{'domain': 'taobao.com', 'httpOnly': True, 'secure': False, 'value': '18797bf8924c663deb1192f8be8a1629', 'name': 'cookie2', 'path': '/'}, {'domain': 'taobao.com', 'httpOnly': False, 'secure': False, 'value': '3ee3e05766768', 'name': '_tb_token_', 'path': '/'}, {'domain': 'taobao.com', 'httpOnly': False, 'secure': False, 'value': 'BJOTxcIlyoVhsobRhV0YNWAgKRe9SCcKaQfcykWw77LpxLNmzRi3WvEW-j2q5H8C', 'expiry': 1580007101, 'name': 'isg', 'path': '/'}, {'domain': 'taobao.com', 'httpOnly': False, 'secure': False, 'value': 'vp7GFcy+H2ICAXWt2d7cAhP/', 'expiry': 2195175102, 'name': 'cna', 'path': '/'}, {'domain': 'taobao.com', 'httpOnly': False, 'secure': False, 'value': 'cn', 'expiry': 1595991101.682661, 'name': 'thw', 'path': '/'}, {'domain': 'taobao.com', 'httpOnly': False, 'secure': False, 'value': '0', 'name': 'v', 'path': '/'}, {'domain': 'taobao.com', 'httpOnly': False, 'secure': False, 'value': 'd3c715564224a95806681b074dfe771b', 'expiry': 1572231102.270123

## 查找元素

### 单个元素

In [18]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Run Chrome in headless mode (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")

chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("https://www.taobao.com")
    print("浏览器已打开")

    # Three locator strategies that all resolve to the same search box.
    # find_element(By.<STRATEGY>, value) replaces the find_element_by_*
    # helpers, which were removed in Selenium 4.3.
    input_first = chrome.find_element(By.ID, 'q')
    input_second = chrome.find_element(By.CSS_SELECTOR, '#q')
    input_third = chrome.find_element(By.XPATH, '//*[@id="q"]')
    print(input_first, input_second, input_third, sep="\n")
finally:
    # Always end the session, even if a lookup failed; quit() also stops
    # the driver process, which close() alone does not.
    chrome.quit()
print("浏览器已关闭")

浏览器已打开
<selenium.webdriver.remote.webelement.WebElement (session="cd631433c4d7db1dc02eb5e7791fefde", element="d5965a52-b5a3-4671-a254-8b5b22742eb1")>
<selenium.webdriver.remote.webelement.WebElement (session="cd631433c4d7db1dc02eb5e7791fefde", element="d5965a52-b5a3-4671-a254-8b5b22742eb1")>
<selenium.webdriver.remote.webelement.WebElement (session="cd631433c4d7db1dc02eb5e7791fefde", element="d5965a52-b5a3-4671-a254-8b5b22742eb1")>
浏览器已关闭


查找单个元素的一些方法（注意：`find_element_by_*` 系列方法在 Selenium 4 中已弃用，并在 4.3.0 中被移除，新代码请使用 `find_element(By.XXX, value)`）：
- find_element(by='id', value=None)
- find_element_by_name(name)
- find_element_by_css_selector(css_selector)
- find_element_by_id(id_)
- find_element_by_link_text(link_text)
- find_element_by_class_name(name)
- find_element_by_xpath(xpath)
- find_element_by_tag_name(name)
- find_element_by_partial_link_text(link_text)


In [1]:
%%timeit -n1 -r1
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Run Chrome in headless mode (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")

chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("https://www.taobao.com")
    print("浏览器已打开")

    # Generic lookup: the strategy (By.ID) and the value are passed separately.
    input_first = chrome.find_element(By.ID, 'q')
    print(input_first)
finally:
    # Always end the session, even if the lookup failed; quit() also stops
    # the driver process, which close() alone does not.
    chrome.quit()
print("浏览器已关闭")

浏览器已打开
<selenium.webdriver.remote.webelement.WebElement (session="a7a96c27fd3c41d606d52d6f4e5709cb", element="02306ff5-b97a-45bb-a24a-9d51400bd9af")>
浏览器已关闭
6.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### 多个元素

In [2]:
%%timeit -n1 -r1
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Run Chrome in headless mode (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")

chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("https://www.taobao.com")
    print("浏览器已打开")

    # find_elements returns a list of every match (empty if none match).
    # It replaces find_elements_by_css_selector, removed in Selenium 4.3.
    input_first = chrome.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(input_first)
finally:
    # Always end the session, even if the lookup failed; quit() also stops
    # the driver process, which close() alone does not.
    chrome.quit()
print("浏览器已关闭")

浏览器已打开
[<selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="83a4d19a-22f5-4c84-83a5-2420497c77e5")>, <selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="9768ecf1-ba49-4239-b647-dac52a28d258")>, <selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="d7a2c8a8-c78a-45d2-8d38-8ad620d6c398")>, <selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="abb20f48-50f0-4b56-97c8-8efbec8b0a63")>, <selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="79168711-a072-4d34-b9d2-49409c45b3c1")>, <selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="00b73105-872a-410e-a6b7-ed5b65292082")>, <selenium.webdriver.remote.webelement.WebElement (session="d13a09d3b42f962a09d986c5f4ed1b73", element="bcaffff4-e292-4fdf-9dc1-0

In [8]:
%%timeit -n1 -r1
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Run Chrome in headless mode (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")

chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("https://www.taobao.com")
    print("浏览器已打开")

    # Every <li> under .service-bd, i.e. the entries of the category menu.
    service_items = chrome.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print([(item.text, item.size) for item in service_items])

    # screenshot() writes straight to the given path, so make sure the
    # target directory exists before saving anything into it.
    os.makedirs("output", exist_ok=True)
    for index, item in enumerate(service_items):
        # %06d zero-pads the index (000000.png, 000001.png, ...); the old
        # %6d padded with spaces and produced file names like "     0.png".
        item.screenshot("output/%06d.png" % index)
finally:
    # Always end the session, even if a step above failed; quit() also
    # stops the driver process, which close() alone does not.
    chrome.quit()
print("浏览器已关闭")

浏览器已打开
[('女装 / 男装 / 内衣\n\ue62e', {'height': 32, 'width': 189}), ('鞋靴 / 箱包 / 配件\n\ue62e', {'height': 32, 'width': 189}), ('童装玩具 / 孕产 / 用品\n\ue62e', {'height': 32, 'width': 189}), ('家电 / 数码 / 手机\n\ue62e', {'height': 32, 'width': 189}), ('美妆 / 洗护 / 保健品\n\ue62e', {'height': 32, 'width': 189}), ('珠宝 / 眼镜 / 手表\n\ue62e', {'height': 32, 'width': 189}), ('运动 / 户外 / 乐器\n\ue62e', {'height': 32, 'width': 189}), ('游戏 / 动漫 / 影视\n\ue62e', {'height': 32, 'width': 189}), ('美食 / 生鲜 / 零食\n\ue62e', {'height': 32, 'width': 189}), ('鲜花 / 宠物 / 农资\n\ue62e', {'height': 32, 'width': 189}), ('工具 / 装修 / 建材\n\ue62e', {'height': 32, 'width': 189}), ('家具 / 家饰 / 家纺\n\ue62e', {'height': 32, 'width': 189}), ('汽车 / 二手车 / 用品\n\ue62e', {'height': 32, 'width': 189}), ('办公 / DIY / 五金电子\n\ue62e', {'height': 32, 'width': 189}), ('百货 / 餐厨 / 家庭保健\n\ue62e', {'height': 32, 'width': 189}), ('学习 / 卡券 / 本地服务\n\ue62e', {'height': 32, 'width': 189})]
浏览器已关闭
11.3 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


查找多个元素的一些方法（注意：`find_elements_by_*` 系列方法在 Selenium 4 中已弃用，并在 4.3.0 中被移除，新代码请使用 `find_elements(By.XXX, value)`）：
- find_elements(by='id', value=None)
- find_elements_by_name(name)
- find_elements_by_css_selector(css_selector)
- find_elements_by_id(id_)
- find_elements_by_link_text(link_text)
- find_elements_by_class_name(name)
- find_elements_by_xpath(xpath)
- find_elements_by_tag_name(name)
- find_elements_by_partial_link_text(link_text)
