同步模式

In [None]:
from playwright.sync_api import sync_playwright

# Sync API demo: open the same page in each browser engine in turn, save a
# per-engine screenshot, print the page title, then close that browser.
with sync_playwright() as playwright:
    engines = (playwright.chromium, playwright.firefox, playwright.webkit)
    for engine in engines:
        # headless=False opens a visible browser window.
        launched = engine.launch(headless=False)
        tab = launched.new_page()
        tab.goto("https://www.baidu.com")
        # The engine name keeps the three screenshots from overwriting each other.
        tab.screenshot(path=f"screenshot-{engine.name}.png")
        print(tab.title())
        launched.close()

异步模式

In [None]:
import asyncio
from playwright.async_api import async_playwright


async def main():
    """Open https://www.baidu.com in Chromium, Firefox and WebKit in turn.

    For each engine the browser is launched with a visible window, a
    screenshot is written to ``screenshot-<engine name>.png``, the page
    title is printed, and the browser is closed.
    """
    async with async_playwright() as p:
        for browser_type in [p.chromium, p.firefox, p.webkit]:
            browser = await browser_type.launch(headless=False)
            page = await browser.new_page()
            await page.goto("https://www.baidu.com")
            await page.screenshot(path=f"screenshot-{browser_type.name}.png")
            print(await page.title())
            await browser.close()


# asyncio.run() raises RuntimeError when an event loop is already running in
# the current thread, which is the case inside a Jupyter kernel. Detect that
# situation and schedule main() on the running loop instead; in a plain
# script (no running loop) fall back to asyncio.run().
try:
    running_loop = asyncio.get_running_loop()
except RuntimeError:
    running_loop = None

if running_loop is None:
    asyncio.run(main())
else:
    # Keep a module-level reference so the task is not garbage-collected
    # before it finishes.
    main_task = running_loop.create_task(main())

代码生成

In [None]:
playwright codegen -o script.py -b firefox

支持移动端浏览器

In [None]:
from playwright.sync_api import sync_playwright

# Mobile emulation demo: create a browser context from the built-in
# "iPhone 12 Pro Max" device descriptor, load a page in it and save a
# screenshot.
# sync_playwright must be *called*; the bare function object is not a
# context manager.
with sync_playwright() as p:
    iphone_12_pro_max = p.devices["iPhone 12 Pro Max"]
    browser = p.chromium.launch(headless=False)
    # The descriptor is unpacked as keyword arguments for new_context();
    # locale is set on top of it.
    context = browser.new_context(
        **iphone_12_pro_max,
        locale="zh-CN",
    )
    page = context.new_page()
    page.goto("https://www.whatismybrowser.com/")
    page.wait_for_load_state("networkidle")
    page.screenshot(path="iphone-12-pro-max.png")
    browser.close()

事件监听

In [None]:
from playwright.sync_api import sync_playwright


def on_response(response):
    """Print the HTTP status code and URL of a response received by the page."""
    print(f'Status {response.status}: {response.url}')


# Event-listener demo: register the handler before navigating so the
# responses triggered by the page load are reported.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.on('response', on_response)
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    browser.close()

In [None]:
from playwright.sync_api import sync_playwright


def on_response(response):
    """Print the parsed JSON body of successful movie-API responses only."""
    if response.status != 200 or '/api/movie/' not in response.url:
        return
    print(response.json())


# Attach the filter before navigation, then wait for the "networkidle"
# load state before closing the browser.
with sync_playwright() as playwright:
    chromium = playwright.chromium.launch(headless=False)
    tab = chromium.new_page()
    tab.on('response', on_response)
    tab.goto('https://spa6.scrape.center/')
    tab.wait_for_load_state('networkidle')
    chromium.close()

获取节点属性

In [None]:
from playwright.sync_api import sync_playwright

# Read one attribute straight from the page: the href of an element matching
# the selector "a.name", fetched after the "networkidle" load state.
with sync_playwright() as playwright:
    chromium = playwright.chromium.launch(headless=False)
    tab = chromium.new_page()
    tab.goto('https://spa6.scrape.center/')
    tab.wait_for_load_state('networkidle')
    link_target = tab.get_attribute('a.name', 'href')
    print(link_target)
    chromium.close()
                            

In [None]:
from playwright.sync_api import sync_playwright

# Collect every element matching "a.name" and print each one's href followed
# by its text content.
with sync_playwright() as playwright:
    chromium = playwright.chromium.launch(headless=False)
    tab = chromium.new_page()
    tab.goto('https://spa6.scrape.center/')
    tab.wait_for_load_state('networkidle')
    for link in tab.query_selector_all('a.name'):
        link_target = link.get_attribute('href')
        print(link_target)
        link_text = link.text_content()
        print(link_text)
    chromium.close()

In [None]:
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
    browser=p.chromium.launch(headless=False)
    page=browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    element=page.query_selector('a.name')
    print(element.get_attribute('href'))
    print(element.text_content())
    browser.close()

网络劫持

In [None]:
from playwright.sync_api import sync_playwright
import re


def cancel_request(route, request):
    """Route handler that aborts the intercepted request; ``request`` is unused."""
    route.abort()


# Request-interception demo: abort every request whose URL matches the image
# pattern, then screenshot the page to show the result.
with sync_playwright() as playwright:
    chromium = playwright.chromium.launch(headless=False)
    tab = chromium.new_page()

    image_url_pattern = re.compile(r"(\.png)|(\.jpg)")
    # The route must be registered before goto() so it applies to the page load.
    tab.route(image_url_pattern, cancel_request)

    tab.goto('https://spa6.scrape.center/')
    tab.wait_for_load_state('networkidle')
    tab.screenshot(path='no_picture.png')
    chromium.close()