In [1]:
# Install Playwright and its system/browser dependencies in this Colab VM
!pip -q install playwright

# Install required Linux libraries (Debian/Ubuntu) for browsers
!python -m playwright install-deps

# Install just Chromium to keep it light (you can do 'playwright install' to get all)
!python -m playwright install chromium


Downloading Chromium 140.0.7339.16 (playwright build v1187)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1187/chromium-mac-arm64.zip[22m
Chromium 140.0.7339.16 (playwright build v1187) downloaded to /Users/benjaminlevine/Library/Caches/ms-playwright/chromium-1187
Downloading FFMPEG playwright build v1011[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/ffmpeg/1011/ffmpeg-mac-arm64.zip[22m
FFMPEG playwright build v1011 downloaded to /Users/benjaminlevine/Library/Caches/ms-playwright/ffmpeg-1011
Downloading Chromium Headless Shell 140.0.7339.16 (playwright build v1187)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1187/chromium-headless-shell-mac-arm64.zip[22m
Chromium Headless Shell 140.0.7339.16 (playwright build v1187) downloaded to /Users/benjaminlevine/Library/Caches/ms-playwright/chromium_headless_shell-1187


In [2]:
!pip -q install playwright
# Try one of these (uncomment one):
!playwright install
# !python -m playwright install

Downloading Firefox 141.0 (playwright build v1490)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/firefox/1490/firefox-mac-arm64.zip[22m
Firefox 141.0 (playwright build v1490) downloaded to /Users/benjaminlevine/Library/Caches/ms-playwright/firefox-1490
Downloading Webkit 26.0 (playwright build v2203)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/webkit/2203/webkit-mac-15-arm64.zip[22m
Webkit 26.0 (playwright build v2203) downloaded to /Users/benjaminlevine/Library/Caches/ms-playwright/webkit-2203


In [4]:
!python -m playwright install-deps

In [5]:
# Sanity check: confirm this kernel can import Playwright before going further.
try:
    from playwright.sync_api import sync_playwright
except Exception as import_error:
    # Report the failure instead of raising so the cell still finishes.
    print("❌ Playwright import failed:", import_error)
    print("If you're in a restricted environment, you may need to run this on your local computer.")
else:
    print("✅ Playwright is installed and import worked.")

✅ Playwright is installed and import worked.


In [7]:
# Uses Playwright's async API so we can await browser actions (needed in Colab/Jupyter)
from playwright.async_api import async_playwright

# Define an asynchronous function that saves a screenshot of a web page
async def screenshot_page(url: str, out_path: str = "screenshot.png"):
    """Capture a full-page screenshot of ``url`` and save it to ``out_path``.

    Args:
        url: Address the browser tab navigates to.
        out_path: Destination image file; defaults to "screenshot.png".

    Prints a confirmation line after the Playwright session has ended.
    """
    # Flags that make Chromium stable inside Colab containers.
    chromium_flags = ["--no-sandbox", "--disable-dev-shm-usage"]

    # The context manager starts Playwright and cleans it up automatically.
    async with async_playwright() as pw:
        chromium = await pw.chromium.launch(args=chromium_flags)
        tab = await chromium.new_page()
        await tab.goto(url)
        await tab.screenshot(path=out_path, full_page=True)
        # Close the browser explicitly to free its resources.
        await chromium.close()

    print(f"✅ Screenshot saved to {out_path}")

# Colab supports top-level 'await', so we can call the async function directly here
await screenshot_page("https://google.com", "screenshot2.png")            # run the function: visit Google and save screenshot2.png

✅ Screenshot saved to screenshot2.png


In [None]:
import re
from time import sleep
from playwright.async_api import async_playwright

def _safe_filename(title: str) -> str:
    safe = re.sub(r"[^\w\s-]", "", title).strip().replace(" ", "_")
    return safe or "page"

async def save_page_text_async(url: str, selector: str = "body", wait_seconds: float = 5.0):
    """Load ``url`` in headless Chromium and save the text of ``selector`` to a file.

    The output file is named after the page title (sanitized by
    ``_safe_filename``) with a ".txt" extension and is written relative to
    the current working directory.

    Args:
        url: Address to navigate to.
        selector: CSS selector of the element whose inner text is saved.
            If nothing matches, a placeholder message is written instead.
        wait_seconds: Pause after navigation before reading the page, to
            give the page time to finish rendering. Defaults to 5 seconds.
    """
    # Function-scope import keeps this cell self-contained.
    import asyncio

    async with async_playwright() as p:
        # Colab-friendly launch flags
        browser = await p.chromium.launch(args=["--no-sandbox", "--disable-dev-shm-usage"])
        try:
            page = await browser.new_page()
            await page.goto(url)
            # A blocking time.sleep() here would freeze the event loop that the
            # notebook kernel and Playwright share; asyncio.sleep() yields instead.
            await asyncio.sleep(wait_seconds)
            title = await page.title()
            el = await page.query_selector(selector)
            text = await el.inner_text() if el else "No content found for selector."
        finally:
            # Close the browser even if navigation or extraction failed.
            await browser.close()

    filename = _safe_filename(title) + ".txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"URL: {url}\nTitle: {title}\n\n{text}")
    print(f"✅ Saved text to {filename}")

# Colab supports top-level await, so the coroutine can be awaited directly in the cell.
# Saves the text of the page's <body> to "<sanitized page title>.txt" in the working directory.
await save_page_text_async("https://bhphotovideo.com", selector="body")
