In [None]:
import os
import time
from urllib.parse import unquote, urljoin

import requests
from bs4 import BeautifulSoup

# --- Scraper configuration -------------------------------------------------
# Site root; relative links found on the listing pages are resolved against it.
BASE_URL = "https://berkeleyca.gov"

# Listing-page URL; "{}" is filled with the zero-based page index.
PAGE_URL_TEMPLATE = (
    BASE_URL
    + "/your-government/boards-commissions/zoning-adjustments-board?page={}"
)

# Number of listing pages to visit (indices 0 through 8).
NUM_PAGES = 9

# Folder that receives the downloaded PDFs; created up front if missing.
DOWNLOAD_DIR = "zab_minutes_pdfs"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Crawl each paginated listing page of the Zoning Adjustments Board site and
# save every PDF linked as "Minutes" into DOWNLOAD_DIR. A page or file that
# fails is reported and skipped so the remaining downloads still run.
REQUEST_TIMEOUT = 30  # seconds; requests.get() never times out by default

for page in range(NUM_PAGES):
    print(f"\nScraping page {page + 1} of {NUM_PAGES}")
    url = PAGE_URL_TEMPLATE.format(page)
    try:
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to fetch page {page}: {e}")
        continue

    soup = BeautifulSoup(resp.content, "html.parser")
    rows = soup.find_all("tr", class_="table-expand-row-content")

    # True once any "Minutes" link is seen on this page, even if its download
    # later fails, so the "none found" message is never printed misleadingly.
    found_any = False
    for row in rows:
        for link in row.find_all("a", string="Minutes"):
            relative_url = link.get("href")
            if not relative_url:
                continue
            found_any = True

            full_url = urljoin(BASE_URL, relative_url)
            # Drop the query string, then decode percent-escapes so the file is
            # saved as "ZAB Minutes.pdf" rather than "ZAB%20Minutes.pdf".
            # basename() runs after decoding so an escaped "/" (%2F) cannot
            # place the file outside DOWNLOAD_DIR.
            filename = os.path.basename(unquote(relative_url.split("?")[0]))
            if not filename:
                print(f"Skipping {full_url}: no file name in URL")
                continue
            save_path = os.path.join(DOWNLOAD_DIR, filename)

            print(f"📥 Downloading {filename}")
            try:
                pdf_resp = requests.get(full_url, timeout=REQUEST_TIMEOUT)
                pdf_resp.raise_for_status()
                with open(save_path, "wb") as f:
                    f.write(pdf_resp.content)
            except (requests.RequestException, OSError) as e:
                # Network/HTTP failures and disk errors are reported per file;
                # the loop then moves on to the next link.
                print(f"Error downloading {full_url}: {e}")
            else:
                time.sleep(1)  # being polite to the server
    if not found_any:
        print("No 'Minutes' PDFs found on this page.")



🔍 Scraping page 1 of 9
📥 Downloading 2025-05-22_DRAFT_ZAB%20Minutes.pdf
📥 Downloading 2025-05-08_FINAL_ZAB%20Minutes.pdf
📥 Downloading 2025-04-24_FINAL_ZAB%20Minutes.pdf
📥 Downloading 2025-04-10_ZAB%20Minutes.pdf
📥 Downloading 2025-03-27_ZAB%20Minutes.pdf
📥 Downloading 2025-03-13_ZAB%20Minutes.pdf
📥 Downloading 2025-02-27_ZAB%20Minutes1.pdf
📥 Downloading 2025-02-13_ZAB%20Minutes.pdf
📥 Downloading 2025-01-23_ZAB%20Minutes.pdf
📥 Downloading 2025-01-09_ZAB_Action%20Minutes_FINAL.pdf

🔍 Scraping page 2 of 9
📥 Downloading 2024-12-12_ZAB%20Minutes.pdf
📥 Downloading 2024-11-14_ZAB_Action%20Minutes.pdf
📥 Downloading 2024-10-10_ZAB_Action%20Minutes.pdf
📥 Downloading 2024-09-26_Action%20Minutes.pdf
📥 Downloading 2024-09-12_ZAB_Action%20Minutes.pdf
📥 Downloading 2024-08-08_ZAB_Action%20Minutes.pdf
📥 Downloading 2024-07-11_ZAB_Action%20Minutes.pdf
📥 Downloading 2024-06-27_Action%20Minutes.pdf
📥 Downloading 2024-05-30_Action%20Minutes.pdf
游닌 Downloading 2024-05-09_ZAB_Action%2