# In-Class AI-Assisted PDF Scraping Example
Use an AI tool to write a scraper that downloads the Preliminary Prints PDFs listed on the [SCOTUS US Reports website](https://www.supremecourt.gov/opinions/USReports.aspx).

BASE_URL = 'https://www.supremecourt.gov/opinions/USReports.aspx'

Link to ChatGPT transcript: https://chatgpt.com/share/67eb2a9d-82ec-800f-82dd-6868872288e9

In [1]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Download every PDF linked from the "Preliminary Prints" section of the
# U.S. Reports page into a local "pdfs" folder.

# Page to scrape, and the base used to resolve relative PDF links found on it.
base_url = "https://www.supremecourt.gov/opinions/USReports.aspx"
pdf_base = "https://www.supremecourt.gov/opinions/"

# Create folder to store PDFs
os.makedirs("pdfs", exist_ok=True)

# Get the page content. A timeout keeps a stalled connection from hanging the
# cell forever, and raise_for_status() stops us from parsing an error page.
response = requests.get(base_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Find the Preliminary Prints section. soup.find() returns None when no
# matching element exists, so fail with a clear message instead of an
# AttributeError on the next line.
prelim_section = soup.find("div", id="cellPre")
if prelim_section is None:
    raise RuntimeError(
        f'Could not find <div id="cellPre"> on {base_url}; '
        "the page layout may have changed."
    )

# Find all links within that section (only anchors that carry an href).
pdf_links = prelim_section.find_all("a", href=True)

# Loop over the links and download PDFs
for link in pdf_links:
    href = link["href"]
    # Skip anything that is not a PDF; compare case-insensitively so that
    # ".PDF" links are not missed.
    if not href.lower().endswith(".pdf"):
        continue

    full_url = urljoin(pdf_base, href)
    filename = os.path.join("pdfs", os.path.basename(href))

    print(f"Downloading {filename}...")
    try:
        pdf_response = requests.get(full_url, timeout=30)
        # Without this check an HTTP error page would be saved as a ".pdf".
        pdf_response.raise_for_status()
    except requests.RequestException as err:
        # One bad link should not abort the remaining downloads.
        print(f"Skipping {full_url}: {err}")
        continue

    with open(filename, "wb") as f:
        f.write(pdf_response.content)

print("Done downloading all PDFs.")



Downloading pdfs/587US1PP_web.pdf...
Downloading pdfs/586US1PP_final.pdf...
Downloading pdfs/586US2PP_final.pdf...
Downloading pdfs/585US1PP_final.pdf...
Downloading pdfs/585US2PP_final.pdf...
Downloading pdfs/584US1PP_final.pdf...
Downloading pdfs/584US2PP_final.pdf...
Downloading pdfs/583US1PP_final.pdf...
Downloading pdfs/583US2PP_final.pdf...
Done downloading all PDFs.
