In [8]:
import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# URL of the webpage to scrape
url = "https://indiankanoon.org/doc/173448784"

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops an HTTP error page
# (4xx/5xx) from being parsed as if it were the document, which would
# otherwise just look like "no PDF links found".
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Create the 'pdfs' directory if it doesn't exist.
# exist_ok=True removes the gap between checking and creating (another process
# may create the directory in between). It still raises FileExistsError when
# 'pdfs' exists but is not a directory, so that problem surfaces here instead
# of on the first attempt to write a file into it.
pdfs_dir_missing = not os.path.isdir("pdfs")
os.makedirs("pdfs", exist_ok=True)
if pdfs_dir_missing:
    print("Created 'pdfs' directory to save PDFs.")

# Collect the href of every anchor whose target mentions "pdf".
# The match is case-insensitive so hrefs such as "REPORT.PDF" are not missed,
# and dict.fromkeys() drops repeated hrefs (keeping first-seen order) so the
# same file is not downloaded more than once.
pdf_links = list(
    dict.fromkeys(
        anchor["href"]
        for anchor in soup.find_all("a", href=True)
        if "pdf" in anchor["href"].lower()
    )
)

# Debugging: Print the number of PDFs found
print(f"Found {len(pdf_links)} PDF links:")
for pdf in pdf_links:
    print(pdf)

# Download every collected link into the 'pdfs' directory, reporting each
# success or failure and carrying on with the next link after a failure.
if pdf_links:
    for index, link in enumerate(pdf_links, start=1):
        full_url = urljoin(url, link)  # Resolve relative hrefs against the page URL

        # Name the file after the last path segment of the resolved URL only.
        # Using the raw href would carry any "?query" or "#fragment" text into
        # the filename, and an href ending in "/" would give an empty name
        # (i.e. an attempt to open the directory itself), hence the fallback.
        pdf_name = os.path.basename(urlparse(full_url).path) or f"document_{index}.pdf"
        pdf_filename = f"pdfs/{pdf_name}"

        try:
            # The timeout prevents one stalled download from blocking the rest.
            pdf_response = requests.get(full_url, timeout=30)
            pdf_response.raise_for_status()  # Raise an exception for HTTP errors

            with open(pdf_filename, "wb") as pdf_file:
                pdf_file.write(pdf_response.content)
        except requests.exceptions.RequestException as e:
            # Must come before OSError: RequestException is an OSError subclass.
            print(f"Failed to download {full_url}: {e}")
        except OSError as e:
            # A local write problem should not abort the remaining downloads.
            print(f"Failed to save {pdf_filename}: {e}")
        else:
            print(f"Saved: {pdf_filename}")  # Confirmation message
else:
    print("No PDF links found on the page.")

Created 'pdfs' directory to save PDFs.
Found 0 PDF links:
No PDF links found on the page.
