# Beautiful Soup (WebPage)

In [1]:
import requests
from bs4 import BeautifulSoup

In [8]:
def fetch_urlsoup(url, timeout=10):
    """Download a web page and parse it into a BeautifulSoup object.

    Args:
        url: Address of the page to download.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument (default 10).

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'html.parser'`` backend, or ``None`` if the request failed.

    Errors derived from ``requests.exceptions.RequestException`` (including
    the ``HTTPError`` raised by ``raise_for_status()``) are not propagated:
    a one-line message is printed and ``None`` is returned instead.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Convert error status codes into HTTPError so they are reported below.
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')
    except requests.exceptions.HTTPError as e:
        print(f"HTTP Error: {e}")
    except requests.exceptions.Timeout as e:
        # Checked before ConnectionError on purpose: ConnectTimeout inherits
        # from both classes, and a connect timeout should be labelled as a
        # timeout rather than as a generic connection failure.
        print(f"Timeout Error: {e}")
    except requests.exceptions.ConnectionError as e:
        print(f"Connection Error: {e}")
    except requests.exceptions.RequestException as e:
        # Fallback for every other requests failure (invalid URL, too many
        # redirects, ...).
        print(f"Request Exception: {e}")

    # Reached only when one of the handlers above ran.
    return None

In [9]:
soup_ex = fetch_urlsoup("https://example.com")
# fetch_urlsoup returns None when the download fails; stop here with a clear
# message instead of letting later cells fail with AttributeError on None.
assert soup_ex is not None, "Could not download https://example.com"

In [11]:
# Echo the parsed document: as the cell's last expression it is rendered as
# the cell output (the page's HTML markup).
soup_ex

<!DOCTYPE html>

<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative example

In [10]:
# Show the text held by the page's <title> element
print(soup_ex.title.get_text())

Example Domain


In [13]:
# Extract all URLs. href=True restricts the search to <a> tags that actually
# carry an href attribute, so anchors without a link target are skipped
# instead of being printed as "None".
for anchor in soup_ex.find_all('a', href=True):
    print(anchor['href'])

https://www.iana.org/domains/example


In [14]:
# Collect the document's text content as a single string; the result keeps
# the newlines found between elements, as the output below shows.
soup_ex.get_text()

'\n\n\nExample Domain\n\n\n\n\n\n\n\nExample Domain\nThis domain is for use in illustrative examples in documents. You may use this\n    domain in literature without prior coordination or asking for permission.\nMore information...\n\n\n\n'