### Getting started with BeautifulSoup

In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

### Accessing urls

In [2]:
html = urlopen("https://www.ibm.com/topics/logistic-regression")

###  Reading URLs with "html.parser"

In [3]:
bs = BeautifulSoup(html.read(),'html.parser')

### Reading specific tags

In [28]:
print(bs.title)

<title>What Is Logistic Regression? | IBM</title>


###  Reading URLs with "html5lib"

In [5]:
bs1 = BeautifulSoup(html.read(),"html5lib")

In [6]:
print(bs1)

<html><head></head><body></body></html>


###  Reading URLs with "lxml"

In [7]:
bs2 = BeautifulSoup(html.read(),"lxml")

In [8]:
print(bs2)




### Handling errors

In [9]:
from urllib.error import HTTPError
from urllib.error import URLError

### HTTPError: HTTP error may be “404 Page Not Found,” “500 Internal Server Error,” and so forth. In all of these cases, the urlopen function will throw the generic exception HTTPError

In [11]:
try:
    html = urlopen("https://abc.com//app")
except HTTPError as e:
    print(e)
except URLError as e:
    print("The server could not be found!")
else:
    print("All good")

HTTP Error 404: Not Found


### URLEroor : When the url is wrong/ does not exist

In [12]:
try:
    html = urlopen("https://abc#2.com//app")
except HTTPError as e:
    print(e)
except URLError as e:
    print("The server could not be found!")
else:
    print("All good")

The server could not be found!


### when there is no error

In [16]:
try:
    html = urlopen("https://abc.com")
except HTTPError as e:
    print(e)
except URLError as e:
    print("The server could not be found!")
else:
    print("All good",html.read().title)

All good <built-in method title of bytes object at 0x00000265EB61D040>


### handling missing tags

#### 1. when everything is working

In [21]:
try:
    tag = bs.title
except AttributeError as e:
    print("Tag was not found")
else:
    if tag == None:
        print("Tag not found")
    else:
        print(tag)

<title>What Is Logistic Regression? | IBM</title>


### 2.  the tag is not found (None)

In [22]:
try:
    tag = bs.h1
except AttributeError as e:
    print("Tag was not found")
else:
    if tag == None:
        print("Tag not found")
    else:
        print(tag)

Tag not found


In [None]:
#### 3.  when we try to access attribute for the tag which is not found (None)

In [27]:
try:
    tag = bs.h1.abc
except AttributeError as e:
    print("Tag was not found")
else:
    if tag == None:
        print("Tag not found")
    else:
        print(tag)

Tag was not found
