In [1]:
# Sample HTML used throughout this notebook: a title, one "title" paragraph,
# and two "story" paragraphs, the first containing three <a class="sister"> links.
# Note that the markup never closes <body> or <html>; the parsed output shown
# further down includes the closing tags.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

In [2]:
# Echo the raw, unparsed string; the notebook shows its repr with \n escapes.
html_doc

'\n<html><head><title>The Dormouse\'s story</title></head>\n<body>\n<p class="title"><b>The Dormouse\'s story</b></p>\n\n<p class="story">Once upon a time there were three little sisters; and their names were\n<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,\n<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and\n<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;\nand they lived at the bottom of a well.</p>\n\n<p class="story">...</p>\n'

# PARSERS
1. html.parser
   ###### **Advantages** 
    - Batteries included
    - Decent speed
    - Lenient (as of Python 2.7.3 and 3.2)
   ###### **Disadvantages**
    - Not as fast as lxml, less lenient than html5lib.

2. lxml
   ###### **Advantages** 
    - Very fast
    - Lenient
   ###### **Disadvantages**
    - External C dependency

3. lxml-xml or xml
   ###### **Advantages** 
    - Very fast
    - The only currently supported XML parser
   ###### **Disadvantages**
    - External C dependency

4. html5lib
   ###### **Advantages** 
    - Extremely lenient
    - Parses pages the same way a web browser does
    - Creates valid HTML5
   ###### **Disadvantages**
    - Very slow
    - External Python dependency

# Going down

In [36]:
# NOTE(review): this cell must run before every later cell that uses `soup`;
# its execution count (36) is out of sequence with the cells that follow, so
# re-run the notebook top to bottom on a fresh kernel before sharing.
from bs4 import BeautifulSoup

# Parse the sample document with the 'lxml' parser (option 2 in the PARSERS
# list above, which notes that it is an external dependency).
soup = BeautifulSoup(html_doc, 'lxml')

In [6]:
# Attribute access by tag name: displays the document's <head> element.
soup.head

<head><title>The Dormouse's story</title></head>

In [7]:
# The top-level soup object reports its name as '[document]', not as a real tag name.
soup.name

'[document]'

In [9]:
# Displays the <title> element, which sits inside <head>.
soup.title

<title>The Dormouse's story</title>

In [10]:
# Only the first <a> (Elsie) is shown, although the document contains three.
soup.a

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [11]:
# find_all('a') displays a list of all three <a> elements, in document order.
soup.find_all('a')

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [14]:
# The first entry of .contents is the whole <html> element. The displayed tree
# ends with </body></html>, which the original html_doc string did not contain.
soup.contents[0]

<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body></html>

In [37]:
# .children displays as an iterator object rather than the elements themselves;
# wrap it in list(...) or loop over it to see the actual child nodes.
soup.children

<list_iterator at 0x15aee14bcd0>

In [38]:
# .descendants displays as a generator object; iterate over it to see the nodes.
soup.descendants

<generator object Tag.descendants at 0x0000015AEE121EE0>

In [39]:
# No output was recorded for this cell, which suggests .string evaluated to
# None for the whole document (TODO confirm on re-run).
soup.string

In [40]:
# .stripped_strings displays as a generator object; iterate over it (e.g. list(...))
# to see the text pieces.
soup.stripped_strings

<generator object PageElement.stripped_strings at 0x0000015AEE1936B0>

# Going Up

In [41]:
# No output was recorded: the top-level soup object appears to have no parent
# (the value is presumably None; confirm on re-run).
soup.parent

In [42]:
# .parents displays as a generator object; iterate over it to see the ancestors.
soup.parents

<generator object PageElement.parents at 0x0000015AEE0CF1C0>

# Going Sideways

In [43]:
# Use the snake_case attribute, consistent with soup.previous_sibling below;
# `nextSibling` is the legacy camelCase spelling kept only for old code.
# No output was recorded for the top-level soup object, so the value is
# presumably None here.
soup.next_sibling

In [44]:
# .next_siblings displays as a generator object; iterate over it to see the siblings.
soup.next_siblings

<generator object PageElement.next_siblings at 0x0000015AEE0CF700>

In [45]:
# No output was recorded: the top-level soup object appears to have no
# previous sibling (the value is presumably None; confirm on re-run).
soup.previous_sibling

In [46]:
# .previous_siblings displays as a generator object; iterate over it to see the siblings.
soup.previous_siblings

<generator object PageElement.previous_siblings at 0x0000015AEE0CF940>

# Going back and forth

In [47]:
# No output was recorded for this cell in the saved run.
# NOTE(review): re-run to confirm what .next_element returns on the soup object.
soup.next_element

In [48]:
# .next_elements displays as a generator object; iterate over it to see the elements.
soup.next_elements

<generator object PageElement.next_elements at 0x0000015AEE0CFF40>

In [49]:
# .previous_elements displays as a generator object; iterate over it to see the elements.
soup.previous_elements

<generator object PageElement.previous_elements at 0x0000015AEE0CFE80>

# find_parents() and find_parent()

# find_next_siblings() and find_next_sibling()

# find_previous_siblings() and find_previous_sibling()

# find_all_next() and find_next()

# find_all_previous() and find_previous()