In [2]:
from bs4 import BeautifulSoup

In [3]:
# Sample markup that is parsed into `soup` below. It contains two <div>
# elements (the first has id="first" and wraps an <h3> and a <p>), an ordered
# list whose first two items carry class="special", and two elements (the <h3>
# and the second <div>) that carry data-example="yes".
html = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>First HTML Page</title>
</head>
<body>
  <div id="first">
    <h3 data-example="yes">hi</h3>
    <p>more text.</p>
  </div>
  <ol>
    <li class="special">This list item is special.</li>
    <li class="special">This list item is also special.</li>
    <li>This list item is not special.</li>
  </ol>
  <div data-example="yes">bye</div>
</body>
</html>
"""

In [4]:
# Build the parse tree with the "html.parser" backend, then show the <body>
# subtree to confirm the markup was read as expected.
soup = BeautifulSoup(html, features="html.parser")
body_tag = soup.body
print(body_tag)

<body>
<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>
<ol>
<li class="special">This list item is special.</li>
<li class="special">This list item is also special.</li>
<li>This list item is not special.</li>
</ol>
<div data-example="yes">bye</div>
</body>


In [5]:
# The parsed document is itself an object; show which class it belongs to.
soup_type = type(soup)
print(soup_type)

<class 'bs4.BeautifulSoup'>


In [6]:
# Attribute-style navigation stops at the first matching tag, so only the
# first of the two <div> elements is shown.
first_div = soup.body.div
print(first_div)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [8]:
# find() also yields just the first of the two <div> elements.
first_match = soup.find("div")
print(first_match)

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [6]:
# find() hands back only the first occurrence of the requested tag.
div = soup.find("div")
# The result is not a plain string: the parser represents each tag as an
# object, so inspect its type.
div_type = type(div)
print(div_type)

<class 'bs4.element.Tag'>


In [8]:
# find_all() collects every <div> in the document into a list of tag objects.
all_divs = soup.find_all("div")
print(all_divs)

[<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>, <div data-example="yes">bye</div>]


In [10]:
# Filter by CSS class; the keyword is spelled `class_` because `class` is a
# reserved word in Python. The matches come back as a list.
special_items = soup.find_all(class_="special")
print(special_items)

[<li class="special">This list item is special.</li>, <li class="special">This list item is also special.</li>]


In [11]:
# Filter on an arbitrary attribute: `data-example` is not a valid Python
# keyword name, so it is passed through the `attrs` dictionary instead.
wanted_attrs = {"data-example": "yes"}
print(soup.find_all(attrs=wanted_attrs))

[<h3 data-example="yes">hi</h3>, <div data-example="yes">bye</div>]


In [14]:
# select() takes a CSS selector and always gives back a list, even when an
# id selector matches a single element.
id_matches = soup.select("#first")
print(id_matches)

[<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>]


In [15]:
# Index into the list returned by select() to get the element object itself
# rather than a one-item list.
id_matches = soup.select("#first")
print(id_matches[0])

<div id="first">
<h3 data-example="yes">hi</h3>
<p>more text.</p>
</div>


In [17]:
# A class selector works the same way: every element with class "special" is
# returned in a list.
class_matches = soup.select(".special")
print(class_matches)

[<li class="special">This list item is special.</li>, <li class="special">This list item is also special.</li>]


In [19]:
# An attribute selector without a value matches every element that has a
# data-example attribute, whatever that attribute is set to.
attr_matches = soup.select("[data-example]")
print(attr_matches)

[<h3 data-example="yes">hi</h3>, <div data-example="yes">bye</div>]
