# Сложный поиск и изменение с Beautiful Soup

In [1]:
from bs4 import BeautifulSoup

In [21]:
# Sample document that every cell below searches or modifies.
html = """<!DOCTYPE html>
<html lang="en">
<head>
<title>test page</title>
</head>
<body class="mybody" id="js-body">
<p class="text odd">first <b>bold</b> paragraph</p>
<p class="text even">second <a href="https://google.com">link</a></p>
<p class="list odd">third <a id="paragraph"><b>bold link</b></a></p>
</body>
</html>
"""
# Parse the markup with the "lxml" parser backend.
soup = BeautifulSoup(html, "lxml")

In [3]:
# From the first <b> inside the first <p>, walk up to the enclosing <body> and read its id.
soup.p.b.find_parent("body")["id"]

'js-body'

In [5]:
# Next sibling after the first <p> whose class attribute includes "odd".
soup.p.find_next_sibling(class_='odd')

<p class="list odd">third <a id="paragraph"><b>bold link</b></a></p>

In [6]:
# Search only inside the first <p> for its first <b> tag.
soup.p.find('b')

<b>bold</b>

In [7]:
# Look a tag up by its id, then read its class attribute (returned as a list of class names).
soup.find(id='js-body')['class']

['mybody']

In [8]:
# First <b> tag whose text is exactly "bold".
# `string=` is the current name of this filter; `text=` is its deprecated alias.
soup.find('b', string='bold')

<b>bold</b>

In [9]:
# Every <p> tag in the document, in document order.
soup.find_all('p')

[<p class="text odd">first <b>bold</b> paragraph</p>,
 <p class="text even">second <a href="https://google.com">link</a></p>,
 <p class="list odd">third <a id="paragraph"><b>bold link</b></a></p>]

In [10]:
# Filter <p> tags by CSS class; the multi-word value "text odd" is compared
# against the class attribute string, so only the first paragraph matches here.
soup.find_all('p', class_='text odd')

[<p class="text odd">first <b>bold</b> paragraph</p>]

In [11]:
# CSS selector: <p> tags carrying both the "odd" and "text" classes.
soup.select('p.odd.text')

[<p class="text odd">first <b>bold</b> paragraph</p>]

In [12]:
# CSS selector: the <p> that is the third <p> among its siblings.
soup.select("p:nth-of-type(3)")

[<p class="list odd">third <a id="paragraph"><b>bold link</b></a></p>]

In [13]:
# CSS child combinator: <b> tags that are direct children of an <a>.
soup.select('a > b')

[<b>bold link</b>]

In [14]:
import re

# Names of all tags whose tag name matches the regex ^b (i.e. starts with "b").
[element.name for element in soup.find_all(re.compile('^b'))]

['body', 'b', 'b']

In [15]:
# All <a> and <b> tags in document order. A list of names matches any of them;
# list(...) copies the result into a plain list without a no-op comprehension.
list(soup.find_all(['a', 'b']))

[<b>bold</b>,
 <a href="https://google.com">link</a>,
 <a id="paragraph"><b>bold link</b></a>,
 <b>bold link</b>]

In [22]:
# Keep a handle on the first <b> tag in the document, then display it.
tag = soup.b
tag

<b>bold</b>

In [23]:
# Modify the selected tag in place; the edits are reflected in the parsed tree.
tag.name = 'i'  # rename the element from <b> to <i>
tag['id'] = 'myid'  # add an id attribute
tag.string = 'italic'  # replace the tag's text content
soup.p  # show the first paragraph to confirm the change

<p class="text odd">first <i id="myid">italic</i> paragraph</p>

In [25]:
# Print the (now modified) document re-indented, with each tag and string on its own line.
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <title>
   test page
  </title>
 </head>
 <body class="mybody" id="js-body">
  <p class="text odd">
   first
   <i id="myid">
    italic
   </i>
   paragraph
  </p>
  <p class="text even">
   second
   <a href="https://google.com">
    link
   </a>
  </p>
  <p class="list odd">
   third
   <a id="paragraph">
    <b>
     bold link
    </b>
   </a>
  </p>
 </body>
</html>

