## Importing BeautifulSoup

In [1]:
from bs4 import BeautifulSoup

In [2]:
with open('files/SimpleApple.html') as html_code:
    soup = BeautifulSoup(html_code, 'html.parser')

In [3]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   The three Apples that changed the world
  </title>
 </head>
 <body>
  <h2>
   All you want to know
  </h2>
  <p>
   <p>
    Eve's Apple
   </p>
   <p class="apple">
    Newton's Apple
   </p>
   <p class="steve">
    Steve Job's Apple
   </p>
  </p>
  <p id="3apples">
   Not really an insignificant fruit
   <i>
    is it
   </i>
   !
  </p>
  <!-- Interesting isn't it? -->
 </body>
</html>



#### Modification methods : extend( ), insert( ), clear( ), extract( ), decompose( ), replace_with( ), wrap( ),          unwrap( )

#### extend( )

In [4]:
p_tag = soup.p

p_tag

<p>
<p>Eve's Apple</p>
<p class="apple">Newton's Apple</p>
<p class="steve">Steve Job's Apple</p>
</p>

In [5]:
p_tag = p_tag.p

p_tag

<p>Eve's Apple</p>

#### We can add content to the tag using .extend also, here we are using .extend to add the content to "b" tag

In [6]:
p_tag.extend(" is the first apple")

p_tag

<p>Eve's Apple is the first apple</p>

### insert( )

In [7]:
p_tag = soup.find(class_="apple")

p_tag

<p class="apple">Newton's Apple</p>

#### insert( ) also will work same like append( ), but the main difference is we can give position where we want to add that new string

In [8]:
p_tag.insert(0, "The second apple is ") # 0 indicates the position 

p_tag

<p class="apple">The second apple is Newton's Apple</p>

In [9]:
p_tag = soup.find(class_="steve")

p_tag

<p class="steve">Steve Job's Apple</p>

In [10]:
p_tag.string.insert_before("The third apple is ")

p_tag

<p class="steve">The third apple is Steve Job's Apple</p>

In [11]:
new_a_tag = soup.new_tag('a', href = "https://en.wikipedia.org/wiki/Apple_Store")

new_a_tag.string = ' see here '

In [12]:
p_tag.insert_after(new_a_tag)

In [13]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   The three Apples that changed the world
  </title>
 </head>
 <body>
  <h2>
   All you want to know
  </h2>
  <p>
   <p>
    Eve's Apple
    i
    s
    t
    h
    e
    f
    i
    r
    s
    t
    a
    p
    p
    l
    e
   </p>
   <p class="apple">
    The second apple is
    Newton's Apple
   </p>
   <p class="steve">
    The third apple is
    Steve Job's Apple
   </p>
   <a href="https://en.wikipedia.org/wiki/Apple_Store">
    see here
   </a>
  </p>
  <p id="3apples">
   Not really an insignificant fruit
   <i>
    is it
   </i>
   !
  </p>
  <!-- Interesting isn't it? -->
 </body>
</html>



### clear( )
#### clear( ) method is used to delete the content from a tag

In [14]:
p_tag

<p class="steve">The third apple is Steve Job's Apple</p>

In [15]:
p_tag.clear()

In [16]:
p_tag

<p class="steve"></p>

In [17]:
p_tag.string = "The third apple is Steve Jobs' Apple"

In [18]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   The three Apples that changed the world
  </title>
 </head>
 <body>
  <h2>
   All you want to know
  </h2>
  <p>
   <p>
    Eve's Apple
    i
    s
    t
    h
    e
    f
    i
    r
    s
    t
    a
    p
    p
    l
    e
   </p>
   <p class="apple">
    The second apple is
    Newton's Apple
   </p>
   <p class="steve">
    The third apple is Steve Jobs' Apple
   </p>
   <a href="https://en.wikipedia.org/wiki/Apple_Store">
    see here
   </a>
  </p>
  <p id="3apples">
   Not really an insignificant fruit
   <i>
    is it
   </i>
   !
  </p>
  <!-- Interesting isn't it? -->
 </body>
</html>



### extract( )
#### It will delete tag or content and return the tag or content which is deleted from document

In [19]:
h2_tag = soup.h2

h2_tag

<h2>All you want to know</h2>

In [20]:
h2_tag.parent

<body>
<h2>All you want to know</h2>
<p>
<p>Eve's Apple is the first apple</p>
<p class="apple">The second apple is Newton's Apple</p>
<p class="steve">The third apple is Steve Jobs' Apple</p><a href="https://en.wikipedia.org/wiki/Apple_Store"> see here </a>
</p>
<p id="3apples">Not really an insignificant fruit<i>is it</i>!</p>
<!-- Interesting isn't it? -->
</body>

#### Here we are extracting/deleting "b" tag from "h3" tag and it is returning the extracted tag.

In [21]:
h2_tag.extract()

<h2>All you want to know</h2>

In [22]:
"""Even we can still access the tag it is no longer part of the parse tree"""
h2_tag

<h2>All you want to know</h2>

In [23]:
print(h2_tag.parent)

None


In [24]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   The three Apples that changed the world
  </title>
 </head>
 <body>
  <p>
   <p>
    Eve's Apple
    i
    s
    t
    h
    e
    f
    i
    r
    s
    t
    a
    p
    p
    l
    e
   </p>
   <p class="apple">
    The second apple is
    Newton's Apple
   </p>
   <p class="steve">
    The third apple is Steve Jobs' Apple
   </p>
   <a href="https://en.wikipedia.org/wiki/Apple_Store">
    see here
   </a>
  </p>
  <p id="3apples">
   Not really an insignificant fruit
   <i>
    is it
   </i>
   !
  </p>
  <!-- Interesting isn't it? -->
 </body>
</html>



### decompose( )
#### decompose( ) will delete the tag from a tree and it will destroy it completely, it will not return it

In [25]:
i_tag = soup.i

i_tag

<i>is it</i>

In [26]:
i_tag.decompose()

In [27]:
i_tag

AttributeError: 'NoneType' object has no attribute 'items'

In [28]:
print(i_tag.parent)

None


In [29]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   The three Apples that changed the world
  </title>
 </head>
 <body>
  <p>
   <p>
    Eve's Apple
    i
    s
    t
    h
    e
    f
    i
    r
    s
    t
    a
    p
    p
    l
    e
   </p>
   <p class="apple">
    The second apple is
    Newton's Apple
   </p>
   <p class="steve">
    The third apple is Steve Jobs' Apple
   </p>
   <a href="https://en.wikipedia.org/wiki/Apple_Store">
    see here
   </a>
  </p>
  <p id="3apples">
   Not really an insignificant fruit
   !
  </p>
  <!-- Interesting isn't it? -->
 </body>
</html>



### replace_with( )
#### replace_with( ) will replace the tag/string from the html tree and replace with the tag/string of your choice.

In [30]:
soup = BeautifulSoup("""<a href ="https://en.wikipedia.org/wiki/Amorphophallus_titanum">
Amorphophallus titanum is 
<h1>a very rare flower</h1>
</a>""")

In [31]:
a_tag = soup.a

a_tag

<a href="https://en.wikipedia.org/wiki/Amorphophallus_titanum">
Amorphophallus titanum is 
<h1>a very rare flower</h1>
</a>

#### Here we are creating the new tag "i".

In [32]:
new_tag = soup.new_tag("i")

In [33]:
new_tag.string = " also called Titan arum"

In [34]:
a_tag.h1.replace_with(new_tag)

a_tag

<a href="https://en.wikipedia.org/wiki/Amorphophallus_titanum">
Amorphophallus titanum is 
<i> also called Titan arum</i>
</a>

### wrap( )
#### wrap( ) will wraps an element into  new tag of your choice and it will return the new wrapper.

In [35]:
soup = BeautifulSoup("<i>I want to travel all over the world</i>")

soup

<html><body><i>I want to travel all over the world</i></body></html>

In [36]:
new_tag = soup.new_tag("b")

In [37]:
soup.i.string.wrap(new_tag)

soup

<html><body><i><b>I want to travel all over the world</b></i></body></html>

### unwrap( )
#### unwrap( ) is completely opposite to wrap( ), this will replaces the tag with the content which is present inside of that tag.

In [38]:
soup = BeautifulSoup("""<h1 id="interest">
You are going to meet all kinds of <b>interesting</b> people
</h1>""")

print(soup.prettify())

<html>
 <body>
  <h1 id="interest">
   You are going to meet all kinds of
   <b>
    interesting
   </b>
   people
  </h1>
 </body>
</html>


In [39]:
h1_tag = soup.h1

h1_tag

<h1 id="interest">
You are going to meet all kinds of <b>interesting</b> people
</h1>

In [40]:
h1_tag.b.unwrap()

h1_tag

<h1 id="interest">
You are going to meet all kinds of interesting people
</h1>

In [41]:
print(soup.prettify())

<html>
 <body>
  <h1 id="interest">
   You are going to meet all kinds of
   interesting
   people
  </h1>
 </body>
</html>
