# Import Beautiful Soup

In [32]:
from bs4 import BeautifulSoup
import requests

# Creating a soup object


In [2]:
# Parse the local HTML file with Python's built-in "html.parser" backend.
# The explicit encoding keeps the read independent of the platform's default
# locale encoding (assumes html-doc.html is UTF-8 -- adjust if it is not).
with open("html-doc.html", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")


In [3]:
# Confirm that the parsed document is a BeautifulSoup object
type(soup)

bs4.BeautifulSoup

# About soup object
## 1. prettify() method

In [4]:
# prettify() returns the document's HTML as an indented string.
# Printing it renders the line breaks and indentation; the earlier bare
# prettify() call was dropped because its result was discarded.
print(soup.prettify())

<html>
 <head>
  <title>
   The Dormouse's story
  </title>
 </head>
 <body>
  <p class="title">
   <b>
    The Dormouse's story
   </b>
  </p>
  <p class="story">
   Once upon a time there were three little sisters; and their names were
   <a class="sister" href="http://example.com/elsie" id="link1">
    Elsie
   </a>
   ,
   <a class="sister" href="http://example.com/lacie" id="link2">
    Lacie
   </a>
   and
   <a class="sister" href="http://example.com/tillie" id="link3">
    Tillie
   </a>
   ;
and they lived at the bottom of a well.
  </p>
  <p class="story">
   ...
  </p>
 </body>
</html>



## 2. Targeting individual tags

In [5]:
# title tag: attribute-style access returns the <title> element itself
soup.title

<title>The Dormouse's story</title>

In [10]:
# p tag: only the first <p> is returned, even though the document has several
soup.p

<p class="title"><b>The Dormouse's story</b></p>

In [11]:
# a tag: only the first <a> (the "Elsie" link) is returned
soup.a

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [6]:
# body tag: returned together with everything nested inside it
soup.body

<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p></body>

In [7]:
# head tag: returned together with the <title> nested inside it
soup.head

<head><title>The Dormouse's story</title></head>

## 3. Getting contents of a tag

In [8]:
# .text gives the tag's text content as a plain string, without the markup
soup.title.text

"The Dormouse's story"

In [9]:
# Text of the first <p>; the nested <b> markup is stripped
soup.p.text

"The Dormouse's story"

In [16]:
# The text of every nested tag is concatenated, newlines included
soup.body.text

"\nThe Dormouse's story\nOnce upon a time there were three little sisters; and their names were\nElsie,\nLacie and\nTillie;\nand they lived at the bottom of a well.\n..."

In [17]:
# <head> only contains <title>, so this matches soup.title.text
soup.head.text

"The Dormouse's story"

## 4. parent tag

In [10]:
soup.title.parent # .parent is the directly enclosing tag: <head> for <title>

<head><title>The Dormouse's story</title></head>

In [11]:
# <b> sits inside the first <p>, so that <p> is its parent
soup.b.parent

<p class="title"><b>The Dormouse's story</b></p>

In [12]:
# The first <p> is directly inside <body>, so <body> is its parent
soup.p.parent

<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p></body>

In [13]:
soup.title.parent.parent # .parent can be chained: title -> head -> html

<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p></body></html>

# Other tags 
## Children

In [14]:
soup.body.children # returns an iterator over the direct children, not a list

<list_iterator at 0x231215b1f30>

In [15]:
# Iterate to see each direct child of <body>; the blank lines in the output
# are the whitespace strings that sit between the tags.
for child in soup.body.children:
    print(child)



<p class="title"><b>The Dormouse's story</b></p>


<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>


<p class="story">...</p>


In [16]:
# <head> has a single direct child: the <title> tag
for child in soup.head.children:
    print(child)

<title>The Dormouse's story</title>


## Descendants



In [18]:
# .descendants is a generator, so it has to be iterated to see the items
soup.body.descendants

<generator object Tag.descendants at 0x00000231230DBD30>

In [19]:
# Unlike .children, this walks the whole subtree: each tag is followed by
# the tags and strings nested inside it.
for descendant in soup.body.descendants:
    print(descendant )



<p class="title"><b>The Dormouse's story</b></p>
<b>The Dormouse's story</b>
The Dormouse's story


<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
Once upon a time there were three little sisters; and their names were

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
Elsie
,

<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
Lacie
 and

<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
Tillie
;
and they lived at the bottom of a well.


<p class="story">...</p>
...


## find() 

In [20]:
# find() returns only the first match -- the same element as soup.a
soup.find('a')

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [21]:
# First <p> only -- the same element as soup.p
soup.find('p')

<p class="title"><b>The Dormouse's story</b></p>

## find_all()

In [22]:
# find_all() returns every matching tag, in document order
soup.find_all('a')

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [23]:
# The collection returned by find_all() is a bs4 ResultSet
type(soup.find_all('a'))

bs4.element.ResultSet

## get()

In [24]:
# get() reads an attribute value from the tag (here, the first <a>)
soup.a.get('href')

'http://example.com/elsie'

In [25]:
# class comes back as a list, since an element can carry several classes
soup.a.get('class')

['sister']

In [31]:
soup.a['href'] # Similar to the get() method: dictionary-style attribute access

'http://example.com/elsie'

In [27]:
# Subscript access returns the same list as get('class')
soup.a['class']

['sister']

In [30]:
soup.p.get('id') # If the attribute doesn't exist, get() returns None (so nothing is displayed)


In [None]:
# Subscript access raises KeyError when the attribute doesn't exist
# (unlike get(), which returns None). Catch and print the error so the
# cell demonstrates the behaviour without breaking "Run All".
try:
    soup.p['id']
except KeyError as err:
    print(f"KeyError: {err}")