# 23. Web Scraping - BeautifulSoup

In [1]:
# step to be followed:-

#1 : load html [requests lib]
#2 : parse HTML [beautifulsoup lib]
#3 : locate and extract the desired data

In [5]:
from bs4 import BeautifulSoup as bs
import requests

In [57]:
html = '<!DOCTYPE html>\
<html>\
<head>\
<title> Testing Web Page </title>\
</head>\
<body>\
<h1> Web Scraping </h1>\
<p id = "first_para">\
Let\'s start learning \
<b>\
Web Scraping\
</b>\
</p>\
<p class = "abc" id = "second_para">\
You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a>\
</p>\
<p class = "abc">\
<a href = "https://codingninjas.in/"> Coding Ninjas </a>\
</p>\
</body>\
</html>'

In [58]:
data = bs(html,'html.parser')
data

<!DOCTYPE html>
<html><head><title> Testing Web Page </title></head><body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body></html>

In [9]:
type(data)

bs4.BeautifulSoup

In [10]:
# beautify the data
print(data.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   Testing Web Page
  </title>
 </head>
 <body>
  <h1>
   Web Scraping
  </h1>
  <p id="first_para">
   Let's start learning
   <b>
    Web Scraping
   </b>
  </p>
  <p class="abc" id="second_para">
   You can read more about BeautifulSoup from
   <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">
    here
   </a>
  </p>
  <p class="abc">
   <a href="https://codingninjas.in/">
    Coding Ninjas
   </a>
  </p>
 </body>
</html>


In [15]:
# extract ->title
# data.(tag_name)
print(data.title)
print(data.head)
print(data.h1)
print(data.p)

<title> Testing Web Page </title>
<head><title> Testing Web Page </title></head>
<h1> Web Scraping </h1>
<p id="first_para">Let's start learning <b>Web Scraping</b></p>


In [17]:
# extract content inside tag 
print(data.title) # complete desc of tag
print(data.title.name) # tag name
print(data.title.string) # only content of tag

<title> Testing Web Page </title>
title
 Testing Web Page 


In [21]:
print(data.title.attrs) # show all attribute in particular tag present
print(data.p.attrs)

{}
{'id': 'first_para'}


In [28]:
# get value of id
print(data.p.get('id')) # by using get()
print(data.p['id']) # by using dict concept

first_para
first_para


In [30]:
print(data.get_text()) # only text , extrxct all text 

 Testing Web Page  Web Scraping Let's start learning Web ScrapingYou can read more about BeautifulSoup from  here  Coding Ninjas 


In [33]:
# find() -> return first occurence of ele
print(data.find('p'))
print(data.find('pr')) # none

<p id="first_para">Let's start learning <b>Web Scraping</b></p>
None


In [35]:
# find_all() -- >all occurence of ele
print(data.find_all('p'))

# or
li = data.find_all('p')
for i in li:
    print(i)

[<p id="first_para">Let's start learning <b>Web Scraping</b></p>, <p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p>, <p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p>]
<p id="first_para">Let's start learning <b>Web Scraping</b></p>
<p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p>
<p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p>


### Navigate Tree

In [61]:
# searching -> find() and find_all()
data.find_all(['p','a'])

[<p id="first_para">Let's start learning <b>Web Scraping</b></p>,
 <p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p>,
 <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a>,
 <p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p>,
 <a href="https://codingninjas.in/"> Coding Ninjas </a>]

In [63]:
data.find_all(True)

[<html><head><title> Testing Web Page </title></head><body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body></html>,
 <head><title> Testing Web Page </title></head>,
 <title> Testing Web Page </title>,
 <body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body>,
 <h1> Web Scraping </h1>,
 <p id="first_para">Let's start learning <b>Web Scraping</b></p>,
 <b>Web Scraping</b>,
 <p class="abc" id="second_para">You can read more about BeautifulSoup from <a href=

In [65]:
# find id using id-name
data.find_all(id = 'first_para')

[<p id="first_para">Let's start learning <b>Web Scraping</b></p>]

In [68]:
print(data.find_all(class_ = 'abc'))

[<p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p>, <p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p>]


#### Going down 

In [72]:
print(data.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   Testing Web Page
  </title>
 </head>
 <body>
  <h1>
   Web Scraping
  </h1>
  <p id="first_para">
   Let's start learning
   <b>
    Web Scraping
   </b>
  </p>
  <p class="abc" id="second_para">
   You can read more about BeautifulSoup from
   <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">
    here
   </a>
  </p>
  <p class="abc">
   <a href="https://codingninjas.in/">
    Coding Ninjas
   </a>
  </p>
 </body>
</html>


In [75]:
print(data.head)
print(data.head.title)
print(data.title)

<head><title> Testing Web Page </title></head>
<title> Testing Web Page </title>
<title> Testing Web Page </title>


In [79]:
print(data.title.string)

 Testing Web Page 


In [83]:
li = data.find_all('p')
for i in li:
    print(i.string)

None
None
 Coding Ninjas 


In [86]:
li = data.find_all('p') # find multiple children value
for i in li:
    print(list(i.strings))

["Let's start learning ", 'Web Scraping']
['You can read more about BeautifulSoup from ', ' here ']
[' Coding Ninjas ']


In [89]:
li = data.find_all('p') # find multiple children value and remove all extra spaces
for i in li:
    print(list(i.stripped_strings))

["Let's start learning", 'Web Scraping']
['You can read more about BeautifulSoup from', 'here']
['Coding Ninjas']


In [94]:
# contents -> give list
li = data.html.contents
print(li)
print(len(li)) # head and body so 2


[<head><title> Testing Web Page </title></head>, <body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body>]
2


In [97]:
li = data.html.children #children -> give ittrator
for i in li:
    print(i)

<head><title> Testing Web Page </title></head>
<body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body>


In [106]:
li = data.html.descendants
for i in li:
    print(i)
    

<head><title> Testing Web Page </title></head>
<title> Testing Web Page </title>
 Testing Web Page 
<body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body>
<h1> Web Scraping </h1>
 Web Scraping 
<p id="first_para">Let's start learning <b>Web Scraping</b></p>
Let's start learning 
<b>Web Scraping</b>
Web Scraping
<p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p>
You can read more about BeautifulSoup from 
<a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a>
 here 
<p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p>
<a href="https://codingninjas.in/"> Coding Ninjas </a>
 Cod

In [107]:
# parent -> give direct html while patents give ittratoer
print(data.html.parent) 
li = data.html.parents
for i in li:
    print(i)

<!DOCTYPE html>
<html><head><title> Testing Web Page </title></head><body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body></html>
<!DOCTYPE html>
<html><head><title> Testing Web Page </title></head><body><h1> Web Scraping </h1><p id="first_para">Let's start learning <b>Web Scraping</b></p><p class="abc" id="second_para">You can read more about BeautifulSoup from <a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/"> here </a></p><p class="abc"><a href="https://codingninjas.in/"> Coding Ninjas </a></p></body></html>


### First Web-page   


In [141]:
res = requests.get('http://info.cern.ch/hypertext/WWW/TheProject.html')
#print(res)
#print(res.headers)
html_data = res.text

In [144]:
data = bs(html_data,"html.parser")
print(data.prettify())

<header>
 <title>
  The World Wide Web project
 </title>
 <nextid n="55"/>
</header>
<body>
 <h1>
  World Wide Web
 </h1>
 The WorldWideWeb (W3) is a wide-area
 <a href="WhatIs.html" name="0">
  hypermedia
 </a>
 information retrieval
initiative aiming to give universal
access to a large universe of documents.
 <p>
  Everything there is online about
W3 is linked directly or indirectly
to this document, including an
  <a href="Summary.html" name="24">
   executive
summary
  </a>
  of the project,
  <a href="Administration/Mailing/Overview.html" name="29">
   Mailing lists
  </a>
  ,
  <a href="Policy.html" name="30">
   Policy
  </a>
  , November's
  <a href="News/9211.html" name="34">
   W3  news
  </a>
  ,
  <a href="FAQ/List.html" name="41">
   Frequently Asked Questions
  </a>
  .
  <dl>
   <dt>
    <a href="../DataSources/Top.html" name="44">
     What's out there?
    </a>
    <dd>
     Pointers to the
world's online information,
     <a href="../DataSources/bySubject/Overview.htm

In [147]:
data.h1.string


'World Wide Web'

In [150]:
data.title.string

'The World Wide Web project'

In [153]:
print(data.get_text())


The World Wide Web project



World Wide WebThe WorldWideWeb (W3) is a wide-area
hypermedia information retrieval
initiative aiming to give universal
access to a large universe of documents.
Everything there is online about
W3 is linked directly or indirectly
to this document, including an executive
summary of the project, Mailing lists
, Policy , November's  W3  news ,
Frequently Asked Questions .

What's out there?
 Pointers to the
world's online information, subjects
, W3 servers, etc.
Help
 on the browser you are using
Software Products
 A list of W3 project
components and their current state.
(e.g. Line Mode ,X11 Viola ,  NeXTStep
, Servers , Tools , Mail robot ,
Library )
Technical
 Details of protocols, formats,
program internals etc
Bibliography
 Paper documentation
on  W3 and references.
People
 A list of some people involved
in the project.
History
 A summary of the history
of the project.
How can I help ?
 If you would like
to support the web..
Getting code
 Getting the cod

In [158]:
a = data.find_all('a') # all hyper links

for i in a:
    print(i.get('href'))

WhatIs.html
Summary.html
Administration/Mailing/Overview.html
Policy.html
News/9211.html
FAQ/List.html
../DataSources/Top.html
../DataSources/bySubject/Overview.html
../DataSources/WWW/Servers.html
Help.html
Status.html
LineMode/Browser.html
Status.html#35
NeXT/WorldWideWeb.html
Daemon/Overview.html
Tools/Overview.html
MailRobot/Overview.html
Status.html#57
Technical.html
Bibliography.html
People.html
History.html
Helping.html
../README.html
LineMode/Defaults/Distribution.html


In [161]:
a = data.find_all('a') # all hyper links

for i in a:
    print(i.string)


hypermedia
executive
summary
Mailing lists
Policy
W3  news
Frequently Asked Questions
What's out there?
 subjects
W3 servers
Help
Software Products
Line Mode
Viola
NeXTStep
Servers
Tools
 Mail robot

Library
Technical
Bibliography
People
History
How can I help
Getting code

anonymous FTP


In [165]:
a = data.dl.find_all('a')
for i in a:
    print(i.string)

What's out there?
 subjects
W3 servers
Help
Software Products
Line Mode
Viola
NeXTStep
Servers
Tools
 Mail robot

Library
Technical
Bibliography
People
History
How can I help
Getting code

anonymous FTP


In [170]:

a = data.dl.find_all('dt')
for i in a:
    print(i.a.string)

What's out there?
Help
Software Products
Technical
Bibliography
People
History
How can I help
Getting code


## ASSIGNMENT SOLUTION 

#### Solution 1

In [38]:
html = '<!DOCTYPE html><html><head><title>Learning Beautiful Soup</title></head>\
<body><h1> About Us </h1><div class = "first_div"><p>Coding Ninjas Website</p>\
<a href="https://www.codingninjas.in/">Link to Coding Ninjas Website</a>\
<ul><li>This</li><li>is</li><li>an</li><li>unordered</li><li>list.</li></ul>\
</div><p id = "template_p">This is a template paragraph tag</p>\
<a href = "https://www.facebook.com/codingninjas/">\
This is the link of our Facebook Page</a></body></html>'
data = bs(html,"html.parser")
#print(data.prettify())
print(data.body)

<body><h1> About Us </h1><div class="first_div"><p>Coding Ninjas Website</p><a href="https://www.codingninjas.in/">Link to Coding Ninjas Website</a><ul><li>This</li><li>is</li><li>an</li><li>unordered</li><li>list.</li></ul></div><p id="template_p">This is a template paragraph tag</p><a href="https://www.facebook.com/codingninjas/">This is the link of our Facebook Page</a></body>


#### Solution 2

In [45]:
html = '<!DOCTYPE html><html><head><title>Learning Beautiful Soup</title></head>\
<body><h1> About Us </h1><div class = "first_div"><p>Coding Ninjas Website</p>\
<a href="https://www.codingninjas.in/">Link to Coding Ninjas Website</a>\
<ul><li>This</li><li>is</li><li>an</li><li>unordered</li><li>list.</li></ul>\
</div><p id = "template_p">This is a template paragraph tag</p>\
<a href = "https://www.facebook.com/codingninjas/">\
This is the link of our Facebook Page</a></body></html>'

data = bs(html,'html.parser')
#print(data.prettify())
li = data.div.attrs
for i in li:
    print(i)

class


#### Solution 3

In [52]:
html = '<!DOCTYPE html><html><head><title>Learning Beautiful Soup</title></head>\
<body><h1> About Us </h1><div class = "first_div"><p>Coding Ninjas Website</p>\
<a href="https://www.codingninjas.in/">Link to Coding Ninjas Website</a>\
<ul><li>This</li><li>is</li><li>an</li><li>unordered</li><li>list.</li></ul>\
</div><p id = "template_p">This is a template paragraph tag</p>\
<a href = "https://www.facebook.com/codingninjas/">\
This is the link of our Facebook Page</a></body></html>'

data = bs(html,"html.parser")
da = data.find_all('li')
for i in da:
    print(i.string,end = " ")

This is an unordered list. 

#### Solution 4

In [56]:
html = '<!DOCTYPE html><html><head><title>Learning Beautiful Soup</title></head>\
<body><h1> About Us </h1><div class = "first_div"><p>Coding Ninjas Website</p>\
<a href="https://www.codingninjas.in/">Link to Coding Ninjas Website</a>\
<ul><li>This</li><li>is</li><li>an</li><li>unordered</li><li>list.</li></ul>\
</div><p id = "template_p">This is a template paragraph tag</p>\
<a href = "https://www.facebook.com/codingninjas/">\
This is the link of our Facebook Page</a></body></html>'

data = bs(html,"html.parser")
a = data.find_all('a')
for i in a:
    print(i.get('href'))

https://www.codingninjas.in/
https://www.facebook.com/codingninjas/


#### Solution 5

In [113]:
html = '<!DOCTYPE html><html><head><title>Navigate Parse Tree</title></head>\
<body><h1>This is your Assignment</h1><a href = "https://www.google.com">This is a link that will take you to Google</a>\
<ul><li><p> This question is given to test your knowledge of <b>Web Scraping</b></p>\
<p>Web scraping is a term used to describe the use of a program or algorithm to extract and process large amounts of data from the web.</p></li>\
<li id = "li2">This is an li tag given to you for scraping</li>\
<li>This li tag gives you the various ways to get data from a website\
<ol><li class = "list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li>\
<li>Scrape data using Scrapy</li></ol></li>\
<li class = "list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">\
Clicking on this takes you to the documentation of BeautifulSoup</a>\
<a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the documentation of Selenium</a>\
</li></ul></body></html>'

data = bs(html,"html.parser")

a = [data.html.descendants]
b = [data.html.children]
print(len(a) - len(b))

0


#### Solution 6

In [122]:
html = '<!DOCTYPE html><html><head><title>Navigate Parse Tree</title></head>\
<body><h1>This is your Assignment</h1><a href = "https://www.google.com">This is a link that will take you to Google</a>\
<ul><li><p> This question is given to test your knowledge of <b>Web Scraping</b></p>\
<p>Web scraping is a term used to describe the use of a program or algorithm to extract and process large amounts of data from the web.</p></li>\
<li id = "li2">This is an li tag given to you for scraping</li>\
<li>This li tag gives you the various ways to get data from a website\
<ol><li class = "list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li>\
<li>Scrape data using Scrapy</li></ol></li>\
<li class = "list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">\
Clicking on this takes you to the documentation of BeautifulSoup</a>\
<a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the documentation of Selenium</a>\
</li></ul></body></html>'

data = bs(html,"html.parser")
li = data.find_all(id)
for i in li:
    print(i.name)

html
head
title
body
h1
a
ul
li
p
b
p
li
li
ol
li
li
li
li
li
a
a


In [127]:
html = '<!DOCTYPE html><html><head><title>Navigate Parse Tree</title></head>\
<body><h1>This is your Assignment</h1><a href = "https://www.google.com">This is a link that will take you to Google</a>\
<ul><li><p> This question is given to test your knowledge of <b>Web Scraping</b></p>\
<p>Web scraping is a term used to describe the use of a program or algorithm to extract and process large amounts of data from the web.</p></li>\
<li id = "li2">This is an li tag given to you for scraping</li>\
<li>This li tag gives you the various ways to get data from a website\
<ol><li class = "list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li>\
<li>Scrape data using Scrapy</li></ol></li>\
<li class = "list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">\
Clicking on this takes you to the documentation of BeautifulSoup</a>\
<a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the documentation of Selenium</a>\
</li></ul></body></html>'

data = bs(html,"html.parser")
page = data.find("li",{"id":"li2"})

temp = list(page.next_siblings)
for i in temp:
    print(i)

<li>This li tag gives you the various ways to get data from a website<ol><li class="list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li><li>Scrape data using Scrapy</li></ol></li>
<li class="list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">Clicking on this takes you to the documentation of BeautifulSoup</a><a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the documentation of Selenium</a></li>


In [129]:
html = '<!DOCTYPE html><html><head><title>Navigate Parse Tree</title></head>\
<body><h1>This is your Assignment</h1><a href = "https://www.google.com">This is a link that will take you to Google</a>\
<ul><li><p> This question is given to test your knowledge of <b>Web Scraping</b></p>\
<p>Web scraping is a term used to describe the use of a program or algorithm to extract and process large amounts of data from the web.</p></li>\
<li id = "li2">This is an li tag given to you for scraping</li>\
<li>This li tag gives you the various ways to get data from a website\
<ol><li class = "list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li>\
<li>Scrape data using Scrapy</li></ol></li>\
<li class = "list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">\
Clicking on this takes you to the documentation of BeautifulSoup</a>\
<a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the documentation of Selenium</a>\
</li></ul></body></html>'

data = bs(html,"html.parser")
page = data.find("title")
temp = list(page.parents)
for i in temp:
    print(i)

<head><title>Navigate Parse Tree</title></head>
<html><head><title>Navigate Parse Tree</title></head><body><h1>This is your Assignment</h1><a href="https://www.google.com">This is a link that will take you to Google</a><ul><li><p> This question is given to test your knowledge of <b>Web Scraping</b></p><p>Web scraping is a term used to describe the use of a program or algorithm to extract and process large amounts of data from the web.</p></li><li id="li2">This is an li tag given to you for scraping</li><li>This li tag gives you the various ways to get data from a website<ol><li class="list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li><li>Scrape data using Scrapy</li></ol></li><li class="list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">Clicking on this takes you to the documentation of BeautifulSoup</a><a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the doc

In [131]:
html = '<!DOCTYPE html><html><head><title>Navigate Parse Tree</title></head>\
<body><h1>This is your Assignment</h1><a href = "https://www.google.com">This is a link that will take you to Google</a>\
<ul><li><p> This question is given to test your knowledge of <b>Web Scraping</b></p>\
<p>Web scraping is a term used to describe the use of a program or algorithm to extract and process large amounts of data from the web.</p></li>\
<li id = "li2">This is an li tag given to you for scraping</li>\
<li>This li tag gives you the various ways to get data from a website\
<ol><li class = "list_or">Using API of the website</li><li>Scrape data using BeautifulSoup</li><li>Scrape data using Selenium</li>\
<li>Scrape data using Scrapy</li></ol></li>\
<li class = "list_or"><a href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">\
Clicking on this takes you to the documentation of BeautifulSoup</a>\
<a href="https://selenium-python.readthedocs.io/" id="anchor">Clicking on this takes you to the documentation of Selenium</a>\
</li></ul></body></html>'

data = bs(html,"html.parser")
page = data.find_all('a')[1]
print(page.next_element)

Clicking on this takes you to the documentation of BeautifulSoup
