Skip to content

Commit ddb1fed

Browse files
authored
Following_Links_in_HTML_Using_BeautifulSoup
1 parent 45ea5b0 commit ddb1fed

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import urllib.request, urllib.parse, urllib.error
2+
from bs4 import BeautifulSoup
3+
import ssl
4+
# Follow a chain of links: starting from a user-supplied URL, fetch the page,
# pick the anchor tag at a given 1-based position, and repeat `count` times.

# NOTE(security): certificate verification is deliberately disabled so that
# hosts with self-signed or mismatched certificates can still be fetched.
# Do not reuse this context for anything that handles sensitive data.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input('Enter - ')
count = int(input("Enter count: "))
pos = int(input("Enter position:"))

print("Retrieving:", url)
for _ in range(count):
    # Pass the context explicitly; otherwise the SSL settings above are
    # never applied. The `with` block closes the connection after reading.
    with urllib.request.urlopen(url, context=ctx) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Calling the soup object is shorthand for find_all: every <a> tag in
    # document order.
    tags = soup('a')

    # `pos` is 1-based. Stop with a clear message instead of silently
    # re-fetching the same page when the requested link does not exist.
    if not 1 <= pos <= len(tags):
        raise SystemExit(
            "No link at position {} on {} ({} links found)".format(
                pos, url, len(tags)
            )
        )

    href = tags[pos - 1].get('href')
    if href is None:
        # An <a> without href (e.g. a named anchor) cannot be followed.
        raise SystemExit(
            "Link at position {} on {} has no href attribute".format(pos, url)
        )

    # Resolve relative links against the page they were found on; absolute
    # links pass through urljoin unchanged.
    url = urllib.parse.urljoin(url, href)
    print("Retrieving:", url)

0 commit comments

Comments
 (0)