Following_Links_in_HTML_Using_BeautifulSoup

runalb · web-flow · commit ddb1fed713aa · 2020-08-17T21:47:24.000+05:30
diff --git a/Using Python to Access Web Data/W4_Ass_2_Following_Links_in_HTML_Using_BeautifulSoup.py b/Using Python to Access Web Data/W4_Ass_2_Following_Links_in_HTML_Using_BeautifulSoup.py
@@ -0,0 +1,44 @@
+"""Follow a chain of links through HTML pages.
+
+Starting from a user-supplied URL, fetch the page, pick the anchor tag at
+the requested 1-based position, and repeat from that link's target for the
+requested number of hops.  Every URL is printed as it is selected.
+"""
+
+import urllib.request, urllib.parse, urllib.error
+from bs4 import BeautifulSoup
+import ssl
+
+# Certificate verification is deliberately switched off here: hostname
+# checking is disabled and no certificate is required from the server.
+ctx = ssl.create_default_context()
+ctx.check_hostname = False
+ctx.verify_mode = ssl.CERT_NONE
+
+url = input('Enter - ')
+count = int(input("Enter count: "))
+pos = int(input("Enter position:"))
+if pos < 1:
+    raise SystemExit("Position must be 1 or greater")
+
+print("Retrieving:", url)
+for _ in range(count):
+    # Pass the relaxed context explicitly; without it urlopen() uses its
+    # default verification and the settings above have no effect.
+    with urllib.request.urlopen(url, context=ctx) as response:
+        html = response.read()
+    soup = BeautifulSoup(html, 'html.parser')
+
+    tags = soup('a')
+    if len(tags) < pos:
+        # Stop instead of silently re-fetching the same page on every hop.
+        raise SystemExit("Only {} links found; cannot follow link {}".format(len(tags), pos))
+
+    href = tags[pos - 1].get('href')
+    if href is None:
+        # Without this check the text "None" would become the next URL.
+        raise SystemExit("Link {} has no href attribute".format(pos))
+
+    # Resolve relative links against the page they were found on.
+    url = urllib.parse.urljoin(url, href)
+    print("Retrieving:", url)