# Simple parsing with HTMLParser

**Import libraries**

In [1]:
# htmlparser to parse html
from html.parser import HTMLParser
# to open and read urls
import urllib.request

**Open and read the URL**

In [2]:
# Fetch the page inside a `with` block so the connection is closed even if
# reading or decoding raises. Despite its name, `url` is the HTTP response
# object, not the address string.
with urllib.request.urlopen("https://en.wikipedia.org/wiki/Python_(programming_language)") as url:
    # Honour the charset declared in the Content-Type header; fall back to
    # UTF-8 when the server does not declare one.
    charset = url.headers.get_content_charset() or "utf-8"
    content = url.read().decode(charset)  # html code that we'll be parsing

**Creating our Parser class**

In [23]:
# inheriting from parent class
class Parser(HTMLParser):
    """HTML parser that collects the absolute links found in ``<a>`` tags.

    Every ``href`` value that starts with ``"http"`` is printed as soon as it
    is encountered and appended to ``self.links`` in document order.
    """

    def __init__(self):
        # HTMLParser.__init__ already calls reset(), so no explicit reset here.
        super().__init__()
        # if we want to store results as a list
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an ``<a>`` start tag when it begins with "http".

        Args:
            tag: lower-cased tag name supplied by HTMLParser.
            attrs: list of ``(name, value)`` pairs,
                e.g. ``('href', 'http://www.cwi.nl/')``.
        """
        if tag != 'a':  # only anchors carry the links we store
            return
        for name, link in attrs:
            # A valueless attribute such as ``<a href>`` arrives as
            # ('href', None); skip it instead of calling .startswith on None.
            if name == "href" and link and link.startswith("http"):
                print(link)
                # storing results in a list
                self.links.append(link)

**Instantiate the parser and feed it the page**

In [24]:
# Build the parser and push the downloaded HTML through it; each matching
# link is printed as it is found and collected in `p.links`.
p = Parser()
p.feed(content)
# Signal end-of-input so any text still buffered by the parser is processed.
p.close()

https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en
https://af.wikipedia.org/wiki/Python_(programmeertaal)
https://als.wikipedia.org/wiki/Python_(Programmiersprache)
https://ar.wikipedia.org/wiki/%D8%A8%D8%A7%D9%8A%D8%AB%D9%88%D9%86_(%D9%84%D8%BA%D8%A9_%D8%A8%D8%B1%D9%85%D8%AC%D8%A9)
https://an.wikipedia.org/wiki/Python
https://as.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8
https://ast.wikipedia.org/wiki/Python
https://az.wikipedia.org/wiki/Python_(proqramla%C5%9Fd%C4%B1rma_dili)
https://azb.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86
https://ban.wikipedia.org/wiki/Python
https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%AA%E0%A7%8D%E0%A6%B0%E0%A7%8B%E0%A6%97%E0%A7%8D%E0%A6%B0%E0%A6%BE%E0%A6%AE%E0%A6%BF%E0%A6%82_%E0%A6%AD%E0%A6%BE%E0%A6%B7%E0%A6%BE)
https://zh-min-nan.wikipedia.org/wiki/Python
https://be.wikipedia.org/wiki/Pytho

**Output stored in the list**

In [25]:
# Pull the collected hrefs off the parser instance.
links = p.links

# Emit the total first, then every link on its own line, in a single call.
print(len(links), *links, sep="\n")

614
https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en
https://af.wikipedia.org/wiki/Python_(programmeertaal)
https://als.wikipedia.org/wiki/Python_(Programmiersprache)
https://ar.wikipedia.org/wiki/%D8%A8%D8%A7%D9%8A%D8%AB%D9%88%D9%86_(%D9%84%D8%BA%D8%A9_%D8%A8%D8%B1%D9%85%D8%AC%D8%A9)
https://an.wikipedia.org/wiki/Python
https://as.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8
https://ast.wikipedia.org/wiki/Python
https://az.wikipedia.org/wiki/Python_(proqramla%C5%9Fd%C4%B1rma_dili)
https://azb.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86
https://ban.wikipedia.org/wiki/Python
https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%AA%E0%A7%8D%E0%A6%B0%E0%A7%8B%E0%A6%97%E0%A7%8D%E0%A6%B0%E0%A6%BE%E0%A6%AE%E0%A6%BF%E0%A6%82_%E0%A6%AD%E0%A6%BE%E0%A6%B7%E0%A6%BE)
https://zh-min-nan.wikipedia.org/wiki/Python
https://be.wikipedia.org/wiki/P