# HTML Parser Module

In [1]:
from html.parser import HTMLParser

In [2]:
class HTMLParser(HTMLParser):
    """Parser subclass that echoes every parsing event to stdout.

    Each overridden callback prints one labelled line for the event it
    receives: start tags (followed by their attributes), end tags, comments
    and text data.

    Note: this definition rebinds the name ``HTMLParser``, so after it runs
    the name refers to this subclass rather than the imported base class.
    """

    def handle_starttag(self, tag, attrs):
        """Print the opening tag, then one line per entry in ``attrs``."""
        print("Start tag: ", tag)
        for attribute in attrs:
            print("attr:", attribute)

    def handle_endtag(self, tag):
        """Print the closing tag."""
        print("End tag: ", tag)

    def handle_data(self, data):
        """Print a run of text found between tags."""
        print("Data: ", data)

    def handle_comment(self, data):
        """Print the content of an HTML comment."""
        print("Comment: ", data)

In [3]:
# Build a parser instance and run it over a small inline document; the
# parser's callbacks fire while feed() consumes the markup.
parser = HTMLParser()
parser.feed(
    "<html><head><title>Coder</title></head>"
    "<body><h1><!--hi-->I am a coder</h1></body></html>"
)
# Trailing blank line to separate this cell's output.
print()

Start tag:  html
Start tag:  head
Start tag:  title
Data:  Coder
End tag:  title
End tag:  head
Start tag:  body
Start tag:  h1
Comment:  hi
Data:  I am a coder
End tag:  h1
End tag:  body
End tag:  html



In [4]:
# Read one line of HTML from the user and run it through the parser.
# The text is stored in `user_html` rather than `input`: assigning to the
# name `input` would shadow the builtin function, so running this cell a
# second time would fail with "TypeError: 'str' object is not callable".
user_html = input("Put in HTML Code: ")
parser.feed(user_html)
# Trailing blank line to separate this cell's output.
print()

Put in HTML Code: <html><head><title>Coder</title></head><body><h1><!--hi-->bla bla bla bla bla</h1></body></html>
Start tag:  html
Start tag:  head
Start tag:  title
Data:  Coder
End tag:  title
End tag:  head
Start tag:  body
Start tag:  h1
Comment:  hi
Data:  bla bla bla bla bla
End tag:  h1
End tag:  body
End tag:  html



In [5]:
# Load the sample document and feed it to the parser in a single call.
# The `with` block closes the file handle even if reading raises, and
# read() returns the whole content at once instead of rebuilding the string
# line by line.
with open("sampleHTML.html", "r") as htmlFile:
    s = htmlFile.read()
parser.feed(s)

Start tag:  h2
Data:  Welcome
End tag:  h2
Data:  

Start tag:  p
Data:  Welcome to my blog
End tag:  p


# Text Wrap Module

In [6]:
import textwrap

In [7]:
# Sample paragraph for the textwrap demos: three lines, of which only the
# first carries leading whitespace (three spaces).
websiteText = (
    "   Learning can happen anywhere with our apps on your computer,\n"
    "mobile device, and TV, featuring enhanced navigation and faster streaming\n"
    "for anytime learning. Limitless learning, limitless possibilities."
)

In [8]:
# Wrap the raw text at the default width; the leading spaces of the first
# line are kept because nothing has stripped them yet.
print("No Dedent:", textwrap.fill(websiteText), sep="\n")

No Dedent:
   Learning can happen anywhere with our apps on your computer, mobile
device, and TV, featuring enhanced navigation and faster streaming for
anytime learning. Limitless learning, limitless possibilities.


In [9]:
# NOTE: only the first line of websiteText is indented, so there is no
# leading whitespace common to every line for dedent() to remove; the three
# leading spaces are actually removed by strip().
dedent_text = textwrap.dedent(websiteText).strip()
print("Dedent:", dedent_text, sep="\n")

Dedent:
Learning can happen anywhere with our apps on your computer,
mobile device, and TV, featuring enhanced navigation and faster streaming
for anytime learning. Limitless learning, limitless possibilities.


In [10]:
print("Fill:")
print()
# Wrap the same paragraph twice: once at 50 columns, once at 100 columns.
print("\n".join(textwrap.fill(dedent_text, width=limit) for limit in (50, 100)))

Fill:

Learning can happen anywhere with our apps on your
computer, mobile device, and TV, featuring
enhanced navigation and faster streaming for
anytime learning. Limitless learning, limitless
possibilities.
Learning can happen anywhere with our apps on your computer, mobile device, and TV, featuring
enhanced navigation and faster streaming for anytime learning. Limitless learning, limitless
possibilities.


In [11]:
print("Controlling Indent:")
# The first output line is prefixed with ">>>" and every following line
# with "---"; the width stays at the TextWrapper default.
print(
    textwrap.TextWrapper(
        initial_indent=">>>",
        subsequent_indent="---",
    ).fill(dedent_text)
)

Controlling Indent:
>>>Learning can happen anywhere with our apps on your computer, mobile
---device, and TV, featuring enhanced navigation and faster streaming
---for anytime learning. Limitless learning, limitless possibilities.


In [12]:
print("Shortening Text: ")
# Truncate the paragraph so that the result, including the "..." placeholder,
# fits within 25 characters.
short = textwrap.shorten(
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
    "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. "
    "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. "
    "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
    width=25,
    placeholder="...",
)
print(short)

Shortening Text: 
Lorem ipsum dolor sit...


# HTTP Package

In [13]:
import urllib.request
import json
import textwrap

## In this example we'll use the Google Books API
 https://www.googleapis.com/books/v1/volumes?q=isbn:1101904224

In [14]:
# Fetch the Google Books record for this ISBN. The timeout (in seconds)
# keeps a stalled connection from hanging the kernel indefinitely; only the
# read needs the open response, so decoding and printing happen after the
# `with` block has closed it.
with urllib.request.urlopen(
    "https://www.googleapis.com/books/v1/volumes?q=isbn:1101904224",
    timeout=10,
) as f:
    text = f.read()

# The body is read as bytes; decode it before wrapping it for display.
decodedtext = text.decode('utf-8')
print(textwrap.fill(decodedtext, width=50))

print()

{  "kind": "books#volumes",  "totalItems": 1,
"items": [   {    "kind": "books#volume",    "id":
"1imJDAAAQBAJ",    "etag": "bmWKVKSIxBY",
"selfLink": "https://www.googleapis.com/books/v1/v
olumes/1imJDAAAQBAJ",    "volumeInfo": {
"title": "Dark Matter",     "subtitle": "A Novel",
"authors": [      "Blake Crouch"     ],
"publisher": "Crown Books",     "publishedDate":
"2016",     "description": "A mind-bending,
relentlessly paced science-fiction thriller, in
which an ordinary man is kidnapped, knocked
unconscious--and awakens in a world inexplicably
different from the reality he thought he knew.",
"industryIdentifiers": [      {       "type":
"ISBN_13",       "identifier": "9781101904220"
},      {       "type": "ISBN_10",
"identifier": "1101904224"      }     ],
"readingModes": {      "text": false,
"image": false     },     "pageCount": 342,
"printType": "BOOK",     "categories": [
"FICTION"     ],     "averageRating": 4.0,
"ratingsCount": 956,     "maturityRating":
"NOT_MATURE",    

In [15]:
# Parse the decoded response text into Python objects.
obj = json.loads(decodedtext)
# Show the value stored under the top-level "kind" key.
print(obj["kind"])

books#volumes


In [16]:
# Take the first entry of "items" and print its search text snippet.
first_item = obj['items'][0]
print(first_item['searchInfo']['textSnippet'])

A mind-bending, relentlessly paced science-fiction thriller, in which an ordinary man is kidnapped, knocked unconscious--and awakens in a world inexplicably different from the reality he thought he knew.
