In [4]:
# Example file for retrieving data from the internet
# LinkedIn Learning Python course by Joe Marini

import urllib.request

def main():
    """Fetch the Google home page and print its HTTP status code and raw body.

    The body is printed as a bytes object because it is not decoded.
    """
    # The response wraps an open network connection; the with-block makes
    # sure it is closed even if reading the body fails.
    with urllib.request.urlopen("http://www.google.com") as weburl:
        print("result code: ",weburl.getcode())
        data = weburl.read()
    print(data)

if __name__ == "__main__":
    main()

# A result code of 200 means the request succeeded
# A result code of 404 would mean the page was not found

result code:  200
b'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en-IN"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="azf3Y_Y9AZXw8JeMR6RNHQ">(function(){var _g={kEI:\'IJ2PZtuPFaze1sQPsqKUsAk\',kEXPI:\'0,3700281,1100,8,535945,2711,2872,2891,8348,34680,30022,6397,27869,145698,2,16737,23024,6699,41949,57734,2,2,1,24626,2006,8155,23351,22435,9779,45600,17057,33566,10942,28671,3030,15816,1804,7734,18674,21016,1290,40385,15977,5203209,8576,891,530,91,355,5,19,5991430,2839759,16,809,6,68,23937767,2738067,123563,1198752,43887,3,318,4,1281,3,2124363,23034776,2738,10336,2708,8028,2875,9149,24640,36870,10004,507,2370,4832,1575,13845,12953,2212,8181,5929,8457,6329,21800,5080,1835,10085,7766,3815,6758,155,2,2482,13503,7736,6598,2,2540,3150,1449,206,122,1542,1,1674,4,3004,7560,4,409,1974,2,4,1608,2336,1694,4083,4

## Connecting to a Real-Time JSON Data Feed

In [7]:
# Example file for parsing and processing JSON
# LinkedIn Learning Python course by Joe Marini

import urllib.request  # instead of urllib2 like in Python 2.7
import json


def printResults(data):
    """Print a summary of an earthquake GeoJSON feed.

    Args:
        data: JSON text containing a "metadata" object (with "count" and
            optionally "title") and a "features" list whose items each have
            a "properties" object with "place", "mag" and "felt" entries.

    Prints, in order: the feed title (if present), the event count, every
    event's place, the events with magnitude of at least 4.0, and the events
    that at least one person reported feeling.

    Raises:
        json.JSONDecodeError: if ``data`` is not valid JSON.
        KeyError: if one of the keys listed above is missing.
    """
    # Use the json module to load the string data into a dictionary
    theJSON = json.loads(data)
    metadata = theJSON["metadata"]
    features = theJSON["features"]

    # now we can access the contents of the JSON like any other Python object
    if "title" in metadata:
        print(metadata["title"])

    # output the number of events
    count = metadata["count"]
    print(str(count) + " events recorded")

    # for each event, print the place where it occurred
    for event in features:
        print(event["properties"]["place"])
    print("--------------\n")

    # print only the events that have a magnitude of at least 4
    for event in features:
        props = event["properties"]
        mag = props["mag"]
        # "mag" may be null in the feed (None after parsing); comparing None
        # with a float raises TypeError, so such events are skipped here.
        if mag is not None and mag >= 4.0:
            print("%2.1f" % mag, props["place"])
    print("--------------\n")

    # print only the events where at least 1 person reported feeling something
    print("\n\nEvents that were felt:")
    for event in features:
        props = event["properties"]
        feltReports = props["felt"]
        if feltReports is not None and feltReports > 0:
            mag = props["mag"]
            # "%2.1f" cannot format None, so show a placeholder instead
            magText = "%2.1f" % mag if mag is not None else "n/a"
            print(magText, props["place"],
                  " reported " + str(feltReports) + " times")


def main():
    """Download the USGS earthquake feed and print a summary of it.

    Prints the HTTP result code, then either the summary produced by
    printResults() or an error line when the result code is not 200.
    """
    # define a variable to hold the source URL
    # In this case we'll use the free data feed from the USGS
    # This feed lists all earthquakes for the last day larger than Mag 2.5
    urlData = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"

    # Open the URL and read the data; the with-block closes the connection
    # once the body has been read (or if reading fails).
    with urllib.request.urlopen(urlData) as webUrl:
        status = webUrl.getcode()
        print("result code: " + str(status))
        if status != 200:
            # NOTE: urlopen itself raises HTTPError for most error statuses,
            # so this branch only covers non-200 responses that get through.
            print("Received an error from server, cannot retrieve results " +
                  str(status))
            return
        data = webUrl.read().decode("utf-8")

    # print out our customized results
    printResults(data)


if __name__ == "__main__":
    main()


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)>

## Parsing and Processing HTML

In [12]:
# Example file for parsing and processing HTML
# LinkedIn Learning Python course by Joe Marini


# import the HTMLParser module
# in Python 3 you need to import from html.parser
from html.parser import HTMLParser

paragraphs = 0

# HTMLParser subclass whose overridden handlers report what the parser sees
class MyHTMLParser(HTMLParser):
    """Parser that prints every start tag, text run and comment it meets.

    Each report is followed by the parser position at the time of the
    callback. Start tags named "p" also increment the module-level
    ``paragraphs`` counter.
    """

    def _print_position(self):
        """Print the current (line, offset) pair reported by getpos()."""
        line_no, offset = self.getpos()
        print("\tAt line: ", line_no, " position ", offset)

    # called for an opening tag once its closing ">" has been reached
    def handle_starttag(self, tag, attrs):
        """Report a start tag and its attributes; count <p> tags."""
        global paragraphs
        if tag == "p":
            paragraphs += 1

        print("Encountered a start tag:", tag)
        self._print_position()

        # attrs is a sequence of (name, value) pairs; skip the header if empty
        if attrs:
            print("\tAttributes:")
            for name, value in attrs:
                print("\t", name, "=", value)

    # called for character and text data (tag contents)
    def handle_data(self, data):
        """Report a run of text, ignoring whitespace-only runs."""
        if not data.isspace():
            print("Encountered some text data:", data)
            self._print_position()

    # called for HTML comments
    def handle_comment(self, data):
        """Report the body of an HTML comment."""
        print("Encountered comment:", data)
        self._print_position()

def main():
    """Parse samplehtml.html with MyHTMLParser and print the <p> tag count.

    The count comes from the module-level ``paragraphs`` counter that the
    parser increments while it runs.
    """
    # instantiate the parser and feed it some HTML
    parser = MyHTMLParser()

    # open the sample HTML file and read it; the with-block closes the file
    # handle as soon as the contents are in memory
    with open("samplehtml.html") as f:
        contents = f.read() # read the entire file
    parser.feed(contents)

    print ("Paragraph tags:", paragraphs)

if __name__ == "__main__":
    main()
  

Encountered a start tag: html
	At line:  2  position  0
	Attributes:
	 lang = en
Encountered a start tag: head
	At line:  3  position  2
Encountered a start tag: title
	At line:  4  position  4
Encountered some text data: Sample HTML Document
	At line:  4  position  11
Encountered a start tag: body
	At line:  6  position  2
Encountered comment:  This is a comment 
	At line:  7  position  4
Encountered a start tag: h1
	At line:  8  position  4
Encountered some text data: HTML Sample File
	At line:  8  position  8
Encountered a start tag: p
	At line:  9  position  4
Encountered some text data: This is some text
	At line:  9  position  7
Encountered a start tag: p
	At line:  10  position  4
Encountered a start tag: a
	At line:  10  position  7
	Attributes:
	 href = /contact
Encountered some text data: Contact
	At line:  10  position  26
Paragraph tags: 2


In [11]:
# Example file for parsing and processing XML
# LinkedIn Learning Python course by Joe Marini


import xml.dom.minidom

def main():
    """Load samplexml.xml, list its <skill> tags, add one, and list them again.

    Prints the document node name, the root element's tag name, and the
    "name" attribute of every <skill> element before and after a new
    <skill name="jQuery"> element is appended to the root element.
    """
    def print_skills():
        # get a list of XML tags from the document and print each one
        skills = doc.getElementsByTagName("skill")
        print ("%d skills:" % skills.length)
        for skill in skills:
            print (skill.getAttribute("name"))

    # use the parse() function to load and parse an XML file
    doc = xml.dom.minidom.parse("samplexml.xml")

    # documentElement is always the root element, whereas firstChild can be
    # a comment or doctype node when one precedes the root in the file
    root = doc.documentElement

    # print out the document node and the name of the root tag
    print (doc.nodeName)
    print (root.tagName)

    print_skills()

    # create a new XML tag and add it into the document
    newSkill = doc.createElement("skill")
    newSkill.setAttribute("name", "jQuery")
    root.appendChild(newSkill)

    print_skills()
        
if __name__ == "__main__":
    main()



#document
person
4 skills:
Python
Java
HTML
Javascript
5 skills:
Python
Java
HTML
Javascript
jQuery
