### Fetching internet data

In [1]:
import urllib.request 

In [2]:
def main():
    """Fetch the Google home page and print its HTTP status code and raw body."""
    # The with-block closes the HTTP response (and its underlying socket)
    # even if reading the body fails part-way through.
    with urllib.request.urlopen("http://www.google.com") as webUrl:
        print("result code: " + str(webUrl.getcode()) + "\n")

        # read() returns the undecoded response body
        data = webUrl.read()
    print(data)

if __name__ == "__main__":
    main()

result code: 200

b'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="es"><head><meta content="Google.es permite acceder a la informaci\xf3n mundial en castellano, catal\xe1n, gallego, euskara e ingl\xe9s." name="description"><meta content="noodp" name="robots"><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="OCO6AdzquWbCtZZBzmIdSA==">(function(){window.google={kEI:\'CEFCX5z4DYu7UMOXjZgD\',kEXPI:\'0,202123,3,4,32,4,1151580,5663,730,224,755,4349,207,3204,10,1226,364,1499,611,206,383,246,5,1354,648,371,281,2799,51,264,3,1050,90,173,21,865,28,88,93,6,352,597,105,33,127,7,111,34,19,1119550,1197661,65,550,328991,13677,4855,32691,15248,867,17444,11240,9188,8384,4858,1362,9291,3024,4743,11033,1808,4020,978,7931,5297,2054,920,873,1217,2975,6430,14527,4517,2778,921,2275,8,85,2711,1593,1279,2212,530,149,1103,840,517,1466,4,

### Working with JSON data

In [3]:
import json

In [5]:
# Source URL for the data: the free USGS feed that lists all earthquakes
# from the last day with a magnitude larger than 2.5
urlData = "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"

In [7]:
# Open the URL and read the data.
# Reset `data` first so that re-running this cell after an unsuccessful
# request cannot silently reuse the payload of an earlier run.
data = None
# The with-block closes the HTTP response once the body has been read.
with urllib.request.urlopen(urlData) as webUrl:
    statusCode = webUrl.getcode()
    print("result code: " + str(statusCode))
    if statusCode == 200:
        data = webUrl.read()
    else:
        print("Received an error from server, cannot retrieve results " + str(statusCode))

result code: 200


In [8]:
# Use the json module to parse the downloaded response body (`data`) into
# Python objects; the top level is accessed as a dictionary in later cells
theJSON = json.loads(data)

In [9]:
# The parsed JSON behaves like ordinary nested Python dictionaries:
# show the feed title when the metadata section carries one
metadata = theJSON["metadata"]
if "title" in metadata:
    print(metadata["title"])

USGS Magnitude 2.5+ Earthquakes, Past Day


In [10]:
# output the number of events reported in the feed metadata
count = theJSON["metadata"]["count"]
print("{} events recorded".format(count))

41 events recorded


In [11]:
# list the place description of every event in the feed
for quake in theJSON["features"]:
    props = quake["properties"]
    print(props["place"])
print("--------------\n")

268 km ENE of Tairua, New Zealand
193 km WNW of Panguna, Papua New Guinea
36 km NW of Stanley, Idaho
53 km SE of Iquique, Chile
45 km NW of Mosquito Lake, Alaska
288 km WSW of Tual, Indonesia
48 km E of Farkhār, Afghanistan
96 km S of Charters Towers, Australia
63 km W of Nanwalek, Alaska
126 km SE of Perryville, Alaska
100 km N of Karluk, Alaska
98 km SSW of Nikolski, Alaska
44km W of Ferndale, CA
1 km ENE of Milagro, Ecuador
54 km NNE of Kobuk, Alaska
52 km SSE of Sand Point, Alaska
17 km ESE of Puente Alto, Chile
289 km WNW of Haveluloto, Tonga
37 km WNW of Nanwalek, Alaska
104 km S of Puerto El Triunfo, El Salvador
4 km SE of Maria Antonia, Puerto Rico
87 km E of Petropavlovsk-Kamchatsky, Russia
72km E of Maneadero, B.C., MX
8 km SW of Guánica, Puerto Rico
104 km NNE of Bandar Abbas, Iran
91 km SSW of Sand Point, Alaska
53 km E of Juneau, Alaska
Pagan region, Northern Mariana Islands
56 km E of Juneau, Alaska
21 km SSE of Minto, Alaska
20km ESE of Little Lake, CA
5 km SSE of Poth, 

In [12]:
# print only the events with a magnitude of 4.0 or greater
for quake in theJSON["features"]:
    props = quake["properties"]
    mag = props["mag"]
    # Defensive: a JSON null magnitude is loaded as None, and comparing None
    # with a float raises TypeError on Python 3, so such events are skipped.
    if mag is not None and mag >= 4.0:
        print("%2.1f" % mag, props["place"])
print("--------------\n")

4.6 268 km ENE of Tairua, New Zealand
4.8 193 km WNW of Panguna, Papua New Guinea
4.2 288 km WSW of Tual, Indonesia
4.3 48 km E of Farkhār, Afghanistan
4.7 96 km S of Charters Towers, Australia
4.0 126 km SE of Perryville, Alaska
4.9 98 km SSW of Nikolski, Alaska
4.3 1 km ENE of Milagro, Ecuador
4.2 17 km ESE of Puente Alto, Chile
4.4 289 km WNW of Haveluloto, Tonga
4.7 104 km S of Puerto El Triunfo, El Salvador
4.2 87 km E of Petropavlovsk-Kamchatsky, Russia
4.0 104 km NNE of Bandar Abbas, Iran
5.0 Pagan region, Northern Mariana Islands
4.6 south of the Fiji Islands
5.3 Izu Islands, Japan region
5.4 179 km ENE of Georgetown, Saint Helena
--------------



In [15]:
# print only the events where at least 1 person reported feeling something
print("\n\nEvents that were felt:")
for quake in theJSON["features"]:
    props = quake["properties"]
    feltReports = props["felt"]
    # "felt" may be None (JSON null); test for that with an identity check
    # before doing the numeric comparison.
    if feltReports is not None and feltReports > 0:
        print("%2.1f" % props["mag"], props["place"], " reported " + str(feltReports) + " times")



Events that were felt:
3.3 36 km NW of Stanley, Idaho  reported 2 times
3.9 53 km SE of Iquique, Chile  reported 12 times
3.6 45 km NW of Mosquito Lake, Alaska  reported 4 times
4.7 96 km S of Charters Towers, Australia  reported 34 times
3.8 63 km W of Nanwalek, Alaska  reported 2 times
3.9 37 km WNW of Nanwalek, Alaska  reported 2 times
2.7 4 km SE of Maria Antonia, Puerto Rico  reported 1 times
2.9 21 km SSE of Minto, Alaska  reported 4 times
2.9 20km ESE of Little Lake, CA  reported 2 times
2.8 22 km SSE of Minto, Alaska  reported 2 times
3.4 3 km SSW of Indios, Puerto Rico  reported 28 times
2.6 11km NNW of Redwood Valley, CA  reported 3 times


### Parsing and processing HTML

In [16]:
from html.parser import HTMLParser

# module-level counter of <meta> start tags, incremented by the parser class
metacount = 0

In [22]:
class MyHTMLParser(HTMLParser):
    """HTML parser that prints every tag, text node and comment it encounters.

    Each event is printed together with the parser position at which it was
    reported, and every ``meta`` start tag increments the module-level
    ``metacount`` counter.
    """

    def _print_position(self):
        """Print the parser's current position as reported by getpos()."""
        pos = self.getpos() # returns a tuple indicating line and character
        print("\tAt line: ", pos[0], " position ", pos[1])

    def handle_starttag(self, tag, attrs):
        """Handle an opening tag.

        Called when the closing ">" of the tag is reached. ``tag`` is the tag
        name and ``attrs`` is a list of (name, value) pairs.
        """
        global metacount
        if tag == "meta":
            metacount += 1

        print("Encountered a start tag:", tag)
        self._print_position()

        # an empty attribute list is falsy, so no header is printed for it
        if attrs:
            print("\tAttributes:")
            for name, value in attrs:
                print("\t", name, "=", value)

    def handle_endtag(self, tag):
        """Handle a closing tag."""
        print("Encountered an end tag:", tag)
        self._print_position()

    def handle_data(self, data):
        """Handle character and text data (tag contents)."""
        # whitespace-only text is skipped
        if data.isspace():
            return
        print("Encountered some text data:", data)
        self._print_position()

    def handle_comment(self, data):
        """Handle an HTML comment; ``data`` is the text inside the comment."""
        print("Encountered comment:", data)
        self._print_position()

def main():
    """Parse ``samplehtml.html`` with MyHTMLParser and print the meta tag count."""
    # Start every run from zero; otherwise re-running this cell keeps adding
    # to the total left over from the previous run.
    global metacount
    metacount = 0

    # instantiate the parser
    parser = MyHTMLParser()

    # Open the sample HTML file and read all of it. The with-block closes the
    # file afterwards. open() defaults to text read mode, so the former
    # `f.mode == "r"` check was always true and has been dropped.
    with open("samplehtml.html") as f:
        contents = f.read()
    parser.feed(contents)

    print("%d meta tags encountered" % metacount)

if __name__ == "__main__":
    main()

Encountered a start tag: html
	At line:  2  position  0
	Attributes:
	 lang = en
Encountered a start tag: head
	At line:  3  position  2
Encountered a start tag: meta
	At line:  4  position  4
	Attributes:
	 charset = utf-8
Encountered an end tag: meta
	At line:  4  position  4
Encountered a start tag: title
	At line:  5  position  4
Encountered some text data: Sample HTML Document
	At line:  5  position  11
Encountered an end tag: title
	At line:  5  position  31
Encountered a start tag: meta
	At line:  6  position  4
	Attributes:
	 name = description
	 content = This is a sample HTML file
Encountered an end tag: meta
	At line:  6  position  4
Encountered a start tag: meta
	At line:  7  position  4
	Attributes:
	 name = author
	 content = Administrator
Encountered an end tag: meta
	At line:  7  position  4
Encountered a start tag: meta
	At line:  8  position  4
	Attributes:
	 name = viewport
	 content = width=device-width; initial-scale=1.0
Encountered an end tag: meta
	At line:  8  p

### Manipulating XML

In [23]:
import xml.dom.minidom

In [24]:
# use the parse() function to load and parse an XML file into a DOM document
# object; the file name is given as a relative path
doc = xml.dom.minidom.parse("samplexml.xml")

In [25]:
# print out the document node name and the tag name of the root element
print(doc.nodeName)
# documentElement is always the root element, whereas firstChild can be a
# comment or doctype node (which has no tagName) when one precedes the root.
print(doc.documentElement.tagName)

#document
person


In [26]:
# collect every <skill> element in the document and print its name attribute
skills = doc.getElementsByTagName("skill")
print("%d skills:" % len(skills))
for skillNode in skills:
    print(skillNode.getAttribute("name"))

4 skills:
JavaScript
Python
C#
HTML


In [27]:
# create a new XML tag and add it under the document's root element
newSkill = doc.createElement("skill")
newSkill.setAttribute("name", "jQuery")
# Append to documentElement rather than firstChild: firstChild can be a
# comment or doctype node when one precedes the root element.
# appendChild() returns the appended node, which the notebook displays.
doc.documentElement.appendChild(newSkill)

<DOM Element: skill at 0x1d4f7384868>

In [28]:
# list the <skill> elements again to show the newly appended one
skills = doc.getElementsByTagName("skill")
print("%d skills:" % len(skills))
for skillNode in skills:
    print(skillNode.getAttribute("name"))

5 skills:
JavaScript
Python
C#
HTML
jQuery
