# Python for Everybody
## Chapter 13 | Using Web Services - Exercises

https://www.py4e.com/html3/13-web

***

### Exercise 1: 

#### Change either <font color="red">***geojson.py***</font> or <font color="red">***geoxml.py***</font> to print out the two-character country code from the retrieved data. Add error checking so your program does not traceback if the country code is not there. Once you have it working, search for “Atlantic Ocean” and make sure it can handle locations that are not in any country.

#### Source Codes:

In [None]:
# geojson.py - Source Code

import urllib.request, urllib.parse, urllib.error
import json
import ssl

api_key = False
# If you have a Google Places API key, enter it here
# api_key = 'AIzaSy___IDByT70'
# https://developers.google.com/maps/documentation/geocoding/intro

# With a real key, query the Google geocoding endpoint; without one, fall
# back to the py4e-data endpoint and send 42 as the key.
if api_key is not False:
    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/json?'
else:
    api_key = 42
    serviceurl = 'http://py4e-data.dr-chuck.net/json?'

# SSL context with hostname checking and certificate verification disabled
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Prompt for locations until an empty line is entered; for each one, fetch
# the JSON reply, dump it, and print the first result's coordinates and
# formatted address.
while True:
    address = input('Enter location: ')
    if not address:
        break

    # Build the URL-encoded query string
    parms = {'address': address}
    if api_key is not False:
        parms['key'] = api_key
    url = serviceurl + urllib.parse.urlencode(parms)

    print('Retrieving', url)
    uh = urllib.request.urlopen(url, context=ctx)
    data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    # Any failure while parsing is treated as "no usable data"
    try:
        js = json.loads(data)
    except:
        js = None

    # Only a parsed reply whose status is 'OK' is processed further
    succeeded = bool(js) and 'status' in js and js['status'] == 'OK'
    if not succeeded:
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    print(json.dumps(js, indent=4))

    # Only the first entry of 'results' is reported
    top_hit = js['results'][0]
    coords = top_hit['geometry']['location']
    lat = coords['lat']
    lng = coords['lng']
    print('lat', lat, 'lng', lng)
    location = top_hit['formatted_address']
    print(location)


In [None]:
# geoxml.py - Source Code

import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl

api_key = False
# If you have a Google Places API key, enter it here
# api_key = 'AIzaSy___IDByT70'
# https://developers.google.com/maps/documentation/geocoding/intro

# With a real key, query the Google geocoding endpoint; without one, fall
# back to the py4e-data endpoint and send 42 as the key.
if api_key is not False:
    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/xml?'
else:
    api_key = 42
    serviceurl = 'http://py4e-data.dr-chuck.net/xml?'

# SSL context with hostname checking and certificate verification disabled
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Prompt for locations until an empty line is entered; for each one, fetch
# the XML reply, echo it, and print the first result's coordinates and
# formatted address.
while True:
    address = input('Enter location: ')
    if not address:
        break

    # Build the URL-encoded query string
    parms = {'address': address}
    if api_key is not False:
        parms['key'] = api_key
    url = serviceurl + urllib.parse.urlencode(parms)
    print('Retrieving', url)
    uh = urllib.request.urlopen(url, context=ctx)

    # The payload is kept as bytes; it is decoded only for display
    data = uh.read()
    print('Retrieved', len(data), 'characters')
    print(data.decode())
    tree = ET.fromstring(data)

    # Only the first <result> element is reported
    results = tree.findall('result')
    top_hit = results[0]
    coords = top_hit.find('geometry').find('location')
    lat = coords.find('lat').text
    lng = coords.find('lng').text
    location = top_hit.find('formatted_address').text

    print('lat', lat, 'lng', lng)
    print(location)


#### Edited Code:

In [4]:
# geojson.py - Edited Code

import urllib.request, urllib.parse, urllib.error
import json
import ssl

api_key = 42
serviceurl = 'http://py4e-data.dr-chuck.net/json?'

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Prompt for locations until the user enters an empty line or 'done'.
# For each location, print the two-character country code of the first
# result, or a notice when the result has no country component.
while True:
    address = input('Enter location: ')
    if len(address) < 1 or address.lower() == 'done':
        print('Program ended. Thank you.')
        break

    parms = {'address': address, 'key': api_key}
    url = serviceurl + urllib.parse.urlencode(parms)

    print('Retrieving', url)
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url, context=ctx) as uh:
            data = uh.read().decode()
    except urllib.error.URLError as err:
        # Network/HTTP failures are reported and the user is prompted again
        # instead of the program ending with a traceback.
        print('==== Failure To Retrieve ====')
        print(err)
        continue
    print('Retrieved', len(data), 'characters')

    try:
        js = json.loads(data)
    except json.JSONDecodeError:
        js = None

    # Anything that is not a JSON object with status 'OK' is a failure.
    if not isinstance(js, dict) or js.get('status') != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    results = js.get('results') or []
    if not results:
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # Edits start here:

    first_result = results[0]
    components = first_result.get('address_components', [])
    location = first_result.get('formatted_address', 'Unknown')

    # A component is the country when 'country' appears anywhere in its
    # 'types' list; membership is used rather than comparing the whole list
    # so that extra or reordered type tags do not hide the match.
    country_code = next(
        (comp.get('short_name') for comp in components
         if 'country' in comp.get('types', [])),
        None,
    )

    print('----------')
    if country_code is not None:
        print('Country Code:', country_code)
    else:
        # No country component (e.g. an ocean): report it without raising.
        print('This location does not fall within continental borders.')
    print('Location:', location)
    print('\n')

Enter location: South Africa
Retrieving http://py4e-data.dr-chuck.net/json?address=South+Africa&key=42
Retrieved 1185 characters
----------
Country Code: ZA
Location: South Africa


Enter location: BErlin
Retrieving http://py4e-data.dr-chuck.net/json?address=BErlin&key=42
Retrieved 1524 characters
----------
Country Code: DE
Location: Berlin, Germany


Enter location: Cape Town
Retrieving http://py4e-data.dr-chuck.net/json?address=Cape+Town&key=42
Retrieved 1726 characters
----------
Country Code: ZA
Location: Cape Town, South Africa


Enter location: Ann Arbor
Retrieving http://py4e-data.dr-chuck.net/json?address=Ann+Arbor&key=42
Retrieved 1736 characters
----------
Country Code: US
Location: Ann Arbor, MI, USA


Enter location: done
Program ended. Thank you.


In [17]:
# geoxml.py - Edited Code

import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl

api_key = 42
serviceurl = 'http://py4e-data.dr-chuck.net/xml?'

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Prompt for locations until the user enters an empty line or 'done'.
# For each location, print the two-character country code of the first
# <result>, or a notice when that result has no country component.
while True:
    address = input('Enter location: ')
    if len(address) < 1 or address.lower() == 'done':
        print('Program ended. Thank you.')
        break

    parms = {'address': address, 'key': api_key}
    url = serviceurl + urllib.parse.urlencode(parms)
    print('Retrieving', url)
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url, context=ctx) as uh:
            data = uh.read()
    except urllib.error.URLError as err:
        # Network/HTTP failures are reported and the user is prompted again
        # instead of the program ending with a traceback.
        print('==== Failure To Retrieve ====')
        print(err)
        continue

    print('Retrieved', len(data), 'characters')
    try:
        tree = ET.fromstring(data)
    except ET.ParseError:
        print('==== Failure To Retrieve ====')
        print(data.decode(errors='replace'))
        continue

    # Edits start here:

    # Work from the first <result> only, so the country code and the
    # formatted address printed below always describe the same result.
    first_result = tree.find('result')
    if first_result is None:
        print('==== Failure To Retrieve ====')
        print(data.decode(errors='replace'))
        continue

    location = first_result.findtext('formatted_address', default='Unknown')

    # The country component is the one carrying a <type>country</type> tag.
    # A matching component without a <short_name> leaves country_code as
    # None and is therefore reported like a missing country.
    country_code = None
    for component in first_result.findall('address_component'):
        kinds = component.findall('.//type')
        if any(kind.text == 'country' for kind in kinds):
            country_code = component.findtext('short_name')
            break

    print('----------')
    if country_code is not None:
        print('Country Code:', country_code)
    else:
        # No country component (e.g. an ocean): report it without raising.
        print('This location does not fall within continental borders.')
    print('Location:', location)
    print('\n')

Enter location: Michigan
Retrieving http://py4e-data.dr-chuck.net/xml?address=Michigan&key=42
Retrieved 1200 characters
----------
Country Code: US
Location: Michigan, USA


Enter location: Cape Town
Retrieving http://py4e-data.dr-chuck.net/xml?address=Cape+Town&key=42
Retrieved 1553 characters
----------
Country Code: ZA
Location: Cape Town, South Africa


Enter location: South Africa
Retrieving http://py4e-data.dr-chuck.net/xml?address=South+Africa&key=42
Retrieved 997 characters
----------
Country Code: ZA
Location: South Africa


Enter location: Atlantic Ocean
Retrieving http://py4e-data.dr-chuck.net/xml?address=Atlantic+Ocean&key=42
Retrieved 1038 characters
----------
This location does not fall within continental borders.
Location: Atlantic Ocean


Enter location: Indian Ocean
Retrieving http://py4e-data.dr-chuck.net/xml?address=Indian+Ocean&key=42
Retrieved 1031 characters
----------
This location does not fall within continental borders.
Location: Indian Ocean


Enter location

***

## Assignment 1
### Extracting Data from XML

In this assignment you will write a Python program somewhat similar to http://www.py4e.com/code3/geoxml.py. The program will prompt for a URL, read the XML data from that URL using **urllib** and then parse and extract the comment counts from the XML data, compute the sum of the numbers in the file.

We provide two files for this assignment. One is a sample file where we give you the sum for your testing and the other is the actual data you need to process for the assignment.

- Sample data: http://py4e-data.dr-chuck.net/comments_42.xml (Sum=2553)<br>
- Actual data: http://py4e-data.dr-chuck.net/comments_941923.xml (Sum ends with 14)

You do not need to save these files to your folder since your program will read the data directly from the URL. **Note:** Each student will have a distinct data url for the assignment - so only use your own data url for analysis.

**Data Format and Approach**

The data consists of a number of names and comment counts in XML as follows:

In [None]:
<comment>
  <name>Matthias</name>
  <count>97</count>
</comment>

You are to look through all the < comment > tags, find the < count > values, and sum the numbers. The closest sample code that shows how to parse XML is <a href="https://www.py4e.com/tools/python-data/?PHPSESSID=a74f64eba639bda88992b818c1ea20a3">geoxml.py</a>. But since the nesting of the elements in our data is different from the data we are parsing in that sample code, you will have to make real changes to the code.

To make the code a little simpler, you can use an XPath selector string to look through the entire tree of XML for any tag named 'count' with the following line of code:

In [None]:
counts = tree.findall('.//count')

Take a look at the Python ElementTree documentation and look for the supported XPath syntax for details. You could also work from the top of the XML down to the comments node and then loop through the child nodes of the comments node.

**Sample Execution**

In [None]:
$ python3 solution.py
Enter location: http://py4e-data.dr-chuck.net/comments_42.xml
Retrieving http://py4e-data.dr-chuck.net/comments_42.xml
Retrieved 4189 characters
Count: 50
Sum: 2...

**Turning in the Assignment**

Enter the sum from the actual data and your Python code below:

Sum: (ends with 14)

In [7]:
# Sean Ravenhill - program for the assignment.

import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl

# SSL context with hostname checking and certificate verification disabled
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Running total of every <count> value found in the document
counts = 0

# Keep prompting until the user types 'done' or one URL has been processed.
while True:
    url = input('Enter XML URL: ')
    print('\n')

    if url.lower() == 'done':
        print("Program Ended. Thank You.")
        break

    # Reject empty input and anything that does not mention '.xml'
    if not url or '.xml' not in url:
        print('Please enter a valid url with an .xml suffix')
        print('\n')
        continue

    print('Retrieving', url)
    uh = urllib.request.urlopen(url, context=ctx)

    data = uh.read()
    print('Retrieved', len(data), 'characters')
    tree = ET.fromstring(data)

    # './/count' selects every <count> element below the root
    lst = tree.findall('.//count')
    for child in lst:
        counts = counts + int(child.text)

    print('\n')
    print(counts)

    # One document per run: stop after the first successful retrieval
    break

Enter XML URL: http://py4e-data.dr-chuck.net/comments_941923.xml


Retrieving http://py4e-data.dr-chuck.net/comments_941923.xml
Retrieved 4238 characters


2614


***

## Assignment 2
### Extracting Data from JSON

In this assignment you will write a Python program somewhat similar to http://www.py4e.com/code3/json2.py. The program will prompt for a URL, read the JSON data from that URL using **urllib** and then parse and extract the comment counts from the JSON data, compute the sum of the numbers in the file and enter the sum below:

We provide two files for this assignment. One is a sample file where we give you the sum for your testing and the other is the actual data you need to process for the assignment.

- Sample data: http://py4e-data.dr-chuck.net/comments_42.json (Sum=2553)<br>
- Actual data: http://py4e-data.dr-chuck.net/comments_941924.json (Sum ends with 65)

You do not need to save these files to your folder since your program will read the data directly from the URL. **Note:** Each student will have a distinct data url for the assignment - so only use your own data url for analysis.

**Data Format**

The data consists of a number of names and comment counts in JSON as follows:

In [None]:
{
  comments: [
    {
      name: "Matthias"
      count: 97
    },
    {
      name: "Geomer"
      count: 97
    }
    ...
  ]
}

The closest sample code that shows how to parse JSON and extract a list is <a href="https://www.py4e.com/code3/json2.py?PHPSESSID=bc214bf945d623794ca0d3ab4ab1a2cf">json2.py</a>. You might also want to look at <a href="https://www.py4e.com/code3/geoxml.py?PHPSESSID=bc214bf945d623794ca0d3ab4ab1a2cf">geoxml.py</a> to see how to prompt for a URL and retrieve data from a URL.

**Sample Execution**

In [None]:
$ python3 solution.py
Enter location: http://py4e-data.dr-chuck.net/comments_42.json
Retrieving http://py4e-data.dr-chuck.net/comments_42.json
Retrieved 2733 characters
Count: 50
Sum: 2...

**Turning in the Assignment**

Enter the sum from the actual data and your Python code below:

Sum: (ends with 65)

In [21]:
# Sean Ravenhill - program for the assignment.

import urllib.request, urllib.parse, urllib.error
import json
import ssl

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Running total of the 'count' values under the 'comments' key
counts = 0

# Keep prompting until the user types 'done' or one URL has been processed.
while True:
    url = input('Enter JSON URL: ')
    print('\n')
    if url.lower() == 'done':
        print("Program Ended. Thank You.")
        break
    if len(url) < 1 or '.json' not in url:
        print('Please enter a valid url with an .json suffix')
        print('\n')
        continue

    print('Retrieving', url)
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url, context=ctx) as uh:
            data = uh.read()
    except urllib.error.URLError as err:
        # Report the failure and prompt again instead of crashing.
        print('==== Failure To Retrieve ====')
        print(err)
        continue
    print('Retrieved', len(data), 'characters')

    try:
        info = json.loads(data)
    except ValueError:
        # Covers both malformed JSON and undecodable bytes.
        print('==== Failure To Retrieve ====')
        print(data.decode(errors='replace'))
        continue

    # Look the list up directly instead of scanning every key; a payload
    # without a 'comments' key contributes nothing to the total.
    comments = info.get('comments', []) if isinstance(info, dict) else []
    counts += sum(int(item['count']) for item in comments)

    print(counts)

    # One document per run: stop after the first successful retrieval
    break

Enter JSON URL: http://py4e-data.dr-chuck.net/comments_941924.json


Retrieving http://py4e-data.dr-chuck.net/comments_941924.json
Retrieved 2728 characters
2665


**Answer:**

Sum: 2665

***

## Assignment 3
### Calling a JSON API

In this assignment you will write a Python program somewhat similar to http://www.py4e.com/code3/geojson.py. The program will prompt for a location, contact a web service and retrieve JSON for the web service and parse that data, and retrieve the first **place_id** from the JSON. A place ID is a textual identifier that uniquely identifies a place as within Google Maps.

**API End Points**

To complete this assignment, you should use this API endpoint that has a static subset of the Google Data:

In [None]:
http://py4e-data.dr-chuck.net/json?

This API uses the same parameter (address) as the Google API. This API also has no rate limit so you can test as often as you like. If you visit the URL with no parameters, you get "No address..." response.

To call the API, you need to include a **key=** parameter and provide the address that you are requesting as the **address=** parameter that is properly URL encoded using the **urllib.parse.urlencode()** function as shown in http://www.py4e.com/code3/geojson.py

Make sure to check that your code is using the API endpoint shown above. You will get *different* results from the **geojson** and **json** endpoints, so make sure you are using the same endpoint as this autograder is using.

**Test Data / Sample Execution**

You can test to see if your program is working with a location of "South Federal University" which will have a **place_id** of "ChIJ1Z9sheJZkFQRDePQqQebCdg".

In [None]:
$ python3 solution.py
Enter location: South Federal University
Retrieving http://...
Retrieved 2275 characters
Place id ChIJ1Z9sheJZkFQRDePQqQebCdg

**Turn In**

Please run your program to find the **place_id** for this location:

In [None]:
Washington State University

Make sure to enter the name and case exactly as above and enter the **place_id** and your Python code below. Hint: The first seven characters of the **place_id** are "ChIJb6h ..."

Make sure to retrieve the data from the URL specified above and **not** the normal Google API. Your program should work with the Google API - but the **place_id** may not match for this assignment.

place_id:

In [32]:
# Sean Ravenhill - program for the assignment.

import urllib.request, urllib.parse, urllib.error
import json
import ssl

api_key = 42
serviceurl = 'http://py4e-data.dr-chuck.net/json?'

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Prompt for a location until one lookup succeeds or the user types 'done',
# then print the place_id of the first result.
while True:
    address = input('Enter location: ')
    if address.lower() == 'done':
        print("Program Ended. Thank You.")
        break
    if len(address) < 1:
        print('Please enter a valid location name.')
        continue

    parms = {'address': address, 'key': api_key}
    url = serviceurl + urllib.parse.urlencode(parms)

    print('Retrieving', url)
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url, context=ctx) as uh:
            data = uh.read().decode()
    except urllib.error.URLError as err:
        # Report the failure and prompt again instead of crashing.
        print('==== Failure To Retrieve ====')
        print(err)
        continue
    print('Retrieved', len(data), 'characters')

    try:
        js = json.loads(data)
    except json.JSONDecodeError:
        js = None

    # Anything that is not a JSON object with status 'OK' is a failure.
    if not isinstance(js, dict) or js.get('status') != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # Guard the lookup so an 'OK' reply without results, or a first result
    # without a place_id, is reported rather than raising.
    results = js.get('results') or []
    place_id = results[0].get('place_id') if results else None
    if place_id is None:
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    print("Place ID:", place_id)

    # Stop after the first successful lookup
    break

Enter location: Washington State University
Retrieving http://py4e-data.dr-chuck.net/json?address=Washington+State+University&key=42
Retrieved 1749 characters
Place ID: ChIJb6hIyAWHn1QR23BXtaTyZfQ


**Answer:**

place_id: ChIJb6hIyAWHn1QR23BXtaTyZfQ