# Pulling data from public APIs (without registration) - GET request

In [1]:
# loading the packages
# requests provides us with the capabilities of sending an HTTP request to a server
import requests

## Extracting data on currency exchange rates

In [2]:
# We will use an API containing currency exchange rates as published by the European Central Bank
# Documentation at https://exchangeratesapi.io

### Sending a GET request

In [3]:
# Define the base URL
# Base URL: the part of the URL common to all requests, not containing the parameters
base_url = "https://api.exchangeratesapi.io/latest"

In [4]:
# We can make a GET request to this API endpoint with requests.get
response = requests.get(base_url)

# This method returns the response from the server
# We store this response in a variable for future processing

### Investigating the response

In [5]:
# Checking if the request went through ok
response.ok

True

In [6]:
# Checking the status code of the response
response.status_code

200

In [7]:
# Inspecting the content body of the response (as a regular 'string')
response.text

'{"rates":{"CAD":1.5644,"HKD":9.1236,"ISK":161.1,"PHP":57.631,"DKK":7.4472,"HUF":345.15,"CZK":26.117,"AUD":1.6514,"RON":4.8356,"SEK":10.2513,"IDR":17374.0,"INR":88.051,"BRL":6.3782,"RUB":86.3013,"HRK":7.4885,"JPY":125.92,"THB":36.62,"CHF":1.0755,"SGD":1.6166,"PLN":4.4033,"BGN":1.9558,"TRY":8.6074,"CNY":8.1758,"NOK":10.5463,"NZD":1.7997,"ZAR":20.5097,"USD":1.1771,"MXN":26.3381,"ILS":4.0101,"GBP":0.90475,"KRW":1393.92,"MYR":4.9362},"base":"EUR","date":"2020-08-12"}'

In [8]:
# Inspecting the content of the response (in 'bytes' format)
response.content

b'{"rates":{"CAD":1.5644,"HKD":9.1236,"ISK":161.1,"PHP":57.631,"DKK":7.4472,"HUF":345.15,"CZK":26.117,"AUD":1.6514,"RON":4.8356,"SEK":10.2513,"IDR":17374.0,"INR":88.051,"BRL":6.3782,"RUB":86.3013,"HRK":7.4885,"JPY":125.92,"THB":36.62,"CHF":1.0755,"SGD":1.6166,"PLN":4.4033,"BGN":1.9558,"TRY":8.6074,"CNY":8.1758,"NOK":10.5463,"NZD":1.7997,"ZAR":20.5097,"USD":1.1771,"MXN":26.3381,"ILS":4.0101,"GBP":0.90475,"KRW":1393.92,"MYR":4.9362},"base":"EUR","date":"2020-08-12"}'

In [9]:
# The data is presented in JSON format

### Handling the JSON

In [10]:
# Requests has in-build method to directly convert the response to JSON format
response.json()

{'rates': {'CAD': 1.5644,
  'HKD': 9.1236,
  'ISK': 161.1,
  'PHP': 57.631,
  'DKK': 7.4472,
  'HUF': 345.15,
  'CZK': 26.117,
  'AUD': 1.6514,
  'RON': 4.8356,
  'SEK': 10.2513,
  'IDR': 17374.0,
  'INR': 88.051,
  'BRL': 6.3782,
  'RUB': 86.3013,
  'HRK': 7.4885,
  'JPY': 125.92,
  'THB': 36.62,
  'CHF': 1.0755,
  'SGD': 1.6166,
  'PLN': 4.4033,
  'BGN': 1.9558,
  'TRY': 8.6074,
  'CNY': 8.1758,
  'NOK': 10.5463,
  'NZD': 1.7997,
  'ZAR': 20.5097,
  'USD': 1.1771,
  'MXN': 26.3381,
  'ILS': 4.0101,
  'GBP': 0.90475,
  'KRW': 1393.92,
  'MYR': 4.9362},
 'base': 'EUR',
 'date': '2020-08-12'}

In [11]:
# In Python, this JSON is stored as a dictionary
type(response.json())

dict

In [12]:
# A useful library for JSON manipulation and pretty print
import json

# It has two main methods:
# .loads(), which creates a Python dictionary from a JSON format string (just as response.json() does)
# .dumps(), which creates a JSON format string out of a Python dictionary 

In [13]:
# .dumps() has options to make the string 'prettier', more readable
# We can choose the number of spaces to be used as indentation
json.dumps(response.json(), indent=4)

'{\n    "rates": {\n        "CAD": 1.5644,\n        "HKD": 9.1236,\n        "ISK": 161.1,\n        "PHP": 57.631,\n        "DKK": 7.4472,\n        "HUF": 345.15,\n        "CZK": 26.117,\n        "AUD": 1.6514,\n        "RON": 4.8356,\n        "SEK": 10.2513,\n        "IDR": 17374.0,\n        "INR": 88.051,\n        "BRL": 6.3782,\n        "RUB": 86.3013,\n        "HRK": 7.4885,\n        "JPY": 125.92,\n        "THB": 36.62,\n        "CHF": 1.0755,\n        "SGD": 1.6166,\n        "PLN": 4.4033,\n        "BGN": 1.9558,\n        "TRY": 8.6074,\n        "CNY": 8.1758,\n        "NOK": 10.5463,\n        "NZD": 1.7997,\n        "ZAR": 20.5097,\n        "USD": 1.1771,\n        "MXN": 26.3381,\n        "ILS": 4.0101,\n        "GBP": 0.90475,\n        "KRW": 1393.92,\n        "MYR": 4.9362\n    },\n    "base": "EUR",\n    "date": "2020-08-12"\n}'

In [14]:
# In order to visualize these changes, we need to print the string
print(json.dumps(response.json(), indent=4))

{
    "rates": {
        "CAD": 1.5644,
        "HKD": 9.1236,
        "ISK": 161.1,
        "PHP": 57.631,
        "DKK": 7.4472,
        "HUF": 345.15,
        "CZK": 26.117,
        "AUD": 1.6514,
        "RON": 4.8356,
        "SEK": 10.2513,
        "IDR": 17374.0,
        "INR": 88.051,
        "BRL": 6.3782,
        "RUB": 86.3013,
        "HRK": 7.4885,
        "JPY": 125.92,
        "THB": 36.62,
        "CHF": 1.0755,
        "SGD": 1.6166,
        "PLN": 4.4033,
        "BGN": 1.9558,
        "TRY": 8.6074,
        "CNY": 8.1758,
        "NOK": 10.5463,
        "NZD": 1.7997,
        "ZAR": 20.5097,
        "USD": 1.1771,
        "MXN": 26.3381,
        "ILS": 4.0101,
        "GBP": 0.90475,
        "KRW": 1393.92,
        "MYR": 4.9362
    },
    "base": "EUR",
    "date": "2020-08-12"
}


In [15]:
# It contains 3 keys; the value for the 'rates' key is another dictionary
response.json().keys()

dict_keys(['rates', 'base', 'date'])

### Incorporating parameters in the GET request

In [16]:
# Request parameters are added to the URL after a question mark '?'
# In this case, we request for the exchange rates of the US Dollar (USD) and Pound Sterling (GBP) only
param_url = base_url + "?symbols=USD,GBP"
param_url

'https://api.exchangeratesapi.io/latest?symbols=USD,GBP'

In [17]:
# Making a request to the server with the new URL, containing the parameters
response = requests.get(param_url)
response.status_code

200

In [18]:
# Saving the response data
data = response.json()
data

{'rates': {'USD': 1.1771, 'GBP': 0.90475}, 'base': 'EUR', 'date': '2020-08-12'}

In [19]:
# 'data' is a dictionary
data['base']

'EUR'

In [20]:
data['date']

'2020-08-12'

In [21]:
data['rates']

{'USD': 1.1771, 'GBP': 0.90475}

In [22]:
# As per the documentation of this API, we can change the base with the parameter 'base'
param_url = base_url + "?symbols=GBP&base=USD"

In [23]:
# Sending a request and saving the response JSON, all at once
data = requests.get(param_url).json()
data

{'rates': {'GBP': 0.7686262849}, 'base': 'USD', 'date': '2020-08-12'}

In [24]:
usd_to_gbp = data['rates']['GBP']
usd_to_gbp

0.7686262849

### Obtaining historical exchange rates

In [25]:
base_url = "https://api.exchangeratesapi.io"

In [26]:
# We can also ask for the exhange rates at a particular day in the past with '/DATE', where DATE is in the format YYYY-MM-DD
historical_url = base_url + "/2016-01-26"
historical_url

'https://api.exchangeratesapi.io/2016-01-26'

In [27]:
# Making the GET request
response = requests.get(historical_url)
response.status_code

200

In [28]:
# Pretty printing the data
data = response.json()
print(json.dumps(data, indent=4))

{
    "rates": {
        "CAD": 1.5411,
        "HKD": 8.4498,
        "SGD": 1.5498,
        "PHP": 52.051,
        "DKK": 7.4622,
        "HUF": 312.73,
        "CZK": 27.021,
        "AUD": 1.555,
        "RON": 4.5348,
        "SEK": 9.2644,
        "IDR": 15004.76,
        "INR": 73.5797,
        "BRL": 4.4465,
        "RUB": 86.7725,
        "HRK": 7.6658,
        "JPY": 128.22,
        "THB": 38.865,
        "CHF": 1.1008,
        "PLN": 4.4942,
        "BGN": 1.9558,
        "TRY": 3.2699,
        "CNY": 7.1314,
        "NOK": 9.4858,
        "NZD": 1.6777,
        "ZAR": 17.8881,
        "USD": 1.0837,
        "MXN": 20.1259,
        "ILS": 4.3084,
        "GBP": 0.76095,
        "KRW": 1303.82,
        "MYR": 4.6335
    },
    "base": "EUR",
    "date": "2016-01-26"
}


### Extracting data for a time period

In [29]:
# The last feautre of this API is: giving the historical exchange rates for every day over some time period

In [30]:
# The URL for this request is formed with '/history' and the parameters 'start_at' and 'end_at'
time_period = base_url + "/history" + "?start_at=2017-04-26&end_at=2018-04-26" + "&symbols=GBP"
time_period

'https://api.exchangeratesapi.io/history?start_at=2017-04-26&end_at=2018-04-26&symbols=GBP'

In [31]:
# Extracting the response JSON object
data = requests.get(time_period).json()

In [32]:
# Pretty printing the JSON
# Notice that the dates are in random order
print(json.dumps(data, indent=4))

{
    "rates": {
        "2017-08-02": {
            "GBP": 0.89425
        },
        "2017-08-07": {
            "GBP": 0.90435
        },
        "2017-12-05": {
            "GBP": 0.88183
        },
        "2017-08-03": {
            "GBP": 0.90318
        },
        "2017-10-17": {
            "GBP": 0.89148
        },
        "2017-05-12": {
            "GBP": 0.84588
        },
        "2017-10-23": {
            "GBP": 0.8909
        },
        "2017-10-04": {
            "GBP": 0.88768
        },
        "2017-08-16": {
            "GBP": 0.90993
        },
        "2017-06-12": {
            "GBP": 0.88545
        },
        "2018-04-10": {
            "GBP": 0.87183
        },
        "2017-09-19": {
            "GBP": 0.88622
        },
        "2017-10-27": {
            "GBP": 0.88633
        },
        "2018-03-26": {
            "GBP": 0.87248
        },
        "2017-09-06": {
            "GBP": 0.91428
        },
        "2018-04-06": {
            "GBP": 0.87295
   

In [33]:
# We can use the 'sort_keys' parameter of the json.dumps() method to order these dates chronologically
print(json.dumps(data, indent=4, sort_keys=True))

{
    "base": "EUR",
    "end_at": "2018-04-26",
    "rates": {
        "2017-04-26": {
            "GBP": 0.84903
        },
        "2017-04-27": {
            "GBP": 0.8442
        },
        "2017-04-28": {
            "GBP": 0.84473
        },
        "2017-05-02": {
            "GBP": 0.8452
        },
        "2017-05-03": {
            "GBP": 0.8444
        },
        "2017-05-04": {
            "GBP": 0.84765
        },
        "2017-05-05": {
            "GBP": 0.8471
        },
        "2017-05-08": {
            "GBP": 0.84465
        },
        "2017-05-09": {
            "GBP": 0.843
        },
        "2017-05-10": {
            "GBP": 0.83985
        },
        "2017-05-11": {
            "GBP": 0.84485
        },
        "2017-05-12": {
            "GBP": 0.84588
        },
        "2017-05-15": {
            "GBP": 0.84928
        },
        "2017-05-16": {
            "GBP": 0.85868
        },
        "2017-05-17": {
            "GBP": 0.85745
        },
        "201

In [34]:
# This data can then be used to plot the change in the exchange rate through time or any other further analysis

### Testing the API response to incorrect input

In [35]:
# Testing how the API behaves if given incorrect input parameters

In [36]:
# Trying out an invalid DATE
invalid_url = base_url + "/2019-13-01"

In [37]:
# Making the request
response = requests.get(invalid_url)
response.status_code # The server responds with a 400 error code indicating a 'bad request'

400

In [38]:
# There is also an error message in the JSON
response.json()

{'error': "time data '2019-13-01' does not match format '%Y-%m-%d'"}

In [39]:
# Testing an invalid BASE CURRENCY
invalid_url = base_url + "/2019-12-01?base=USB"

In [40]:
response = requests.get(invalid_url)
response.status_code

400

In [41]:
response.json()

{'error': "Base 'USB' is not supported."}

In [42]:
# Testing an invalid EXCHANGE CURRENCY
invalid_url = base_url + "/2019-12-01?symbols=WBP"

In [43]:
response = requests.get(invalid_url)
response.status_code

400

In [44]:
response.json()

{'error': "Symbols 'WBP' are invalid for date 2019-12-01."}

### Creating a simple currency convertor

In [45]:
# We can use the data provided from this API to create a simple currency convertor

In [46]:
# Gathering input parameters from the user
date = input("Please enter the date (in the format 'yyyy-mm-dd' or 'latest'): ")
base = input("Convert from (currency): ")
curr = input("Convert to (currency): ")
quan = float(input("How much {} do you want to convert: ".format(base)))

# Constructing the URL based on the user parameters and sending a request to the server
url = base_url + "/" + date + "?base=" + base + "&symbols=" + curr
response = requests.get(url)

# Displaying the error message, if something went wrong
if(response.ok is False):
    print("\nError {}:".format(response.status_code))
    print(response.json()['error'])

else:
    data = response.json()
    rate = data['rates'][curr]
    
    result = quan*rate
    
    print("\n{0} {1} is equal to {2} {3}, based upon exchange rates on {4}".format(quan,base,result,curr,data['date']))


Please enter the date (in the format 'yyyy-mm-dd' or 'latest'): 
Convert from (currency): 
Convert to (currency): 
How much  do you want to convert: 


ValueError: could not convert string to float: ''

## Another example: the iTunes search API

In [47]:
# The documentation for this particular API can be found here:
# https://affiliate.itunes.apple.com/resources/documentation/itunes-store-web-service-search-api/

### Passing parameters in the request

In [48]:
# define base URL
base_site = "https://itunes.apple.com/search"

In [49]:
# We can manually add parameters to the URL, as seen before
# E.G., searching for 'the beatles'
url = base_site + "?term=the+beatles&country=us"

# submit a GET request with parameters needed
requests.get(url)

<Response [200]>

In [50]:
# Note, that the space in 'the beatles' was replaced with a '+' in the URL
# Having to worry about special symbols in the URL can make the code harder to write and 'more buggy'

In [51]:
# Another way of expressing the parameters is to pass them to the get() method
# We pass the key/value parameter pairs as a dictionary to 'params'

r = requests.get(base_site, params = {"term": "the beatles", "country": "us"})
r.status_code

200

In [52]:
# The request package incorporates those paramaters into the URL automatically
# check the URL we submitted the request to
r.url

'https://itunes.apple.com/search?term=the+beatles&country=us'

In [53]:
# This way of stating parameters is the preffered one

### Investigating the output and parameters

In [54]:
# The request went through OK
r.status_code

200

In [55]:
# Inspecting the response's JSON
info = r.json()
print(json.dumps(info, indent=4))

{
    "resultCount": 50,
    "results": [
        {
            "wrapperType": "track",
            "kind": "song",
            "artistId": 136975,
            "collectionId": 1474815798,
            "trackId": 1474815898,
            "artistName": "The Beatles",
            "collectionName": "Abbey Road (2019 Mix)",
            "trackName": "Here Comes the Sun",
            "collectionCensoredName": "Abbey Road (2019 Mix)",
            "trackCensoredName": "Here Comes the Sun (2019 Mix)",
            "artistViewUrl": "https://music.apple.com/us/artist/the-beatles/136975?uo=4",
            "collectionViewUrl": "https://music.apple.com/us/album/here-comes-the-sun-2019-mix/1474815798?i=1474815898&uo=4",
            "trackViewUrl": "https://music.apple.com/us/album/here-comes-the-sun-2019-mix/1474815798?i=1474815898&uo=4",
            "previewUrl": "https://audio-ssl.itunes.apple.com/itunes-assets/AudioPreview123/v4/a4/d6/36/a4d6368e-731a-b2dc-d1a2-786e7886fbc1/mzaf_10527553788341453800.p

In [56]:
# This seems to contain a lot of data
# Let's check if there are some keys we don't see at first glance in the outermost dictionary
info.keys()

dict_keys(['resultCount', 'results'])

In [57]:
# There are, indeed, only these two keys

In [58]:
# The second one contains a list of all the results
# Let's look at one such result
print(json.dumps(info['results'][0], indent=4))

# It's a simple dictionary with a lot of data

{
    "wrapperType": "track",
    "kind": "song",
    "artistId": 136975,
    "collectionId": 1474815798,
    "trackId": 1474815898,
    "artistName": "The Beatles",
    "collectionName": "Abbey Road (2019 Mix)",
    "trackName": "Here Comes the Sun",
    "collectionCensoredName": "Abbey Road (2019 Mix)",
    "trackCensoredName": "Here Comes the Sun (2019 Mix)",
    "artistViewUrl": "https://music.apple.com/us/artist/the-beatles/136975?uo=4",
    "collectionViewUrl": "https://music.apple.com/us/album/here-comes-the-sun-2019-mix/1474815798?i=1474815898&uo=4",
    "trackViewUrl": "https://music.apple.com/us/album/here-comes-the-sun-2019-mix/1474815798?i=1474815898&uo=4",
    "previewUrl": "https://audio-ssl.itunes.apple.com/itunes-assets/AudioPreview123/v4/a4/d6/36/a4d6368e-731a-b2dc-d1a2-786e7886fbc1/mzaf_10527553788341453800.plus.aac.p.m4a",
    "artworkUrl30": "https://is2-ssl.mzstatic.com/image/thumb/Music123/v4/6e/db/f5/6edbf5a8-b990-8f89-af12-8cc464f03da1/source/30x30bb.jpg",
    "

In [59]:
# The first one states how many results are shown (by default, 50)
info["resultCount"]

50

In [60]:
# The number of results can be set (to a maximum of 200) with the 'limit' parameter
r = requests.get(base_site, params = {"term": "the beatles", "country": "us", "limit": 200})
r.ok

True

In [61]:
info = r.json()
info

{'resultCount': 200,
 'results': [{'wrapperType': 'track',
   'kind': 'song',
   'artistId': 136975,
   'collectionId': 1474815798,
   'trackId': 1474815898,
   'artistName': 'The Beatles',
   'collectionName': 'Abbey Road (2019 Mix)',
   'trackName': 'Here Comes the Sun',
   'collectionCensoredName': 'Abbey Road (2019 Mix)',
   'trackCensoredName': 'Here Comes the Sun (2019 Mix)',
   'artistViewUrl': 'https://music.apple.com/us/artist/the-beatles/136975?uo=4',
   'collectionViewUrl': 'https://music.apple.com/us/album/here-comes-the-sun-2019-mix/1474815798?i=1474815898&uo=4',
   'trackViewUrl': 'https://music.apple.com/us/album/here-comes-the-sun-2019-mix/1474815798?i=1474815898&uo=4',
   'previewUrl': 'https://audio-ssl.itunes.apple.com/itunes-assets/AudioPreview123/v4/a4/d6/36/a4d6368e-731a-b2dc-d1a2-786e7886fbc1/mzaf_10527553788341453800.plus.aac.p.m4a',
   'artworkUrl30': 'https://is2-ssl.mzstatic.com/image/thumb/Music123/v4/6e/db/f5/6edbf5a8-b990-8f89-af12-8cc464f03da1/source/30x3

In [62]:
len(info['results'])

200

In [63]:
# Finally, let's check the response to an invalid input
check_resp = requests.get(base_site, params = {"term": "alternative", "country": "us", "media": "hahaha"})
check_resp.ok

False

In [64]:
# Status code is 400 - meaning 'Bad request'
check_resp.status_code

400

In [65]:
# Error message
check_resp.json()

{'errorMessage': 'Invalid value(s) for key(s): [mediaType]',
 'queryParameters': {'output': 'json',
  'callback': 'A javascript function to handle your search results',
  'country': 'ISO-2A country code',
  'limit': 'The number of search results to return',
  'term': 'A search string',
  'lang': 'ISO-2A language code'}}

### Structuring and exporting the data

In [66]:
# It may be useful to store the data in a structured form
# The pandas package is great for that, as we can use its dataframe (basically a table)
# Since the results is a list of 'shallow' dictionaries, it neatly fits into a table
# A more complicated, nested dictionary may not be easily transformable into a table

In [67]:
import pandas as pd

In [68]:
# Creating the dataframe and populating it with the results of our search
songs_df = pd.DataFrame(info["results"])
songs_df

Unnamed: 0,wrapperType,kind,artistId,collectionId,trackId,artistName,collectionName,trackName,collectionCensoredName,trackCensoredName,...,contentAdvisoryRating,longDescription,shortDescription,description,copyright,feedUrl,artworkUrl600,genreIds,genres,amgArtistId
0,track,song,1.369750e+05,1.474816e+09,1.474816e+09,The Beatles,Abbey Road (2019 Mix),Here Comes the Sun,Abbey Road (2019 Mix),Here Comes the Sun (2019 Mix),...,,,,,,,,,,
1,track,song,1.369750e+05,1.474816e+09,1.474816e+09,The Beatles,Abbey Road (2019 Mix),Come Together,Abbey Road (2019 Mix),Come Together (2019 Mix),...,,,,,,,,,,
2,track,song,1.369750e+05,1.440833e+09,1.440834e+09,The Beatles,1 (2015 Version),All You Need Is Love,1 (2015 Version),All You Need Is Love (2015 Stereo Mix),...,,,,,,,,,,
3,track,song,1.369750e+05,1.440833e+09,1.440834e+09,The Beatles,1 (2015 Version),Hey Jude,1 (2015 Version),Hey Jude (2015 Stereo Mix),...,,,,,,,,,,
4,track,song,1.369750e+05,1.441133e+09,1.441133e+09,The Beatles,The Beatles 1967-1970 (The Blue Album),Hey Jude,The Beatles 1967-1970 (The Blue Album),Hey Jude,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,audiobook,,1.085844e+09,1.446701e+09,,Judith Bourque,Robes of Silk Feet of Clay: The True Story of ...,,Robes of Silk Feet of Clay: The True Story of ...,,...,,,,Maharishi Mahesh Yogi was the charismatic lead...,© 2018 Blackstone Audio,,,,,
196,audiobook,,4.416594e+06,1.504963e+09,,John Lennon,"Beatles Tapes, The: Rare Interviews",,"Beatles Tapes, The: Rare Interviews",,...,,,,This is an audio album of interviews with each...,© 2019 HN Publishing,,,,,
197,audiobook,,2.065398e+06,1.475783e+09,,Geoffrey Giuliano,The Beatle's Guru Maharishi Mahesh Yogi: The L...,,The Beatle's Guru Maharishi Mahesh Yogi: The L...,,...,,,,When the Beatles turned onto meditation in 196...,© 2019 Author's Republic,,,,,
198,track,feature-movie,,,5.687789e+08,Seth Swirsky,,Beatles Stories,,Beatles Stories,...,NR,Songwriter Seth Swirsky grew up in the 1960s l...,Songwriter Seth Swirsky grew up in the 1960s l...,,,,,,,


In [69]:
# Exporting the data to a CSV (Comma Separated Values) file
songs_df.to_csv("songs_info.csv")

# Pagination

In [70]:
# Loading the packages
import requests
import json

In [71]:
# We will use API for job listings on Github
# Documentation can be found on: https://jobs.github.com/api

In [72]:
# define base URL
base_site = "https://jobs.github.com/positions.json"

In [73]:
# Submiting a GET request
r = requests.get(base_site, params = {"description": "data science", "location": "los angeles"})
r.status_code

200

In [74]:
# Inspect the response
r.json()

[{'id': '11cbce13-e6cd-4c79-b904-d292b569b22f',
  'type': 'Full Time',
  'url': 'https://jobs.github.com/positions/11cbce13-e6cd-4c79-b904-d292b569b22f',
  'created_at': 'Wed Jul 15 12:17:30 UTC 2020',
  'company': 'OpenPlay',
  'company_url': 'http:',
  'location': 'Santa Monica',
  'title': 'Software Enginee',
  'description': '<h2>Company Description</h2>\n<p><strong>Hello, we’re OpenPlay!</strong></p>\n<p>We’re looking for a full-stack engineer to work with us on our web-based music distribution application. You’ll work with us to design, develop, and deploy new application features for one of the world’s largest record companies. This is a full-time position that can be local (Los Angeles) or fully remote.</p>\n<h2>What’s an OpenPlay?</h2>\n<p>We’re a small software team making distribution and workflow products for some of the world’s largest music companies. We’re big on testing, constant incremental improvement, craftsmanship, and pragmatism, and so far have been able to use th

In [75]:
# How many jobs have been found?
len(r.json())

4

### The page parameter

In [76]:
# Let's search for all jobs (no filter parameters)
r =  requests.get(base_site)
r.ok

True

In [77]:
r.json()

[{'id': '47ab6c9d-7ea0-4ae0-94d2-27e0e02ead41',
  'type': 'Full Time',
  'url': 'https://jobs.github.com/positions/47ab6c9d-7ea0-4ae0-94d2-27e0e02ead41',
  'created_at': 'Thu Aug 13 14:53:11 UTC 2020',
  'company': 'Verition Fund Management',
  'company_url': 'http://www.veritionfund.com',
  'location': 'NYC/CT/Remote',
  'title': 'Senior Developer',
  'description': '<p>Verition Fund Management is looking to hire a Senior Developer to join the Firm’s low latency trading platform team. The role is full time, and can be remote for the right candidate.</p>\n<p>Required:\n-Advanced hands on C++ 11/14/17 experience developing multithreaded applications\n-Experience debugging with gdb\n-Experience writing in Python (pandas, numpy) as well as Bash and GTest\n-Experienced on linux systems\n-Familiar with git, TCP/IP, UDP, ZMQ (or other messaging libraries)</p>\n<p>The following would be a plus:\n-Work experience in financial industry, specifically on low latency trading systems\n-Experience i

In [78]:
len(r.json())

50

In [79]:
# According to the documentation, the results are split into pages
# These were the results from the first page only

In [80]:
# To get the next page, we need to make another GET request with parameter 'page'
r =  requests.get(base_site, params = {"page": 2})
r.status_code

200

In [81]:
r.json()

[{'id': '5494086a-cf57-46f1-86bb-206bd7f9284b',
  'type': 'Full Time',
  'url': 'https://jobs.github.com/positions/5494086a-cf57-46f1-86bb-206bd7f9284b',
  'created_at': 'Mon Aug 03 08:37:13 UTC 2020',
  'company': 'InnoGames GmbH',
  'company_url': 'https://www.innogames.com',
  'location': 'Hamburg',
  'title': 'Java Software Developer - Core Team',
  'description': '<p>As <strong>Java Software Developer</strong>, you work closely with our game teams to shape the foundation of our next big hits and contribute as a productive member of an agile development team in all phases of the development lifecycle.</p>\n<p><strong>Your mission:</strong></p>\n<ul>\n<li>Develop features shared across our game backends</li>\n<li>Improve performance-critical components, increase the robustness and scalability of our software</li>\n<li>Improve our libraries and backend services with your knowledge of software architecture and high-quality code</li>\n<li>Maintain and extend our development tools and e

In [82]:
len(r.json())

50

In [83]:
# Making a request to a non-existing page
r = requests.get(base_site, params = {"page": 10})
r.status_code

200

In [84]:
# The response is an empty list
r.json()

[]

### Extracting results from multiple pages

In [85]:
# Let's obtain the results of the first 5 pages
results = []

In [86]:
for i in range(5):
    r =  requests.get(base_site, params = {"page": i+1})
    
    if len(r.json()) == 0:   # We have reached the end of the results
        break
    else:
        # Add the response results to our list of results
        results.extend(r.json())


In [87]:
# number of found jobs
len(results)

239

# API which requires registration - POST request

### Registering to the API

In [88]:
# We will use a nutritional analysis API
# It requires registration (we need an API key to validate ourselves)
# Many APIs require this kind of registration

In [89]:
# You can sign-up for the Developer (Free) edition here: 
#        https://developer.edamam.com/edamam-nutrition-api

# API documentation: 
#        https://developer.edamam.com/edamam-docs-nutrition-api

### Initial Setup

In [90]:
# loading the packages
import requests
import json

In [91]:
# Store the ID and Key in variables
APP_ID = "d9bdc4e1"
APP_KEY = "c8d05f83c3063734a9f60d2d9309e85f"

#APP_ID = "your_API_ID_here"
#APP_KEY = "your_API_key_here"

# Note: Those are not real ID and Key,
# Replace the string with your own ones that you recieved upon registration

In [92]:
# Setting up the request URL
api_endpoint = "https://api.edamam.com/api/nutrition-details"

url = api_endpoint + "?app_id=" + APP_ID + "&app_key=" + APP_KEY

### Sending a POST request

In [93]:
# Defining the header (as stated in the documentation)
headers = {
    'Content-Type' : 'application/json'
}

In [94]:
# Defining the payload of the request (the data we actually want processed)
recipe = {
    'title' : 'Cappuccino',
    'ingr' : ['18g ground espresso (or 1 espresso pod)', '150ml milk']
}

In [95]:
# Submitting a POST request
r = requests.post(url, headers = headers, json = recipe)
r.status_code

200

### Inspecting the response

In [96]:
# In the documentation it is stated that the response is JSON
capp_info = r.json()
print(json.dumps(capp_info, indent=4))

{
    "uri": "http://www.edamam.com/ontologies/edamam.owl#recipe_d8d967e603d64d849bb2f77202c6c4ae",
    "yield": 2.0,
    "calories": 95,
    "totalWeight": 172.69915386093172,
    "dietLabels": [],
    "healthLabels": [
        "SUGAR_CONSCIOUS",
        "VEGETARIAN",
        "PEANUT_FREE",
        "TREE_NUT_FREE",
        "ALCOHOL_FREE",
        "SULPHITE_FREE"
    ],
    "cautions": [
        "SULFITES"
    ],
    "totalNutrients": {
        "ENERC_KCAL": {
            "label": "Energy",
            "quantity": 95.98648385516834,
            "unit": "kcal"
        },
        "FAT": {
            "label": "Fat",
            "quantity": 5.060122500480281,
            "unit": "g"
        },
        "FASAT": {
            "label": "Saturated",
            "quantity": 2.9016992195063764,
            "unit": "g"
        },
        "FATRN": {
            "label": "Trans",
            "quantity": 0.0,
            "unit": "g"
        },
        "FAMS": {
            "label": "Monounsaturated

In [97]:
# There is a lot of info contained in this JSON
capp_info.keys()

dict_keys(['uri', 'yield', 'calories', 'totalWeight', 'dietLabels', 'healthLabels', 'cautions', 'totalNutrients', 'totalDaily', 'ingredients', 'totalNutrientsKCal'])

In [98]:
# Let's check the 'totalNutrients' key
print(json.dumps(capp_info["totalNutrients"], indent=4))

{
    "ENERC_KCAL": {
        "label": "Energy",
        "quantity": 95.98648385516834,
        "unit": "kcal"
    },
    "FAT": {
        "label": "Fat",
        "quantity": 5.060122500480281,
        "unit": "g"
    },
    "FASAT": {
        "label": "Saturated",
        "quantity": 2.9016992195063764,
        "unit": "g"
    },
    "FATRN": {
        "label": "Trans",
        "quantity": 0.0,
        "unit": "g"
    },
    "FAMS": {
        "label": "Monounsaturated",
        "quantity": 1.2561571293507656,
        "unit": "g"
    },
    "FAPU": {
        "label": "Polyunsaturated",
        "quantity": 0.31822335002881685,
        "unit": "g"
    },
    "CHOCDF": {
        "label": "Carbs",
        "quantity": 7.726159385324722,
        "unit": "g"
    },
    "FIBTG": {
        "label": "Fiber",
        "quantity": 0.0,
        "unit": "g"
    },
    "SUGAR": {
        "label": "Sugars",
        "quantity": 7.812307269977051,
        "unit": "g"
    },
    "PROCNT": {
        "label

In [99]:
# Taking the sugar content
capp_info["totalNutrients"]["SUGAR"]

{'label': 'Sugars', 'quantity': 7.812307269977051, 'unit': 'g'}

In [100]:
# Or going even deeper
capp_info["totalNutrients"]["SUGAR"]["quantity"]

7.812307269977051

### Structuring and exporting data

In [101]:
# Again, we can use a dataframe to more clearly represent the data
import pandas as pd

pd.DataFrame(capp_info["totalNutrients"])

Unnamed: 0,ENERC_KCAL,FAT,FASAT,FATRN,FAMS,FAPU,CHOCDF,FIBTG,SUGAR,PROCNT,...,NIA,VITB6A,FOLDFE,FOLFD,FOLAC,VITB12,VITD,TOCPHA,VITK1,WATER
label,Energy,Fat,Saturated,Trans,Monounsaturated,Polyunsaturated,Carbs,Fiber,Sugars,Protein,...,Niacin (B3),Vitamin B6,Folate equivalent (total),Folate (food),Folic acid,Vitamin B12,Vitamin D,Vitamin E,Vitamin K,Water
quantity,95.9865,5.06012,2.9017,0,1.25616,0.318223,7.72616,0,7.81231,4.89462,...,1.07494,0.0560517,7.91496,7.91496,0,0.696146,2.01109,0.110089,0.482097,153.94
unit,kcal,g,g,g,g,g,g,g,g,g,...,mg,mg,µg,µg,µg,µg,µg,mg,µg,g


In [102]:
# The vertical orientation seems more easier to read
# We achieve that by rotating the dataframe with .transpose()
capp_nutrients = pd.DataFrame(capp_info["totalNutrients"]).transpose()
capp_nutrients

Unnamed: 0,label,quantity,unit
ENERC_KCAL,Energy,95.9865,kcal
FAT,Fat,5.06012,g
FASAT,Saturated,2.9017,g
FATRN,Trans,0.0,g
FAMS,Monounsaturated,1.25616,g
FAPU,Polyunsaturated,0.318223,g
CHOCDF,Carbs,7.72616,g
FIBTG,Fiber,0.0,g
SUGAR,Sugars,7.81231,g
PROCNT,Protein,4.89462,g


In [103]:
# Exporting the nutrition values to a CSV file
capp_nutrients.to_csv("Cappuccino_nutrients.csv")

### Testing invalid input

In [104]:
# Sending a request not containing the 'ingridients' parameter
cake = requests.post(url, headers = headers, json = {"title": "cheesecake"})
cake.ok

False

In [105]:
# The status code is 555
# This is not an official HTTP status code, but one defined in the documentation of the API
cake.status_code

555

In [106]:
cake.json()

{'error': 'low_quality'}