Copied from https://bitbucket.org/okusche/glad_api_demo/src/brightcon2022/

**Conference Notebook Kernel: `bw25`**

In [None]:
import json
import requests
import pandas as pd

In [None]:
import os

# GLAD API base URL
base_url = 'https://www.globallcadataaccess.org/api/v1/'

# We need an API key to access the API which we obtain from our user account there.
# Set the GLAD_API_KEY environment variable to use your own key without editing
# (and accidentally sharing) the notebook; the demo key below is only the fallback.
api_key = os.environ.get('GLAD_API_KEY', '42ef8e1ad9e092071ce7004c1ca87574')

In [None]:
# first, we send some request to see whether everything works as it's supposed to
try:
    url = base_url + 'search'
    req_headers = {
        'api-key': api_key
    }
    # let requests build (and URL-encode) the query string; the timeout keeps
    # the cell from hanging forever when the server does not answer
    response = requests.get(url, headers=req_headers, params={'query': 'maize'}, timeout=30)
    # turn HTTP error statuses (4xx/5xx) into an exception instead of parsing an error page
    response.raise_for_status()
    result = response.json()
    print("total results: ", result['resultInfo']['totalCount'])
    print("response body:\n", response.text)
except requests.exceptions.RequestException as err:
    # connection problems, timeouts, invalid URLs and HTTP error statuses
    print("Request to the GLAD API failed:", err)
except (KeyError, TypeError, ValueError) as err:
    # body is not JSON or does not have the expected resultInfo/totalCount structure
    print("Unexpected response from the GLAD API:", repr(err))


In [None]:
# now let's filter for stuff that is
#  - available online (publiclyAccessible=true)
#  - available free of charge (free=true)
#  - in ILCD format (format=ILCD)

try:
    url = base_url + 'search'
    req_headers = {
        'api-key': api_key
    }
    search_filters = {
        'free': 'true',
        'publiclyAccessible': 'true',
        'format': 'ILCD',
    }
    # let requests build (and URL-encode) the query string; the timeout keeps
    # the cell from hanging forever when the server does not answer
    response = requests.get(url, headers=req_headers, params=search_filters, timeout=30)
    # turn HTTP error statuses (4xx/5xx) into an exception instead of parsing an error page
    response.raise_for_status()
    result = response.json()
    print("total results: ", result['resultInfo']['totalCount'])
    print("first result's process name:\n", result['data'][0]['name'])
except requests.exceptions.RequestException as err:
    # connection problems, timeouts, invalid URLs and HTTP error statuses
    print("Request to the GLAD API failed:", err)
except (KeyError, IndexError, TypeError, ValueError) as err:
    # body is not JSON, lacks the expected keys, or the result list is empty
    print("Unexpected response from the GLAD API:", repr(err))

In [None]:
# let's examine where the results come from
# (`result` is the parsed JSON body of the most recent successful search request;
# its 'aggregations' entry is the collection the helper functions below look through)
aggregations = result['aggregations']


def find_aggregation(data, aggregation_name):
    """Return a list of all items in *data* whose 'name' entry equals *aggregation_name*.

    The list is empty when nothing matches; every item is expected to
    support lookup by the key 'name'.
    """
    return [item for item in data if item['name'] == aggregation_name]


def show_aggregation(aggregation_name):
    """Print the key and count of every entry of the named aggregation.

    Looks the aggregation up in the module-level ``aggregations`` via
    ``find_aggregation`` and prints a header, an underline of the same
    length, one "key count" line per entry and a trailing blank line.
    If no aggregation with that name exists, a short notice is printed
    instead of failing with an IndexError.
    """
    matches = find_aggregation(aggregations, aggregation_name)
    if not matches:
        print("no aggregation named " + repr(aggregation_name) + " in the search result")
        print("")
        return

    # build the header once so the underline always matches its length
    header = "no. of results from " + aggregation_name
    print(header)
    print("-" * len(header))
    # only the first matching aggregation is shown
    for entry in matches[0]['entries']:
        print(entry['key'], entry['count'])
    print("")


# raw view of the data provider aggregation, as found in the search result
agg_dataproviders = find_aggregation(aggregations, 'dataprovider')
print("data providers aggregation:\n", agg_dataproviders, "\n")

# readable per-entry counts for each aggregation of interest
for agg_name in ('dataprovider', 'location', 'supportedNomenclatures'):
    show_aggregation(agg_name)

In [None]:
# now we convert the result into a dataframe
# (json_normalize already returns a DataFrame with nested keys flattened into
# dotted column names, so no further wrapping is needed)
df = pd.json_normalize(result['data'])
df

In [None]:
# let's reorder it nicely: keep only the columns of interest, in this order
column_order = [
    'name',
    'location',
    'category',
    'dataprovider',
    'dataSetUrl',
    'supportedNomenclatures',
    'reviewType',
    'reviewers',
    'description',
]
df = df.reindex(columns=column_order)
df

In [None]:
# retrieve a dataset in original XML format (the process dataset only)
# the URL is taken from the first row of the search result table
ds_url = df.at[0, 'dataSetUrl']
print("retrieving " + ds_url)
# the timeout keeps the cell from hanging if the server hosting the dataset does not answer
response = requests.get(ds_url, timeout=30)
# fail loudly on an HTTP error status instead of printing an error page as if it were the dataset
response.raise_for_status()
xml_dataset = response.text
print(xml_dataset)

In [None]:
# retrieve a dataset in original XML format (ZIP file with dependencies) 
# TODO

In [None]:
# retrieve the dataset in JSON representation
# NOTE: this relies on the dataset URL containing 'format=xml'; if it does not,
# replace() leaves the URL unchanged and the XML is fetched again
ds_url_json = ds_url.replace('format=xml', 'format=json')
print("retrieving " + ds_url_json)
# the timeout keeps the cell from hanging if the server hosting the dataset does not answer
response = requests.get(ds_url_json, timeout=30)
# fail loudly on an HTTP error status instead of printing an error page as if it were the dataset
response.raise_for_status()
json_dataset = response.text
print(json_dataset)

In [None]:
# retrieve the dataset in JSON representation and extended view
# NOTE: appending with '&' assumes the URL already carries a query string
ds_url_json_ext = ds_url_json + '&view=extended'
print("retrieving " + ds_url_json_ext)
# the timeout keeps the cell from hanging if the server hosting the dataset does not answer
response = requests.get(ds_url_json_ext, timeout=30)
# fail loudly on an HTTP error status instead of printing an error page as if it were the dataset
response.raise_for_status()
json_dataset_ext = response.text
print(json_dataset_ext)

In [None]:
# extract exchanges as separate data frame
# (`response` must still hold the JSON dataset response fetched before this cell)
process = response.json()
# one row per item of the 'exchange' list; json_normalize already returns a DataFrame
dfex = pd.json_normalize(process['exchanges'], record_path='exchange')

In [None]:
# clean up: discard the flow reference's type/uri columns and give the
# short description column a friendlier name
dfex = dfex.drop(columns=['referenceToFlowDataSet.type', 'referenceToFlowDataSet.uri'])
dfex = dfex.rename(columns={'referenceToFlowDataSet.shortDescription': 'flow name'})
# reduce each short description to the 'value' entry of its first element
dfex['flow name'] = dfex['flow name'].apply(lambda descriptions: descriptions[0]['value'])
dfex