Copied from https://bitbucket.org/okusche/glad_api_demo/src/brightcon2022/

**Conference Notebook Kernel: `bw25`**
# GLAD API Demo

This notebook demonstrates how to query the GLAD index and load some data directly from online nodes into the notebook.

Note:
- This currently works only on nodes which do not require authentication.
- This works only for data that is in ILCD format **and** is available publicly **and** free of charge.

In [None]:
import json
import requests
import pandas as pd

# GLAD API base URL. Endpoint paths such as 'search?...' are appended to it
# by plain string concatenation, so the trailing slash must be kept.
base_url = 'https://www.globallcadataaccess.org/api/v1/'

### We need an API key to access the API, which we obtain from our user account there
(this one actually works, for your convenience)

In [None]:
import os

# API key sent in the 'api-key' request header. The default is the demo key
# published with this notebook; set the GLAD_API_KEY environment variable to
# use the key of your own account without editing the notebook.
api_key = os.environ.get('GLAD_API_KEY', '42ef8e1ad9e092071ce7004c1ca87574')

# first, we send a request to see whether everything works as it's supposed to

In [None]:
# Smoke test: run a simple keyword search and show the hit count together
# with the beginning of the raw response body.
url = base_url + 'search?query=maize'
req_headers = {
    'api-key': api_key
}
try:
    # A timeout keeps the cell from hanging forever on an unresponsive server.
    response = requests.get(url, headers=req_headers, timeout=30)
    # Turn HTTP error statuses (4xx/5xx) into an exception instead of trying
    # to read an error page as a search result.
    response.raise_for_status()
    result = response.json()
    print("total results: ", result['resultInfo']['totalCount'], "\n")
    print("response body:\n", response.text[:2000] + "<snip>")
except requests.exceptions.RequestException as exc:
    # Connection problems, timeouts and HTTP error statuses.
    print(f"Request to {url} failed: {exc}")
except (KeyError, TypeError, ValueError) as exc:
    # The body was not JSON or does not have the expected structure.
    print(f"Unexpected response format from {url}: {exc!r}")

### now let's filter for all the stuff that is

- available online (`publiclyAccessible=true`)
- available free of charge (`free=true`)
- in ILCD format (`format=ILCD`)

In [None]:
# Search restricted to free, publicly accessible datasets in ILCD format.
url = base_url + 'search?free=true&publiclyAccessible=true&format=ILCD'
req_headers = {
    'api-key': api_key
}
try:
    # A timeout keeps the cell from hanging forever on an unresponsive server.
    response = requests.get(url, headers=req_headers, timeout=30)
    # Turn HTTP error statuses (4xx/5xx) into an exception instead of trying
    # to read an error page as a search result.
    response.raise_for_status()
    result = response.json()
    print("total results: ", result['resultInfo']['totalCount'])
    if result['data']:
        print("first result's process name:\n", result['data'][0]['name'])
    else:
        # An empty hit list is a valid answer, not a request error.
        print("the search returned no datasets")
except requests.exceptions.RequestException as exc:
    # Connection problems, timeouts and HTTP error statuses.
    print(f"Request to {url} failed: {exc}")
except (KeyError, TypeError, ValueError) as exc:
    # The body was not JSON or does not have the expected structure.
    print(f"Unexpected response format from {url}: {exc!r}")

### let's examine where the results come from

In [None]:
# 'aggregations' part of the most recent search response. The helpers in this
# cell select items by their 'name' key and read the 'key'/'count' pairs
# from each item's 'entries'.
aggregations = result['aggregations']


def find_aggregation(data, aggregation_name):
    """Return every item of *data* whose ``'name'`` equals *aggregation_name*.

    The result is a (possibly empty) list; matching items keep the order in
    which they appear in *data*.
    """
    return [item for item in data if item['name'] == aggregation_name]


def show_aggregation(aggregation_name):
    """Print the per-key result counts of one aggregation.

    Looks up *aggregation_name* in the module-level ``aggregations`` list and
    prints a header, an underline and one ``key count`` line for every entry
    of the first match. If no aggregation has that name, a short notice is
    printed instead of failing with an ``IndexError``.
    """
    agg = find_aggregation(aggregations, aggregation_name)
    if not agg:
        print("no aggregation named", repr(aggregation_name), "in the current result")
        print("")
        return
    print("no. of results from ", aggregation_name)
    # 21 = len("no. of results from ") plus the separator space print() adds
    print("-" * (21 + len(aggregation_name)))
    for entry in agg[0]['entries']:
        print(entry['key'], entry['count'])
    print("")


# Show the raw 'dataprovider' aggregation first, then the per-key counts of
# the three aggregations of interest.
agg_dataproviders = find_aggregation(aggregations, 'dataprovider')
print("data providers aggregation:\n", agg_dataproviders, "\n")

for agg_name in ('dataprovider', 'location', 'supportedNomenclatures'):
    show_aggregation(agg_name)

### let's do the same again with a simple query term

In [None]:
# Same filtered search as before, narrowed down with the query term 'steel'.
url = base_url + 'search?free=true&publiclyAccessible=true&format=ILCD&query=steel'
req_headers = {
    'api-key': api_key
}
try:
    # A timeout keeps the cell from hanging forever on an unresponsive server.
    response = requests.get(url, headers=req_headers, timeout=30)
    # Turn HTTP error statuses (4xx/5xx) into an exception instead of trying
    # to read an error page as a search result.
    response.raise_for_status()
    result = response.json()
    print("total results: ", result['resultInfo']['totalCount'])
    if result['data']:
        print("first result's process name:\n", result['data'][0]['name'])
    else:
        # An empty hit list is a valid answer, not a request error.
        print("the search returned no datasets")
except requests.exceptions.RequestException as exc:
    # Connection problems, timeouts and HTTP error statuses.
    print(f"Request to {url} failed: {exc}")
except (KeyError, TypeError, ValueError) as exc:
    # The body was not JSON or does not have the expected structure.
    print(f"Unexpected response format from {url}: {exc!r}")
    
# Take the aggregations of the latest search result and print the same
# overviews as for the previous search.
aggregations = result['aggregations']

agg_dataproviders = find_aggregation(aggregations, 'dataprovider')
print("data providers aggregation:\n", agg_dataproviders, "\n")

for agg_name in ('dataprovider', 'location', 'supportedNomenclatures'):
    show_aggregation(agg_name)

### now we convert the result set into a dataframe and reorder it nicely

In [None]:
# json_normalize already returns a DataFrame, so wrapping it in
# DataFrame.from_dict() again is unnecessary.
df = pd.json_normalize(result['data'])
# Keep only the columns of interest, in display order. A column that is
# missing from the response shows up filled with NaN instead of raising.
df = df.reindex(columns=['name', 'location', 'category', 'dataprovider', 'dataSetUrl', 'supportedNomenclatures', 'reviewType', 'reviewers', 'description'])
df

### retrieve the first process dataset in original XML format (the process dataset only)

In [None]:
# Download the dataset behind the first row's 'dataSetUrl' and show the
# beginning of the returned document.
ds_url = df.at[0, 'dataSetUrl']
print("retrieving " + ds_url)
# A timeout keeps the cell from hanging forever on an unresponsive node.
response = requests.get(ds_url, timeout=30)
# Stop on an HTTP error rather than treating an error page as the dataset.
response.raise_for_status()
xml_dataset = response.text
print(xml_dataset[:2000] + "<snip>")

### retrieve the process dataset in JSON representation

In [None]:
# Request the same dataset as JSON by switching the 'format' query parameter.
# NOTE(review): assumes ds_url contains 'format=xml'; if it does not,
# replace() changes nothing and the original URL is fetched again - confirm.
ds_url_json = ds_url.replace('format=xml', 'format=json')
print("retrieving " + ds_url_json)
# A timeout keeps the cell from hanging forever on an unresponsive node.
response = requests.get(ds_url_json, timeout=30)
# Stop on an HTTP error rather than treating an error page as the dataset.
response.raise_for_status()
json_dataset = response.text
print(json_dataset[:2000] + "<snip>")

### retrieve the process dataset in JSON representation and extended view

In [None]:
# Request the JSON representation once more, this time in the extended view.
# NOTE(review): appending '&view=extended' assumes ds_url_json already has a
# query string - confirm.
ds_url_json_ext = ds_url_json + '&view=extended'
print("retrieving " + ds_url_json_ext)
# A timeout keeps the cell from hanging forever on an unresponsive node.
response = requests.get(ds_url_json_ext, timeout=30)
# Stop on an HTTP error rather than treating an error page as the dataset;
# later cells parse this response as JSON.
response.raise_for_status()
json_dataset_ext = response.text
print(json_dataset_ext)

### now we extract the exchanges into a separate data frame

In [None]:
# Parse the most recently fetched response (the extended JSON view) into
# Python objects.
process = response.json()
# One row per item of the 'exchange' list. json_normalize already returns a
# DataFrame with nested fields flattened into dotted column names, so no
# extra DataFrame.from_dict() wrapper is needed.
dfex = pd.json_normalize(process['exchanges'], record_path='exchange')
# dfex  # uncomment to inspect the table before the clean-up

In [None]:
# clean up: remove the flow reference columns that are not needed and turn
# the short description into a plain 'flow name' column
dfex.drop(columns=['referenceToFlowDataSet.type', 'referenceToFlowDataSet.uri'], inplace=True)
dfex.rename(columns={'referenceToFlowDataSet.shortDescription': 'flow name'}, inplace=True)
# each description is a sequence; its first item holds the text under 'value'
dfex['flow name'] = dfex['flow name'].apply(lambda descriptions: descriptions[0]['value'])
dfex