### Experiments querying the Eurostat SDMX registry for DSDs and codelists:
-- Olav ten Bosch, 20210508

In [15]:
import requests                  # for issueing HTTP requests
import json                      # for processing the json data

- Viewer: https://webgate.ec.europa.eu/sdmxregistry/
- Documentation: https://ec.europa.eu/eurostat/web/sdmx-web-services/rest-sdmx-2.1

In [16]:
# Base URL of the ESTAT SDMX registry REST API.
# Request cells append "<resource>/<agencyID>/<resourceID>/<version>" directly to it,
# so the trailing slash is required.
endpoint = "http://ec.europa.eu/eurostat/SDMX/diss-web/rest/"

In [None]:
# Retrieve all dataflows published by ESTAT.
resource = 'dataflow'
agencyID = 'ESTAT'
resourceID = 'all'  # taken from the documentation example; this query takes long!
# resourceID = '?????'  # filling in a dataflow id from the viewer does not seem to work here
version = 'latest'

# Always pass a timeout: without one, requests waits indefinitely on an unresponsive server.
# The tuple is (connect, read) in seconds; the read limit is generous because this query is slow.
r1 = requests.get(
    f"{endpoint}{resource}/{agencyID}/{resourceID}/{version}",
    timeout=(10, 600),
)
print(r1.url, r1.status_code)
# Do not print r1.text here: it is too much for the notebook.

In [24]:
# Locate a known DSD identifier in the dataflow listing and show the text around it.
search_term = 'DSD_nama_10_gdp'
x = r1.text.find(search_term)  # character offset of the first match, or -1 when absent
print(x)
if x >= 0:
    # Clamp the start at 0: a negative start index would wrap around to the end of the
    # string and print an empty or unrelated excerpt when the match is near the beginning.
    print(r1.text[max(0, x - 100):x + 100])
else:
    print(f"'{search_term}' not found in the response")

3122571
x composants (production, dépenses et revenu)</com:Name>
        <str:Structure>
          <Ref id="DSD_nama_10_gdp" version="1.0" agencyID="ESTAT" package="datastructure" class="DataStructure"/>
    


In [28]:
# Retrieve a specific datastructure (DSD); the response also contains its codelists.
resource = 'datastructure'
agencyID = 'ESTAT'
# The DSD identifier is the <Ref id="..."> inside <str:Structure> of a dataflow
# in the dataflow listing retrieved earlier in this notebook.
resourceID = 'DSD_nama_10_gdp'
# resourceID = 'STSALL'  # identifier used in the documentation example
version = 'latest'

# Always pass a timeout: without one, requests waits indefinitely on an unresponsive server.
# The tuple is (connect, read) in seconds.
r2 = requests.get(
    f"{endpoint}{resource}/{agencyID}/{resourceID}/{version}",
    timeout=(10, 120),
)
print(r2.url, r2.status_code)
print(r2.text)


http://ec.europa.eu/eurostat/SDMX/diss-web/rest/datastructure/ESTAT/DSD_nama_10_gdp/latest 200
<?xml version="1.0" encoding="UTF-8"?>
<mes:Structure xmlns:mes="http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message" xmlns:str="http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure" xmlns:com="http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common">
  <mes:Header>
    <mes:ID>IDREF318722</mes:ID>
    <mes:Test>false</mes:Test>
    <mes:Prepared>2021-05-09T07:48:46.609Z</mes:Prepared>
    <mes:Sender id="Unknown"/>
    <mes:Receiver id="Unknown"/>
  </mes:Header>
  <mes:Structures>
    <str:Codelists>
      <str:Codelist id="CL_FREQ" urn="urn:sdmx:org.sdmx.infomodel.codelist.Codelist=ESTAT:CL_FREQ(1.0)" agencyID="ESTAT" version="1.0" isFinal="true">
        <com:Name xml:lang="en">FREQ</com:Name>
        <str:Code id="D" urn="urn:sdmx:org.sdmx.infomodel.codelist.Code=ESTAT:CL_FREQ(1.0).D">
          <com:Name xml:lang="en">Daily</com:Name>
        </str:Code>
        <str:Code 

In [5]:
# Not used; kept here for reference.
# SDMX media types according to the SDMX REST cheat sheet:
# https://github.com/sdmx-twg/sdmx-rest/blob/master/v2_1/ws/rest/docs/rest_cheat_sheet.pdf?raw=true
formats = dict([
    ("SDMX-ML Generic Data", "application/vnd.sdmx.genericdata+xml;version=2.1"),
    ("SDMX-ML StructureSpecific Data", "application/vnd.sdmx.structurespecificdata+xml;version=2.1"),
    ("SDMX-JSON Data", "application/vnd.sdmx.data+json;version=1.0.0"),
    ("SDMX-CSV Data", "application/vnd.sdmx.data+csv;version=1.0.0"),
    ("SDMX-ML Structure", "application/vnd.sdmx.structure+xml;version=2.1"),
    ("SDMX-JSON Structure", "application/vnd.sdmx.structure+json;version=1.0.0"),
    ("SDMX-ML Schemas", "application/vnd.sdmx.schema+xml;version=2.1"),
    ("SDMX-ML Generic Metadata", "application/vnd.sdmx.genericmetadata+xml;version=2.1"),
    ("SDMX-ML StructureSpecific Meta", "application/vnd.sdmx.structurespecificmetadata+xml;version=2.1"),
])

# Only these three formats give a 200 for registry.sdmx.org; the rest give a 406.
headers1, headers2, headers3 = (
    {"Accept": formats[label]}
    for label in ("SDMX-ML StructureSpecific Data", "SDMX-JSON Data", "SDMX-JSON Structure")
)
headers = headers2
# It appears that content negotiation is not needed for the global registry:
# a format query-string variable can be used instead. Kept here for reference.