In [1]:
# Install requirements
# !pip install -r requirements.txt

# Example code for querying geonetwork

Mostly from https://owslib.readthedocs.io/en/latest/usage.html#csw

In [2]:
import yaml
import owslib
from owslib.csw import CatalogueServiceWeb
from owslib.fes import PropertyIsEqualTo, PropertyIsLike, BBox

# Put your username and password in a file called secrets.yml (the file this
# cell reads; do not commit it) with the contents:
# username: xxxxx
# password: xxxxx

server = "https://catalogue.grumets.cat"
with open("./secrets.yml") as f:
    creds = yaml.safe_load(f)

# Alternatively you can use a local catalogue using docker and the provided docker compose file
# server = "http://localhost:8080"
# creds = dict(username = 'geonetwork', password = 'geonetwork')

endpoint = server + "/geonetwork/srv/eng/csw?SERVICE=CSW&VERSION=2.0.2&REQUEST=GetCapabilities"

# Validate explicitly instead of with `assert`: asserts are stripped when
# Python runs with -O, and an empty YAML file loads as None, which would
# otherwise fail with an unhelpful "argument of type 'NoneType'" TypeError.
if not isinstance(creds, dict):
    raise ValueError("secrets.yml must contain a YAML mapping with 'username' and 'password'")
missing = [key for key in ("username", "password") if key not in creds]
if missing:
    raise KeyError(f"secrets.yml is missing required key(s): {', '.join(missing)}")

# Every key in the credentials mapping is forwarded to Authentication as a
# keyword argument, so the file must only contain keys it accepts.
auth = owslib.util.Authentication(**creds)
csw = CatalogueServiceWeb(endpoint, timeout=60, auth=auth)

## Results constrained by keywords

In [3]:
# Constrain the search: the 'csw:AnyText' property must equal 'water'
query = PropertyIsEqualTo(propertyname='csw:AnyText', literal='water')
csw.getrecords2(maxrecords=100, constraints=[query])
# Summary of the last request (match / returned / next-record counts)
csw.results

{'matches': 24, 'returned': 24, 'nextrecord': 0}

## Get all entries


In [4]:
# No constraints are passed, so nothing is filtered out; the response is
# still limited to at most 100 records by maxrecords.
# NOTE(review): if the catalogue ever holds more than 100 records this no
# longer fetches "all entries" — presumably 'nextrecord' in csw.results is
# the position to page from; confirm against the OWSLib docs.
csw.getrecords2(maxrecords=100)
# Summary of the last request (match / returned / next-record counts)
csw.results

{'matches': 53, 'returned': 53, 'nextrecord': 0}

In [5]:
# Index the records from the last request by title, printing each title.
# Records that share a title overwrite one another in this dict.
records = {}

for rec, r in csw.records.items():
    print(r.title)
    records[r.title] = r

# Use the first record in the dict as the running example below
record = list(records.values())[0]

The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)
Geoscience Australia's Open Day                             Photographs 26th August 2007
Localities in Victoria (VMADMIN.LOCALITY_POLYGON) - Comprehensive Elements
Hydrological Basins in Africa (Sample record, please remove!)
Plume Flow Air quality data for AD4GD
plume_example_points.shp
Surface water conductivity in river Spree, 2016
Citizen Science biodiversity observations in Farmer Cluster Mostviertler, Grünland, Austria
City Nature Challenge 2023 - biodiversity observations in Farmer Cluster Monte Pisano, Calci, Italy
PurpleAir Indoor Sensor Air Quality Data Sample for AD4GD (Sensor ID 157047)
IoT Sensor in Geneva
Continuous E.coli measurements in surface water
Citizen Science biodiversity observations in Farmer Cluster Monte Pisano, Calci, Italy
Halensee- water level and water temperature
Orankesee- water level and water temperature
Berlin Lake Mueggelsee Water Quality Dataset (AD4GD copy hosted on MinIO)
Fennpfuhl

In [6]:
# Public attribute names on the record object (anything not starting with "_")
[attr for attr in dir(record) if not attr.startswith("_")]

['abstract',
 'accessrights',
 'alternative',
 'bbox',
 'bbox_wgs84',
 'contributor',
 'coverage',
 'created',
 'creator',
 'date',
 'format',
 'identifier',
 'identifiers',
 'ispartof',
 'issued',
 'language',
 'license',
 'modified',
 'publisher',
 'rdf',
 'references',
 'relation',
 'rights',
 'rightsholder',
 'source',
 'spatial',
 'subjects',
 'temporal',
 'title',
 'type',
 'uris',
 'xml']

## Pretty print the summary xml returned by a query

In [7]:
import xml.etree.ElementTree as ET

# Parse the raw XML stored on the record so it can be re-serialised.
xml = ET.fromstring(record.xml)
# Serialising alone just echoes the server's whitespace; ET.indent rewrites
# the indentation in place so the output is actually pretty-printed. It only
# exists on Python 3.9+, so fall back to the unindented output elsewhere.
if hasattr(ET, "indent"):
    ET.indent(xml)
print(ET.tostring(xml, encoding="unicode", method="xml"))

<ns0:SummaryRecord xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:ns0="http://www.opengis.net/cat/csw/2.0.2" xmlns:ns2="http://purl.org/dc/terms/">
      <dc:identifier>9f4c30af-d37f-4239-8b90-14eb7ea9d1d4</dc:identifier>
      <dc:title>The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)</dc:title>
      <dc:type>service</dc:type>
      <ns2:modified>2010-03-24</ns2:modified>
      <ns2:abstract>This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom.</ns2:abstract>
    </ns0:SummaryRecord>


## Get the full XML for a record

In [8]:
# Request the example record again, this time by its identifier
csw.getrecordbyid(id=[record.identifier])

# Each returned record replaces the entry stored under the same title in
# `records`; its title and re-serialised XML are printed.
for rec, r in csw.records.items():
    print(r.title)
    records[r.title] = r
    xml = ET.fromstring(r.xml)
    print(ET.tostring(xml, method="xml", encoding="unicode"))

The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)
<ns0:Record xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:ns0="http://www.opengis.net/cat/csw/2.0.2" xmlns:ns2="http://purl.org/dc/terms/">
    <dc:identifier>9f4c30af-d37f-4239-8b90-14eb7ea9d1d4</dc:identifier>
    <dc:date>2009-10-15</dc:date>
    <dc:title>The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)</dc:title>
    <dc:type>service</dc:type>
    <ns2:modified>2010-03-24</ns2:modified>
    <ns2:abstract>This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom.</ns2:abstract>
    <dc:description>This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom.</dc:description>
  </ns0:Record>


## Convert to python dict with xmltodict

In [9]:
# !pip install xmltodict

In [10]:
import xmltodict

# Convert the record's XML into nested Python dicts. With the default
# settings the keys keep their namespace prefixes (e.g. 'dc:title') and the
# xmlns declarations appear as '@xmlns:...' entries, as the output shows.
xmltodict.parse(record.xml)

{'csw:SummaryRecord': {'@xmlns:dc': 'http://purl.org/dc/elements/1.1/',
  '@xmlns:geonet': 'http://www.fao.org/geonetwork',
  '@xmlns:dct': 'http://purl.org/dc/terms/',
  '@xmlns:csw': 'http://www.opengis.net/cat/csw/2.0.2',
  '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
  'dc:identifier': '9f4c30af-d37f-4239-8b90-14eb7ea9d1d4',
  'dc:title': "The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)",
  'dc:type': 'service',
  'dct:modified': '2010-03-24',
  'dct:abstract': "This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom."}}

In [11]:
# Same conversion, but with namespace processing switched on: keys are
# expanded to '<namespace URI>:<local name>', as the output shows.
xmltodict.parse(record.xml, process_namespaces=True)

{'http://www.opengis.net/cat/csw/2.0.2:SummaryRecord': {'http://purl.org/dc/elements/1.1/:identifier': '9f4c30af-d37f-4239-8b90-14eb7ea9d1d4',
  'http://purl.org/dc/elements/1.1/:title': "The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)",
  'http://purl.org/dc/elements/1.1/:type': 'service',
  'http://purl.org/dc/terms/:modified': '2010-03-24',
  'http://purl.org/dc/terms/:abstract': "This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom."}}

## Use RDFLib to convert to 'proper' JSON-LD

In [12]:
!pip install rdflib



In [13]:
from rdflib import Graph

g = Graph()
# Hand the XML over with `data=`: the first positional parameter of
# Graph.parse is `source`, which rdflib treats as a file path or URL when it
# is given a string, so passing document content positionally is fragile.
g.parse(data=record.xml, format='application/rdf+xml')
# Serialise the parsed graph as JSON-LD.
# NOTE(review): the record is CSW/Dublin Core XML rather than purpose-built
# RDF/XML — check the resulting triples are what you expect.
print(g.serialize(format='json-ld'))

[
  {
    "@id": "_:Ne10140929ff543ad8ea08ca480f51aa6",
    "@type": [
      "http://www.opengis.net/cat/csw/2.0.2SummaryRecord"
    ],
    "http://purl.org/dc/elements/1.1/identifier": [
      {
        "@value": "9f4c30af-d37f-4239-8b90-14eb7ea9d1d4"
      }
    ],
    "http://purl.org/dc/elements/1.1/title": [
      {
        "@value": "The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)"
      }
    ],
    "http://purl.org/dc/elements/1.1/type": [
      {
        "@value": "service"
      }
    ],
    "http://purl.org/dc/terms/abstract": [
      {
        "@value": "This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom."
      }
    ],
    "http://purl.org/dc/terms/modified": [
      {
        "@value": "2010-03-24"
      }
    ]
  }
]


In [14]:
# Identifier of the record to fetch in this cell
sensor_community_id = "1790de06-d90e-4824-a3b2-f4d6105db1ea"
csw.getrecordbyid(id=[sensor_community_id])

# Add every returned record to `records` (keyed by title) and print its
# title followed by its re-serialised XML.
for rec, r in csw.records.items():
    print(r.title)
    records[r.title] = r
    xml = ET.fromstring(r.xml)
    print(ET.tostring(xml, method="xml", encoding="unicode"))

Sensor.Commnunity Air Quality Data
<ns0:Record xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:ns0="http://www.opengis.net/cat/csw/2.0.2" xmlns:ns2="http://purl.org/dc/terms/" xmlns:ns3="http://www.opengis.net/ows">
    <dc:identifier>1790de06-d90e-4824-a3b2-f4d6105db1ea</dc:identifier>
    <dc:date>2024-08-29T10:38:52.954Z</dc:date>
    <dc:title>Sensor.Commnunity Air Quality Data</dc:title>
    <dc:subject>PM1</dc:subject>
    <dc:subject>PM2.5</dc:subject>
    <dc:subject>PM10</dc:subject>
    <dc:subject>environment</dc:subject>
    <dc:subject>climatologyMeteorologyAtmosphere</dc:subject>
    <dc:format>CSV</dc:format>
    <ns2:abstract>Sensor.Community is an open source citizen science project collecting various types of environment information.

Useful links
Main landing page: https://sensor.community/
User community forum: https://forum.sensor.community/ 
Sensor.Community Data License: https://opendatacommons.org/licenses/dbcl/1-0/
Official API documentation: https://github.c

In [15]:
# For every collected record, print its title followed by each public
# attribute that has a truthy value. 'xml', 'abstract' and 'title' are left
# out of the attribute listing.
for title, record in records.items():
    print(record.title)
    for k in dir(record):
        if k.startswith("_") or k in ("xml", "abstract", "title"):
            continue
        val = getattr(record, k)
        if val:
            print(f"    {k}: {val}")
    print()

The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)
    date: 2009-10-15
    identifier: 9f4c30af-d37f-4239-8b90-14eb7ea9d1d4
    identifiers: [{'scheme': None, 'identifier': '9f4c30af-d37f-4239-8b90-14eb7ea9d1d4'}]
    modified: 2010-03-24
    type: service

Geoscience Australia's Open Day                             Photographs 26th August 2007
    identifier: 59b3d999-ad30-4688-b705-6591d51f72cf
    identifiers: [{'scheme': None, 'identifier': '59b3d999-ad30-4688-b705-6591d51f72cf'}]
    modified: 2007-08-26
    type: collectionSession

Localities in Victoria (VMADMIN.LOCALITY_POLYGON) - Comprehensive Elements
    format: Most popular formats including ESRI shape, MapInfo Tab and Oracle Spatial
    identifier: fd1a659c-3c4c-4519-983a-ba2c5c992009
    identifiers: [{'scheme': None, 'identifier': 'fd1a659c-3c4c-4519-983a-ba2c5c992009'}]
    subjects: ['BOUNDARIES-Administrative', 'LAND-Ownership', 'boundaries']
    type: dataset

Hydrological Basins in Africa (Sample rec

In [19]:
import json

# Print each collected record's XML as indented JSON, followed by a blank line.
for title, record in records.items():
    print(json.dumps(xmltodict.parse(record.xml), indent=4), end="\n\n")

{
    "csw:Record": {
        "@xmlns:dc": "http://purl.org/dc/elements/1.1/",
        "@xmlns:ows": "http://www.opengis.net/ows",
        "@xmlns:geonet": "http://www.fao.org/geonetwork",
        "@xmlns:dct": "http://purl.org/dc/terms/",
        "@xmlns:csw": "http://www.opengis.net/cat/csw/2.0.2",
        "dc:identifier": "9f4c30af-d37f-4239-8b90-14eb7ea9d1d4",
        "dc:date": "2009-10-15",
        "dc:title": "The Geoffrey's Tube Z3950 Server (Sample Record - Please Delete!)",
        "dc:type": "service",
        "dct:modified": "2010-03-24",
        "dct:abstract": "This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom.",
        "dc:description": "This catalog is for registering all metadata records held by the Geofffrey's Tube Palace Hotel Ballroom."
    }
}

{
    "csw:SummaryRecord": {
        "@xmlns:dc": "http://purl.org/dc/elements/1.1/",
        "@xmlns:geonet": "http://www.fao.org/geonetwork",
        "@xmlns:dct": "ht