In [1]:
pip install owslib rdflib


Collecting owslib
  Using cached OWSLib-0.31.0-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting rdflib
  Downloading rdflib-7.0.0-py3-none-any.whl.metadata (11 kB)
Collecting lxml (from owslib)
  Downloading lxml-5.3.0-cp311-cp311-macosx_10_9_universal2.whl.metadata (3.8 kB)
Collecting pytz (from owslib)
  Using cached pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting pyyaml (from owslib)
  Downloading PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting requests>=1.0 (from owslib)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting isodate<0.7.0,>=0.6.0 (from rdflib)
  Downloading isodate-0.6.1-py2.py3-none-any.whl.metadata (9.6 kB)
Collecting pyparsing<4,>=2.1.0 (from rdflib)
  Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)
Collecting charset-normalizer<4,>=2 (from requests>=1.0->owslib)
  Using cached charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (33 kB)
Collecting idna<4,>=2.5 (from re

In [None]:
from urllib.parse import quote

from owslib.csw import CatalogueServiceWeb
from rdflib import Graph, Literal, Namespace, RDF, URIRef
from rdflib.namespace import DCTERMS, DCAT, FOAF, XSD

# Define namespaces for GeoDCAT
# These are vocabularies that rdflib.namespace does not supply via the imports
# above, declared here so terms can be written as attribute access
# (e.g. GEOSPARQL.wktLiteral).

# Namespace under http://www.w3.org/ns/locn# (not referenced by the code
# visible in this cell).
LOCN = Namespace("http://www.w3.org/ns/locn#")
# OGC GeoSPARQL namespace; provides the wktLiteral datatype used for the
# bounding-box geometry.
GEOSPARQL = Namespace("http://www.opengis.net/ont/geosparql#")
# W3C WGS84 position namespace (not referenced by the code visible in this
# cell).
GEO = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")

# Characters (besides letters, digits and "_.-~") left unescaped when a
# metadata value is embedded in a URI path. Everything else, notably spaces,
# quotes and angle brackets, is percent-encoded.
_URI_PATH_SAFE = "/:@!$&'()*+,;="


def _uri_path(value):
    """Percent-encode ``value`` so it can be embedded in a URI path."""
    return quote(str(value), safe=_URI_PATH_SAFE)


# Define a function to convert CSW record to GeoDCAT
def csw_record_to_geodcat(record):
    """Translate one CSW metadata record into an RDF graph describing a dcat:Dataset.

    Args:
        record: A record object exposing ``identifier``, ``title`` and
            ``abstract`` attributes. The optional attributes ``date``,
            ``responsibleParties``, ``bbox`` (with ``minx``/``miny``/``maxx``/
            ``maxy``) and ``subjects`` are used when present and non-empty.

    Returns:
        rdflib.Graph: A new graph holding the triples for this record only.

    Notes:
        * Values interpolated into URIs are percent-encoded; an unescaped
          space (common in organisation names) yields a URI that rdflib
          refuses to serialize as Turtle/N3.
        * Missing (``None``/empty) title, abstract, keyword, organisation name
          or bounding-box coordinates are skipped rather than being written
          out as literals.
    """
    graph = Graph()
    dcat_dataset = URIRef(f"https://example.com/dataset/{_uri_path(record.identifier)}")

    # Add basic dataset properties
    graph.add((dcat_dataset, RDF.type, DCAT.Dataset))
    graph.add((dcat_dataset, DCTERMS.identifier, Literal(record.identifier)))
    if record.title:
        graph.add((dcat_dataset, DCTERMS.title, Literal(record.title)))
    if record.abstract:
        graph.add((dcat_dataset, DCTERMS.description, Literal(record.abstract)))

    issued = getattr(record, "date", None)
    if issued:
        # A value carrying a time component is not a valid xsd:date lexical
        # form, so type it as xsd:dateTime instead.
        issued_type = XSD.dateTime if "T" in str(issued) else XSD.date
        graph.add((dcat_dataset, DCTERMS.issued, Literal(issued, datatype=issued_type)))

    # Add publisher (if exists)
    # NOTE(review): not every record type is guaranteed to expose
    # `responsibleParties`; it is read defensively so such records are
    # converted without publisher triples instead of raising AttributeError.
    for party in getattr(record, "responsibleParties", None) or []:
        if isinstance(party, dict):
            org_name = party.get("organisationName")
        else:
            org_name = getattr(party, "organisationName", None)
        if not org_name:
            continue
        publisher = URIRef(f"https://example.com/publisher/{_uri_path(org_name)}")
        graph.add((dcat_dataset, DCTERMS.publisher, publisher))
        graph.add((publisher, RDF.type, FOAF.Organization))
        graph.add((publisher, DCTERMS.title, Literal(org_name)))

    # Add spatial information (if exists)
    bbox = getattr(record, "bbox", None)
    if bbox:
        corners = (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy)
        # Only emit the geometry when all four corners are known; a partial
        # box would otherwise produce WKT containing the text "None".
        if all(corner is not None for corner in corners):
            minx, miny, maxx, maxy = corners
            coordinates = (
                f"POLYGON(({minx} {miny}, {maxx} {miny}, {maxx} {maxy}, "
                f"{minx} {maxy}, {minx} {miny}))"
            )
            graph.add((dcat_dataset, DCTERMS.spatial, Literal(coordinates, datatype=GEOSPARQL.wktLiteral)))

    # Add keywords (if exist)
    for keyword in getattr(record, "subjects", None) or []:
        if keyword:
            graph.add((dcat_dataset, DCAT.keyword, Literal(keyword)))

    return graph

# Function to fetch CSW records and translate to GeoDCAT
def fetch_and_translate_csw_to_geodcat(csw_url, output_format='turtle', max_records=5):
    """Fetch records from a CSW endpoint and return them serialized as RDF.

    Args:
        csw_url: URL of the CSW service to query.
        output_format: rdflib serialization format name passed to
            ``Graph.serialize`` (default ``'turtle'``).
        max_records: Maximum number of records requested from the service
            (default 5, kept small for demonstration purposes).

    Returns:
        str: The merged graph of all fetched records in ``output_format``.
    """
    # Connect to the CSW service
    csw = CatalogueServiceWeb(csw_url)

    # Fetch the full element set for at most `max_records` records
    csw.getrecords2(maxrecords=max_records, esn='full')

    # Create a new RDF graph for all records
    all_records_graph = Graph()

    # Translate each CSW record and merge its triples into the combined graph
    for rec in csw.records.values():
        all_records_graph += csw_record_to_geodcat(rec)

    # Serialize the result into the desired RDF format.
    # rdflib >= 6 returns str here while older releases returned bytes, so
    # decode only when bytes come back; calling .decode() unconditionally
    # raises AttributeError on current rdflib.
    serialized = all_records_graph.serialize(format=output_format)
    if isinstance(serialized, bytes):
        serialized = serialized.decode('utf-8')
    return serialized

# Example usage
# The URL below is a placeholder: point it at a real CSW endpoint before
# running this cell, since the call connects to the service over the network.
csw_url = "https://example.com/csw"  # Replace with your CSW service URL
# Fetch the records and convert them to RDF using the default output format.
rdf_output = fetch_and_translate_csw_to_geodcat(csw_url)
# print() keeps the line breaks of the serialized text readable.
print(rdf_output)
