# Pushing metadata to an FDP

This notebook can be used to manually push DCAT concepts according to the Health-RI Core v2 to a FAIR Data Point. 
If you are looking for interactive explanations, please see the 'Documentation' notebooks.

**Prerequisites:** To run this notebook in full, you need a running FAIR Data Point (FDP) instance and an account with write access to it.
This notebook is written for the reference implementation, FAIR Data Point version 1.16, with the [Health-RI Core v2 SHACL shapes](https://github.com/Health-RI/health-ri-metadata/tree/develop/Formalisation%28shacl%29/Core/FairDataPointShape).

## Imports and setup

In [None]:
from typing import List, Union
from pprint import pprint

from rdflib import URIRef, DCTERMS
from pydantic import AnyHttpUrl, Field, field_validator

from getpass import getpass
import dateutil.parser as parser

from fairclient.fdpclient import FDPClient

from sempyro import LiteralField
from sempyro.hri_dcat import (
    HRICatalog, 
    HRIDataset, 
    HRIVCard, 
    HRIAgent, 
    HRIDistribution,
    HRIDataService,
    HRIDatasetSeries,
    GeonovumLicences,
    DatasetStatus,
    DistributionStatus
)
from sempyro.time import PeriodOfTime
from sempyro.dcat import AccessRights, DCATCatalogRecord
from sempyro.utils.validator_functions import force_literal_field
from sempyro.spdx import Checksum

from datetime import datetime
from sempyro.adms import Identifier



In [None]:
# Connection settings for the FAIR Data Point.
# The values below are fixed example credentials for an instance on localhost.
# To be prompted for them instead, replace the three assignments with:
#   fdp_base = input("Enter base link to FDP: ").rstrip("/")
#   username = input("Enter username: ")
#   password = getpass(prompt="Password: ")
fdp_base = "http://localhost:8081"
username = "albert.einstein@example.com"
password = "password"

# Client used by every publishing cell in this notebook.
fdp_client = FDPClient(
    base_url=fdp_base,
    username=username,
    password=password,
)

In [None]:
class FDPCatalog(HRICatalog):
    """HRICatalog with an extra required ``is_part_of`` field.

    The field is mapped to ``dcterms:isPartOf`` and serialised as a URI; in
    this notebook it carries the link from the catalog to its parent (the FDP
    base URL).
    """

    # A proper generic annotation (not a bare list literal) so pydantic treats
    # the field as a list of HTTP(S) URLs.
    is_part_of: List[AnyHttpUrl] = Field(
        description="Link to parent object",
        json_schema_extra={
            "rdf_term": DCTERMS.isPartOf,
            "rdf_type": "uri",
        },
    )

class FDPDatasetSeries(HRIDatasetSeries):
    """HRIDatasetSeries with an extra required ``is_part_of`` field.

    The field is mapped to ``dcterms:isPartOf`` and serialised as a URI; it
    holds the link(s) from the dataset series to its parent object.
    """

    # A proper generic annotation (not a bare list literal) so pydantic treats
    # the field as a list of HTTP(S) URLs.
    is_part_of: List[AnyHttpUrl] = Field(
        description="Link to parent object",
        json_schema_extra={
            "rdf_term": DCTERMS.isPartOf,
            "rdf_type": "uri",
        },
    )



## Test objects

In [None]:
# --- Shared building blocks reused by the example resources in this notebook ---

# Contact point and publishing organisation.
test_vcard = HRIVCard(
    hasEmail="mailto:data-access-committee@xumc.nl",
    formatted_name="Data Access Committee of the x UMC",
)
test_agent = HRIAgent(
    name=[LiteralField(value="Academic Medical Center")],
    identifier=["https://ror.org/05wg1m734"],
    homepage=URIRef("https://www.xumc.nl"),
    mbox="mailto:data-access-committee@xumc.nl",
)

# Generic placeholder values for date, period and URI fields.
test_datetime = parser.isoparse("2024-07-01T11:11:11")
test_periodoftime = PeriodOfTime(
    start_date=LiteralField(value="2024-07-01T11:11:11Z", datatype="xsd:dateTime"),
    end_date=LiteralField(value="2024-07-01T11:11:11Z", datatype="xsd:dateTime"),
)
test_uri = URIRef("http://www.example.com")

# --- Minimal example resources ---

# Catalog without datasets, attached directly to the FDP root.
test_catalog = FDPCatalog(
    title=[LiteralField(value="Test catalog", language="en")],
    description=[LiteralField(value="Test catalog description", language="en")],
    contact_point=test_vcard,
    publisher=test_agent,
    dataset=[],
    is_part_of=[URIRef(fdp_base)],
)

test_dataset = HRIDataset(
    contact_point=test_vcard,
    creator=[test_agent],
    description=[LiteralField(value="Test dataset description.")],
    release_date=test_datetime,
    identifier="https://www.example.com/dataset/ZLOYOJ",
    modification_date=parser.isoparse("2024-06-04T13:36:10.246Z"),
    publisher=test_agent,
    theme=[URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")],
    title=[LiteralField(value="Test dataset")],
    distribution=[],
    access_rights=AccessRights.restricted,
    keyword=["example"],
    applicable_legislation=["http://data.europa.eu/eli/reg/2025/327/oj"],
)

test_dataservice = HRIDataService(
    title=[LiteralField(value="Test Data Service Title")],
    description=[LiteralField(value="Test Data Service Description")],
    endpoint_description=LiteralField(value="Example Data Service Endpoint Description"),
    contact_point=test_vcard,
    access_rights=AccessRights.restricted,
    endpoint_url="https://www.example.com/dataservice/1",
    # Absolute IRI (with scheme): this identifier is also used as the RDF
    # subject when the data service is converted to a graph.
    identifier="https://www.example.com/dataservice/test1",
    license=GeonovumLicences.niet_open,
    publisher=test_agent,
    theme=[URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")],
)

test_distribution = HRIDistribution(
    title=[
        LiteralField(value="CSV-distribution of the questionnaire data of the Personalised RISk-based MAmmascreening Study (PRISMA)")
    ],
    description=[
        LiteralField(value="CSV file containing the questionnaire data of the PRISMA study")
    ],
    access_url=URIRef("https://example.com/dataset/PRISMA/questionnaire.csv"),
    media_type=URIRef("https://www.iana.org/assignments/media-types/text/csv"),
    byte_size=4096,
    license=URIRef("https://definities.geostandaarden.nl/dcat-ap-nl/id/waardelijst/licenties/niet_open"),
    rights="https://www.example.com/contracts/definitely_a_real_DPA.pdf",
    format=URIRef("http://publications.europa.eu/resource/authority/file-type/CSV"),
)

test_identifier = Identifier(
    notation=LiteralField(value="identification"),
    schema_agency=LiteralField(value="agency"),
)

In [None]:
# Publish the minimal test catalog first; the value returned by the client is
# reused below as the parent of the test data service and as the primary topic
# of the catalog record.
test_catalog_record = test_catalog.to_graph(
    URIRef("https://www.example.com/catalog/test1")
)
test_catalog_fdp_url = fdp_client.create_and_publish(
    resource_type="catalog",
    metadata=test_catalog_record,
)
print(test_catalog_fdp_url)

# Convert the test data service to a graph, attach it to the published test
# catalog via dcterms:isPartOf, and publish it.
test_dataservice_record = test_dataservice.to_graph(
    subject=URIRef(test_dataservice.identifier)
)
test_dataservice_record.add(
    (
        URIRef(test_dataservice.identifier),
        DCTERMS.isPartOf,
        URIRef(str(test_catalog_fdp_url)),
    )
)
test_dataservice_fdp_url = fdp_client.create_and_publish(
    resource_type="dataservice",
    metadata=test_dataservice_record,
)

print(test_dataservice_fdp_url)

# Catalog record whose primary topic is the published test catalog.
test_catalogrecord = DCATCatalogRecord(
    modification_date=test_datetime,
    primary_topic=test_catalog_fdp_url,
)
test_catalogrecord_graph = test_catalogrecord.to_graph(
    subject=URIRef("https://www.example.com/catalog/test1")
)



## Catalog

In [None]:
# Create a class instance with the same data
# Catalog example that sets many more fields than `test_catalog` does,
# reusing the shared test objects defined earlier in the notebook.
fdp_catalog = FDPCatalog(
    # Descriptive text
    title=[
        LiteralField(value="Inflammatory Bowel Disease catalogue", language="en"),
    ],
    description=[
        LiteralField(
            value="This catalogue describes the core metadata of AUMC Inflammatory Bowel Disease datasets",
            language="en",
        ),
    ],
    homepage=URIRef("http://www.example.org"),
    themes=[URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")],
    # People and organisations
    contact_point=test_vcard,
    publisher=test_agent,
    creator=[test_agent],
    # Position in the FDP hierarchy and contained resources
    is_part_of=[URIRef(fdp_base)],
    has_part=[URIRef("http://localhost:8081/catalog/61ffda03-bcfc-4b21-a92f-f3e930870188")],
    dataset=[test_dataset],
    service=[test_dataservice],
    catalog=[test_catalog],
    catalog_record=[test_catalogrecord],
    # Legal information
    applicable_legislation=["http://data.europa.eu/eli/reg/2025/327/oj"],
    license=URIRef("https://definities.geostandaarden.nl/dcat-ap-nl/id/waardelijst/licenties/niet_open"),
    rights="https://www.example.com/contracts/definitely_a_real_DPA.pdf",
    # Coverage and dates
    geographical_coverage=[URIRef("http://publications.europa.eu/resource/authority/country/NLD")],
    temporal_coverage=[test_periodoftime],
    release_date=test_datetime,
    modification_date=test_datetime,
)

# Convert the catalog to an RDF graph and print its serialisation for inspection.
fdp_catalog_record = fdp_catalog.to_graph(
    URIRef("https://www.example.com/catalog/1")
)
print(fdp_catalog_record.serialize())


In [None]:
# Send the catalog graph to the FDP and print the value the client returns;
# later cells use it as the parent of the dataset and the data service.
catalog_fdp_url = fdp_client.create_and_publish(
    resource_type="catalog",
    metadata=fdp_catalog_record,
)
print(catalog_fdp_url)

## Dataset

In [None]:
# Dataset example that fills in a broad set of HRIDataset fields using the
# shared test objects. Fields not demonstrated here are listed as comments.
hri_dataset = HRIDataset(
    # Identification and descriptive text
    identifier="https://www.example.com/dataset/ZLOYOJ",
    other_identifier=[test_identifier],
    title=[LiteralField(value="Questionnaire data of the Personalised RISk-based MAmmascreening Study (PRISMA)")],
    description=[
        LiteralField(
            value=(
                "The primary aim of the PRISMA study was to investigate the potential value of risk-tailored versus "
                "traditional breast cancer screening protocols in the Netherlands. Data collection took place between "
                "2014-2019, resulting in ∼67,000 mammograms, ∼38,000 surveys, ∼10,000 blood samples and ∼600 saliva "
                "samples."
            )
        )
    ],
    keyword=["example"],
    theme=[URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")],
    health_theme=[test_uri],
    type=[test_uri],
    language=[test_uri],
    purpose=[test_uri],
    # People and organisations
    contact_point=test_vcard,
    creator=[test_agent],
    publisher=test_agent,
    # Access and legal information
    access_rights=URIRef("http://publications.europa.eu/resource/authority/access-right/RESTRICTED"),
    applicable_legislation=["http://data.europa.eu/eli/reg/2025/327/oj"],
    legal_basis=[test_uri],
    personal_data=[test_uri],
    # Distributions and related resources
    distribution=[test_distribution],
    analytics=[test_distribution],
    sample=[test_distribution],
    source=[URIRef(test_dataset.identifier)],
    is_referenced_by=[test_uri],
    conforms_to=[test_uri],
    code_values=[test_uri],
    coding_system=[test_uri],
    # Dates, coverage and update frequency
    release_date=test_datetime,
    modification_date=test_datetime,
    frequency=test_uri,
    geographical_coverage=[test_uri],
    temporal_coverage=[test_periodoftime],
    temporal_resolution=LiteralField(value="P1Y", datatype="xsd:duration"),
    retention_period=test_periodoftime,
    # Population and size
    population_coverage=LiteralField(value="Adults aged 18–65 diagnosed with type 2 diabetes in the Netherlands between 2015 and 2020"),
    minimum_typical_age=87,
    maximum_typical_age=99,
    number_of_records=500,
    number_of_unique_individuals=2,
    # Versioning and status
    status=DatasetStatus.develop,
    version="Version 1",
    version_notes=["This is version 1"],
    has_version=[test_uri],
    # Not demonstrated in this example:
    # in_series,
    # qualified_attribution,
    # qualified_relation,
    # qualified_annotation,
    # was_generated_by
)

# Use the dataset identifier as the RDF subject and print the serialised graph.
fdp_dataset_record = hri_dataset.to_graph(
    subject=URIRef(hri_dataset.identifier)
)
print(fdp_dataset_record.serialize())

In [None]:
# Attach the dataset to the published catalog via dcterms:isPartOf, then
# publish the dataset graph to the FDP.
fdp_dataset_record.add(
    (URIRef(hri_dataset.identifier), DCTERMS.isPartOf, URIRef(catalog_fdp_url))
)
dataset_fdp_url = fdp_client.create_and_publish(
    resource_type="dataset",
    metadata=fdp_dataset_record,
)

print(dataset_fdp_url)

## Distribution

In [None]:
# Placeholder checksum attached to the distribution below.
test_checksum = Checksum(
    algorithm=test_uri,
    checksum_value=LiteralField(value="000000", datatype="xsd:hexBinary"),
)

# Distribution example that fills in a broad set of HRIDistribution fields.
hri_distribution = HRIDistribution(
    # Descriptive text
    title=[
        LiteralField(value="CSV-distribution of the questionnaire data of the Personalised RISk-based MAmmascreening Study (PRISMA)"),
    ],
    description=[
        LiteralField(value="CSV file containing the questionnaire data of the PRISMA study"),
    ],
    documentation=[test_uri],
    language=[test_uri],
    # Where and how the data can be obtained
    access_url=URIRef("https://example.com/dataset/PRISMA/questionnaire.csv"),
    download_url=test_uri,
    access_service=test_dataservice,
    # File characteristics
    media_type=URIRef("https://www.iana.org/assignments/media-types/text/csv"),
    format=URIRef("http://publications.europa.eu/resource/authority/file-type/CSV"),
    compression_format=test_uri,
    packaging_format=test_uri,
    byte_size=4096,
    checksum=test_checksum,
    linked_schemas=[test_uri],
    # Legal information
    license=URIRef("https://definities.geostandaarden.nl/dcat-ap-nl/id/waardelijst/licenties/niet_open"),
    rights="https://www.example.com/contracts/definitely_a_real_DPA.pdf",
    applicable_legislation=[test_uri],
    # Dates and status
    release_date=test_datetime,
    modification_date=test_datetime,
    retention_period=[test_periodoftime],
    temporal_resolution=LiteralField(value="P1Y", datatype="xsd:duration"),
    status=DistributionStatus.develop,
)

# Build the distribution subject from the dataset identifier plus the last
# path segment of the access URL (here the file name).
access_url_str = str(hri_distribution.access_url)
distribution_uri = URIRef(
    f"{hri_dataset.identifier}/distribution/{access_url_str.rsplit('/', 1)[-1]}"
)
fdp_distribution_record = hri_distribution.to_graph(subject=distribution_uri)
print(fdp_distribution_record.serialize())

In [None]:

# Attach the distribution to the published dataset via dcterms:isPartOf, then
# publish the distribution graph to the FDP.
fdp_distribution_record.add(
    (
        distribution_uri,
        DCTERMS.isPartOf,
        URIRef(str(dataset_fdp_url)),
    )
)
distribution_fdp_url = fdp_client.create_and_publish(
    resource_type="distribution",
    metadata=fdp_distribution_record,
)

print(distribution_fdp_url)

## Data Service

In [None]:
# Data service example that fills in a broad set of HRIDataService fields
# using the shared test objects.
hri_dataservice = HRIDataService(
    title=[LiteralField(value="Example Data Service Title")],
    description=[LiteralField(value="Example Data Service Description")],
    endpoint_description=LiteralField(value="Example Data Service Endpoint Description"),
    contact_point=test_vcard,
    access_rights=URIRef("http://publications.europa.eu/resource/authority/access-right/RESTRICTED"),
    endpoint_url="https://www.example.com/dataservice/1",
    # Absolute IRI (with scheme): this identifier is also used as the RDF
    # subject when the data service is converted to a graph.
    identifier="https://www.example.com/dataservice/1",
    license=URIRef("https://definities.geostandaarden.nl/dcat-ap-nl/id/waardelijst/licenties/niet_open"),
    publisher=test_agent,
    theme=[URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")],
    applicable_legislation=[test_uri],
    application_profile=[test_uri],
    creator=[test_agent],
    rights=[test_uri],
    format=[test_uri],
    hvd_category=[test_uri],
    keyword=["test"],
    landing_page=[test_uri],
    language=[test_uri],
    modification_date=test_datetime,
    other_identifier=[test_uri],
    serves_dataset=[URIRef(test_dataset.identifier)],
)

In [None]:
# Convert the data service to a graph with its identifier as the subject,
# attach it to the published catalog via dcterms:isPartOf, and publish it.
fdp_dataservice_record = hri_dataservice.to_graph(
    subject=URIRef(hri_dataservice.identifier)
)
fdp_dataservice_record.add(
    (
        URIRef(hri_dataservice.identifier),
        DCTERMS.isPartOf,
        URIRef(str(catalog_fdp_url)),
    )
)
dataservice_fdp_url = fdp_client.create_and_publish(
    resource_type="dataservice",
    metadata=fdp_dataservice_record,
)

print(dataservice_fdp_url)

## Dataset Series

In [None]:
# Dataset series example. Every argument is passed by keyword: bare positional
# names after keyword arguments are a SyntaxError in a Python call.
fdp_datasetseries = FDPDatasetSeries(
    title=[
        LiteralField(value="Example Dataset Series title")
    ],
    description=[
        LiteralField(value="Example Dataset Series description")
    ],
    is_part_of=[URIRef(dataset_fdp_url)],
    # NOTE(review): the values below reuse the shared test objects in the same
    # form as they are passed to HRIDataset in this notebook; confirm them
    # against the HRIDatasetSeries field types.
    applicable_legislation=["http://data.europa.eu/eli/reg/2025/327/oj"],
    contact_point=test_vcard,
    frequency=test_uri,
    geographical_coverage=[test_uri],
    modification_date=test_datetime,
    publisher=test_agent,
    release_date=test_datetime,
    temporal_coverage=[test_periodoftime],
)

# Convert the dataset series to an RDF graph and print its serialisation.
fdp_datasetseries_record = fdp_datasetseries.to_graph(
    URIRef("https://www.example.com/datasetseries/1")
)
print(fdp_datasetseries_record.serialize())

In [None]:
# Publish the dataset series graph to the FDP and print the returned value.
datasetseries_fdp_url = fdp_client.create_and_publish(
    resource_type="datasetseries",
    metadata=fdp_datasetseries_record,
)

print(datasetseries_fdp_url)