# Pushing metadata to an FDP

This notebook can be used to manually push DCAT metadata that follows the Health-RI Core v2 model to a FAIR Data Point (FDP).
If you are looking for interactive explanations, please see the 'Documentation' notebooks.

**Prerequisites:** To run this notebook in full, you need a running FAIR Data Point (FDP) instance and an active account with write access.
This notebook is written for the reference implementation, FAIR Data Point version 1.16, with the [Health-RI Core v2 SHACL shapes](https://github.com/Health-RI/health-ri-metadata/tree/develop/Formalisation%28shacl%29/Core/FairDataPointShape).

## Imports and setup

In [1]:
from typing import List, Union
from pprint import pprint

from rdflib import URIRef, DCTERMS
from pydantic import AnyHttpUrl, Field, field_validator

from getpass import getpass
import dateutil.parser as parser

from fairclient.fdpclient import FDPClient

from sempyro import LiteralField
from sempyro.hri_dcat import (
    HRICatalog, 
    HRIDataset, 
    HRIVCard, 
    HRIAgent, 
    HRIDistribution,
    HRIDataService,
    HRIDatasetSeries
)
from sempyro.utils.validator_functions import force_literal_field

  warn(


In [2]:
# Ask for the connection details interactively so that no credentials end up
# stored in the notebook source.
fdp_base = input("Enter base link to FDP: ").rstrip("/")  # drop trailing slashes
username = input("Enter username: ")
password = getpass(prompt="Password: ")

# Client object used by all publishing cells further down.
fdp_client = FDPClient(
    base_url=fdp_base,
    username=username,
    password=password,
)

Enter base link to FDP:  http://localhost:8081
Enter username:  albert.einstein@example.com
Password:  ········


In [4]:
class FDPCatalog(HRICatalog):
    """HRI catalog with an additional link to its parent resource.

    ``is_part_of`` is mapped to ``dcterms:isPartOf`` and serialised as a URI.
    In this notebook it holds the base URL of the FAIR Data Point the catalog
    is pushed to.
    """

    # The previous annotation ``[AnyHttpUrl]`` was a list *instance*, not a
    # type, so pydantic had nothing to validate against. ``List[URIRef]`` is a
    # real annotation, matches the values this notebook passes, and keeps each
    # URI exactly as given; an ``AnyHttpUrl`` element would normalise a bare
    # host such as "http://localhost:8081" to end with "/".
    # NOTE(review): relies on the base model permitting arbitrary types, which
    # the previous annotation also needed in order to be accepted - confirm.
    is_part_of: List[URIRef] = Field(
        description="Link to parent object",
        json_schema_extra={
            "rdf_term": DCTERMS.isPartOf,
            "rdf_type": "uri",
        },
    )

class FDPDatasetSeries(HRIDatasetSeries):
    """HRI dataset series with an additional link to its parent resource.

    ``is_part_of`` is mapped to ``dcterms:isPartOf`` and serialised as a URI.
    In this notebook it holds the FDP URL of a previously published resource.
    """

    # The previous annotation ``[AnyHttpUrl]`` was a list *instance*, not a
    # type, so pydantic had nothing to validate against. ``List[URIRef]`` is a
    # real annotation, matches the values this notebook passes, and keeps each
    # URI exactly as given.
    # NOTE(review): relies on the base model permitting arbitrary types, which
    # the previous annotation also needed in order to be accepted - confirm.
    is_part_of: List[URIRef] = Field(
        description="Link to parent object",
        json_schema_extra={
            "rdf_term": DCTERMS.isPartOf,
            "rdf_type": "uri",
        },
    )



## Catalog

In [6]:
# Describe the example catalogue. Its parent (``is_part_of``) is the base URL
# of the FAIR Data Point entered above; no datasets are listed yet.
fdp_catalog = FDPCatalog(
    title=[
        LiteralField(value="Inflammatory Bowel Disease catalogue", language="en")
    ],
    description=[
        LiteralField(
            value="This catalogue describes the core metadata of AUMC Inflammatory Bowel Disease datasets",
            language="en",
        )
    ],
    contact_point=HRIVCard(
        hasEmail="mailto:data-access-committee@xumc.nl",
        formatted_name="Data Access Committee of the x UMC",
    ),
    publisher=HRIAgent(
        name=[LiteralField(value="Academic Medical Center")],
        identifier=["https://ror.org/05wg1m734"],
        homepage=URIRef("https://www.xumc.nl"),
        mbox="mailto:data-access-committee@xumc.nl",
    ),
    is_part_of=[URIRef(fdp_base)],
    dataset=[],
)

# Convert the model to an RDF graph with a fixed example subject URI and show
# the serialised result. The URI has no placeholders, so a plain string
# literal is used instead of an f-string.
fdp_catalog_record = fdp_catalog.to_graph(URIRef("https://www.example.com/catalog/1"))
print(fdp_catalog_record.serialize())


@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix v: <http://www.w3.org/2006/vcard/ns#> .

<https://www.example.com/catalog/1> a dcat:Catalog ;
    dcterms:description "This catalogue describes the core metadata of AUMC Inflammatory Bowel Disease datasets"@en ;
    dcterms:isPartOf <http://localhost:8081> ;
    dcterms:publisher [ a foaf:Agent ;
            dcterms:identifier "https://ror.org/05wg1m734" ;
            foaf:homepage <https://www.xumc.nl/> ;
            foaf:mbox <mailto:data-access-committee@xumc.nl> ;
            foaf:name "Academic Medical Center" ] ;
    dcterms:title "Inflammatory Bowel Disease catalogue"@en ;
    dcat:contactPoint [ a v:Kind ;
            v:fn "Data Access Committee of the x UMC" ;
            v:hasEmail <mailto:data-access-committee@xumc.nl> ] .




In [7]:
# Push the catalogue graph to the FDP. The returned value is kept because the
# dataset and data service cells below use it as their parent link.
catalog_fdp_url = fdp_client.create_and_publish(
    resource_type="catalog",
    metadata=fdp_catalog_record,
)
print(catalog_fdp_url)

http://localhost:8081/catalog/af57d7c9-2c1c-45d6-abd4-50dd4753874e


## Dataset

In [9]:
# Example dataset description (PRISMA questionnaire data).
hri_dataset = HRIDataset(
    # --- What the dataset is ---
    title=[
        LiteralField(
            value="Questionnaire data of the Personalised RISk-based MAmmascreening Study (PRISMA)"
        )
    ],
    description=[
        LiteralField(
            value=(
                "The primary aim of the PRISMA study was to investigate the potential value of risk-tailored versus "
                "traditional breast cancer screening protocols in the Netherlands. Data collection took place between "
                "2014-2019, resulting in ∼67,000 mammograms, ∼38,000 surveys, ∼10,000 blood samples and ∼600 saliva "
                "samples."
            )
        )
    ],
    # Also used as the subject URI when the record is converted to RDF below.
    identifier="https://www.example.com/dataset/ZLOYOJ",
    keyword=["example"],
    theme=[
        URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")
    ],
    # --- When it was issued / last changed ---
    issued=parser.isoparse("2024-07-01T11:11:11"),
    modified=parser.isoparse("2024-06-04T13:36:10.246Z"),
    # --- Access conditions ---
    access_rights=URIRef(
        "http://publications.europa.eu/resource/authority/access-right/RESTRICTED"
    ),
    applicable_legislation=["http://data.europa.eu/eli/reg/2025/327/oj"],
    # --- Who is involved ---
    contact_point=HRIVCard(
        hasEmail="mailto:data-access-committee@xumc.nl",
        formatted_name="Data Access Committee of the x UMC",
    ),
    creator=[
        HRIAgent(
            name=["Academic Medical Center"],
            identifier=["https://ror.org/05wg1m734"],
            homepage="https://www.xumc.nl",
            mbox="mailto:data-access-committee@xumc.nl",
        )
    ],
    publisher=HRIAgent(
        name=["Academic Medical Center"],
        identifier=["https://ror.org/05wg1m734"],
        homepage="https://www.xumc.nl",
        mbox="mailto:data-access-committee@xumc.nl",
    ),
    # Distributions are pushed to the FDP as separate records further down.
    distribution=[],
)

In [10]:
# The dataset's own identifier is used as the subject of its RDF record.
dataset_uri = URIRef(hri_dataset.identifier)
fdp_dataset_record = hri_dataset.to_graph(subject=dataset_uri)

# Link the dataset to the catalogue that was published earlier.
fdp_dataset_record.add((dataset_uri, DCTERMS.isPartOf, URIRef(catalog_fdp_url)))

dataset_fdp_url = fdp_client.create_and_publish(
    resource_type="dataset",
    metadata=fdp_dataset_record,
)

print(dataset_fdp_url)

http://localhost:8081/dataset/ac55d445-d1c9-4d58-9eb1-b478def447a8


## Distribution

In [11]:
# Example distribution: a CSV file with the PRISMA questionnaire data.
hri_distribution = HRIDistribution(
    title=[
        LiteralField(
            value="CSV-distribution of the questionnaire data of the Personalised RISk-based MAmmascreening Study (PRISMA)"
        )
    ],
    description=[
        LiteralField(value="CSV file containing the questionnaire data of the PRISMA study")
    ],
    # --- Location and technical form of the file ---
    access_url=URIRef("https://example.com/dataset/PRISMA/questionnaire.csv"),
    media_type=URIRef("https://www.iana.org/assignments/media-types/text/csv"),
    format=URIRef("http://publications.europa.eu/resource/authority/file-type/CSV"),
    byte_size=4096,
    # --- Conditions of use ---
    license=URIRef(
        "https://definities.geostandaarden.nl/dcat-ap-nl/id/waardelijst/licenties/niet_open"
    ),
    rights="https://www.example.com/contracts/definitely_a_real_DPA.pdf",
)

In [12]:
# Build the distribution's subject URI from the dataset identifier and the
# last path segment of the access URL.
access_url_str = str(hri_distribution.access_url)
distribution_file_name = access_url_str.rsplit("/", 1)[-1]
distribution_uri = URIRef(
    f"{hri_dataset.identifier}/distribution/{distribution_file_name}"
)

fdp_distribution_record = hri_distribution.to_graph(subject=distribution_uri)

# Link the distribution to the dataset that was published earlier.
fdp_distribution_record.add(
    (distribution_uri, DCTERMS.isPartOf, URIRef(str(dataset_fdp_url)))
)

distribution_fdp_url = fdp_client.create_and_publish(
    resource_type="distribution",
    metadata=fdp_distribution_record,
)

print(distribution_fdp_url)

http://localhost:8081/distribution/f8aac441-6195-4c06-b58c-adcd41d48fed


## Data Service

In [13]:
# Example data service description.
hri_dataservice = HRIDataService(
    title=[
        LiteralField(value="Example Data Service Title")
    ],
    description=[
        LiteralField(value="Example Data Service Description")
    ],
    endpoint_description=LiteralField(value="Example Data Service Endpoint Description"),
    contact_point=HRIVCard(
        hasEmail="mailto:data-access-committee@xumc.nl",
        formatted_name="Data Access Committee of the x UMC",
    ),
    access_rights=URIRef("http://publications.europa.eu/resource/authority/access-right/RESTRICTED"),
    endpoint_url="https://www.example.com/dataservice/1",
    # The identifier is used as the subject URI of the RDF record in the next
    # cell, so it must be an absolute IRI. The scheme was missing before
    # ("www.example.com/..."), which produced a relative IRI.
    identifier="https://www.example.com/dataservice/1",
    license=URIRef("https://definities.geostandaarden.nl/dcat-ap-nl/id/waardelijst/licenties/niet_open"),
    publisher=HRIAgent(
        name=["Academic Medical Center"],
        identifier=["https://ror.org/05wg1m734"],
        homepage="https://www.xumc.nl",
        mbox="mailto:data-access-committee@xumc.nl",
    ),
    theme=[URIRef("http://publications.europa.eu/resource/authority/data-theme/HEAL")],
)

In [15]:
# The data service's own identifier is used as the subject of its RDF record.
dataservice_uri = URIRef(hri_dataservice.identifier)
fdp_dataservice_record = hri_dataservice.to_graph(subject=dataservice_uri)

# Link the data service to the catalogue that was published earlier.
fdp_dataservice_record.add(
    (dataservice_uri, DCTERMS.isPartOf, URIRef(str(catalog_fdp_url)))
)

dataservice_fdp_url = fdp_client.create_and_publish(
    resource_type="dataservice",
    metadata=fdp_dataservice_record,
)

print(dataservice_fdp_url)

http://localhost:8081/dataservice/cf515caa-682a-4c85-a295-23bcba6fa70d


## Dataset Series

In [18]:
# Example dataset series; its parent link points to the FDP URL of the dataset
# published earlier in this notebook.
fdp_datasetseries = FDPDatasetSeries(
    title=[
        LiteralField(value="Example Dataset Series title")
    ],
    description=[
        LiteralField(value="Example Dataset Series description")
    ],
    is_part_of=[URIRef(dataset_fdp_url)],
)

# Convert the model to an RDF graph with a fixed example subject URI and show
# the serialised result. The URI has no placeholders, so a plain string
# literal is used instead of an f-string.
fdp_datasetseries_record = fdp_datasetseries.to_graph(URIRef("https://www.example.com/datasetseries/1"))
print(fdp_datasetseries_record.serialize())

@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .

<https://www.example.com/datasetseries/1> a dcat:DatasetSeries ;
    dcterms:description "Example Dataset Series description" ;
    dcterms:isPartOf <http://localhost:8081/dataset/ac55d445-d1c9-4d58-9eb1-b478def447a8> ;
    dcterms:title "Example Dataset Series title" .




In [19]:
# Push the dataset series graph to the FDP and show the value returned for it.
datasetseries_fdp_url = fdp_client.create_and_publish(
    resource_type="datasetseries",
    metadata=fdp_datasetseries_record,
)

print(datasetseries_fdp_url)

http://localhost:8081/datasetseries/a3fbdc27-7816-4a17-9c20-a14054d26989
