# PRISMA Search Execution Dataset Nanopublication Generator

This notebook generates a nanopublication for documenting search execution and results following PRISMA 2020 guidelines.

**Template:** [Declaring a PRISMA search execution dataset](https://w3id.org/np/RAV_H3udaSzxYOhhR0t-q7PKS6URwauD_Z5sMLbHmM2x0) by Tobias Kuhn (2025-11-21)

## Workflow Position
1. PICO Research Question ✅
2. Search Strategy ✅
3. **Search Execution Dataset** ← This notebook
4. Study Inclusion
5. Study Assessment Dataset

## Step 1: Configuration

Set the path to your JSON configuration file:

In [2]:
# Path to your JSON configuration file.
# This is a relative path, so it is resolved against the kernel's current
# working directory. The expected keys are listed in the
# "JSON Configuration Schema" section at the end of this notebook.
SEARCH_EXECUTION_FILE = "../inputs/search-execution-quantum-biodiversity_updated.json"

## Step 2: Load Configuration

In [3]:
import json
from pathlib import Path

# Read the JSON configuration into `config`.
# The encoding is passed explicitly: without it open() decodes the file with
# the platform's locale encoding, which can garble or reject non-ASCII
# characters (accented names, typographic quotes) in a UTF-8 JSON file.
with open(SEARCH_EXECUTION_FILE, 'r', encoding='utf-8') as f:
    config = json.load(f)

# Echo the key fields so a wrong input file is noticed before anything is built.
# A missing key raises KeyError here, which stops the run early.
print(f"Loaded configuration for: {config['search_execution_dataset']['label']}")
print(f"Author: {config['author']['name']} ({config['author']['orcid']})")
print(f"Part of: {config['search_execution_dataset']['part_of']}")

Loaded configuration for: Quantum Computing for Biodiversity - Search Execution Results
Author: Anne Fouilloux (0000-0002-1784-2920)
Part of: https://w3id.org/np/RA8B3ptXUOsN7obpkFGtA0FBmsh0OnID53wOsUIpSKTcg


## Step 3: Setup Namespaces and Imports

In [4]:
from datetime import datetime, timezone
from rdflib import Graph, Dataset, Namespace, URIRef, Literal, BNode
from rdflib.namespace import RDF, RDFS, XSD, FOAF, PROV, DCTERMS as DCT

# Placeholder namespace for the nanopub under construction
# (will be replaced when signed)
TEMP_NP = Namespace("http://purl.org/nanopub/temp/np/")

# Vocabularies referenced while building the graphs
SLV = Namespace("https://w3id.org/sciencelive/o/terms/")
ORCID = Namespace("https://orcid.org/")
NT = Namespace("https://w3id.org/np/o/ntemplate/")
NPX = Namespace("http://purl.org/nanopub/x/")
NP = Namespace("http://www.nanopub.org/nschema#")

# Templates recorded in the pubinfo graph: one assertion template,
# one provenance template and two pubinfo templates
PUBINFO_TEMPLATE_1 = URIRef("https://w3id.org/np/RA0J4vUn_dekg-U1kK3AOEt02p9mT2WO03uGxLDec1jLw")
PUBINFO_TEMPLATE_2 = URIRef("https://w3id.org/np/RAukAcWHRDlkqxk7H2XNSegc1WnHI569INvNr-xdptDGI")
PROVENANCE_TEMPLATE = URIRef("https://w3id.org/np/RA7lSq6MuK_TIC6JMSHvLtee3lpLoZDOqLJCLXevnrPoU")
SEARCH_EXECUTION_TEMPLATE = URIRef("https://w3id.org/np/RAV_H3udaSzxYOhhR0t-q7PKS6URwauD_Z5sMLbHmM2x0")

print("Namespaces configured.")

Namespaces configured.


## Step 4: Build the Nanopublication

In [5]:
# Empty rdflib Dataset that will hold the four named graphs of the nanopub
ds = Dataset()

# Register prefixes in a fixed order.
# NOTE(review): "this" and "sub" point at the same temp namespace; the TriG
# output shown further down only carries `sub:`, so the "this" binding looks
# superseded by the later one - keep the order if you touch this list.
for prefix, namespace in (
    ("this", "http://purl.org/nanopub/temp/np/"),
    ("sub", TEMP_NP),
    ("np", NP),
    ("dct", DCT),
    ("nt", NT),
    ("npx", NPX),
    ("xsd", XSD),
    ("rdfs", RDFS),
    ("orcid", ORCID),
    ("prov", PROV),
    ("foaf", FOAF),
    ("slv", SLV),
):
    ds.bind(prefix, namespace)

# The nanopub itself and its four named graphs, all in the temp namespace
np_uri = URIRef("http://purl.org/nanopub/temp/np/")
head_uri = TEMP_NP["Head"]
assertion_uri = TEMP_NP["assertion"]
provenance_uri = TEMP_NP["provenance"]
pubinfo_uri = TEMP_NP["pubinfo"]

# The resource this nanopub introduces
search_execution_uri = TEMP_NP["searchExecutionDataset"]

# The author is identified by ORCID iD, expanded into the orcid.org namespace
author_uri = ORCID[config['author']['orcid']]

print("Dataset initialized with namespace bindings.")

Dataset initialized with namespace bindings.


In [6]:
# HEAD graph: types the nanopub and links it to its three content graphs
head = ds.graph(head_uri)
head.add((np_uri, RDF.type, NP.Nanopublication))

for link_predicate, linked_graph in (
    (NP.hasAssertion, assertion_uri),
    (NP.hasProvenance, provenance_uri),
    (NP.hasPublicationInfo, pubinfo_uri),
):
    head.add((np_uri, link_predicate, linked_graph))

print("Head graph created.")

Head graph created.


In [7]:
# ASSERTION graph: the statements that describe the search execution dataset.
assertion = ds.graph(assertion_uri)
sed = config['search_execution_dataset']


def _is_present(value):
    """Return True when a configured value should be written to the graph.

    Every truthy value counts as present. A numeric zero counts as well, so a
    record count of 0 (for example "no studies included") is kept instead of
    being dropped like a missing field. None, empty strings, empty containers
    and the boolean False are treated as absent.
    """
    return bool(value) or (value == 0 and not isinstance(value, bool))


# --- Required fields (a missing key raises KeyError) ---

# Type
assertion.add((search_execution_uri, RDF.type, SLV.SearchExecutionDataset))

# Label
assertion.add((search_execution_uri, RDFS.label, Literal(sed['label'])))

# Part of (links to PICO/systematic review)
assertion.add((search_execution_uri, DCT.isPartOf, URIRef(sed['part_of'])))

# Creation date
assertion.add((search_execution_uri, DCT.created, Literal(sed['creation_date'], datatype=XSD.date)))

# --- Database searches (repeatable) ---
# Handle both formats: list of URIs OR list of objects with search details.
# `or []` also covers an explicit null in the JSON.
db_search_text_fields = (
    ('database_label', RDFS.label),
    ('search_query', SLV.hasSearchQuery),
    ('filters', SLV.hasFilters),
    ('export_format', SLV.hasExportFormat),
    ('notes', RDFS.comment),
)
for idx, db_search in enumerate(sed.get('db_searches') or []):
    if isinstance(db_search, str):
        # Simple URI format: link straight to the given URI (empty strings are skipped)
        if db_search:
            assertion.add((search_execution_uri, SLV.includesDbSearch, URIRef(db_search)))
    elif isinstance(db_search, dict):
        # Rich object format: each search gets its own URI in the temp
        # namespace (dbSearch1, dbSearch2, ...) - a named node, not a blank node.
        search_node = TEMP_NP[f"dbSearch{idx+1}"]
        assertion.add((search_execution_uri, SLV.includesDbSearch, search_node))

        if db_search.get('database_url'):
            assertion.add((search_node, SLV.hasDatabase, URIRef(db_search['database_url'])))
        for key, predicate in db_search_text_fields:
            if db_search.get(key):
                assertion.add((search_node, predicate, Literal(db_search[key])))
        # Stored as a string literal; `is not None` keeps a result count of 0
        if db_search.get('results_count') is not None:
            assertion.add((search_node, SLV.hasResultsCount, Literal(str(db_search['results_count']))))

# --- Optional free-text fields: written only when non-empty ---
optional_text_fields = (
    ('deduplication_methodology', SLV.usesDeduplicationMethodology),
    ('review_methodology', SLV.usesReviewMethodology),
    ('screening_methodology', SLV.usesScreeningMethodology),
    ('exclusion_breakdown', SLV.hasExclusionBreakdown),
    ('limitations', SLV.hasLimitations),
)
for key, predicate in optional_text_fields:
    if sed.get(key):
        assertion.add((search_execution_uri, predicate, Literal(sed[key])))

# --- Record counts ---
# The value is passed to Literal as configured (string or number).
# _is_present keeps a numeric 0, which a plain truthiness test would drop.
record_count_fields = (
    ('screened_record_count', SLV.hasScreenedRecordCount),
    ('fulltext_screened_record_count', SLV.hasFulltextScreenedRecordCount),
    ('final_included_study_count', SLV.hasFinalIncludedStudyCount),
)
for key, predicate in record_count_fields:
    if _is_present(sed.get(key)):
        assertion.add((search_execution_uri, predicate, Literal(sed[key])))

# --- Dataset file location ---
# Strip surrounding whitespace before building the URI, so the value that was
# tested for emptiness is the same value that ends up in the graph.
dataset_location = (sed.get('dataset_file_location') or '').strip()
if dataset_location:
    assertion.add((search_execution_uri, SLV.hasDatasetFileLocation, URIRef(dataset_location)))

print(f"Assertion graph created with {len(assertion)} triples.")

Assertion graph created with 60 triples.


In [8]:
# PROVENANCE graph: a single statement attributing the assertion to the author
provenance = ds.graph(provenance_uri)
attribution = (assertion_uri, PROV.wasAttributedTo, author_uri)
provenance.add(attribution)

print("Provenance graph created.")

Provenance graph created.


In [9]:
# PUBINFO graph: metadata about the nanopublication itself
pubinfo = ds.graph(pubinfo_uri)

# Human-readable name for the author's ORCID URI
pubinfo.add((author_uri, FOAF.name, Literal(config['author']['name'])))

# Creation time in UTC with millisecond precision and a trailing "Z"
timestamp = (
    datetime.now(timezone.utc)
    .isoformat(timespec="milliseconds")
    .replace("+00:00", "Z")
)

# Nanopub label: the dataset label, cut to 50 characters plus "..." when longer
full_label = sed['label']
if len(full_label) > 50:
    label = full_label[:50] + "..."
else:
    label = full_label

# Core metadata, the introduced resource, the creation site and the label
for predicate, value in (
    (DCT.created, Literal(timestamp, datatype=XSD.dateTime)),
    (DCT.creator, author_uri),
    (DCT.license, URIRef("https://creativecommons.org/licenses/by/4.0/")),
    (NPX.introduces, search_execution_uri),
    (NPX.wasCreatedAt, URIRef("https://nanodash.knowledgepixels.com/")),
    (RDFS.label, Literal(label)),
):
    pubinfo.add((np_uri, predicate, value))

# Templates this nanopub was created from
for predicate, template in (
    (NT.wasCreatedFromTemplate, SEARCH_EXECUTION_TEMPLATE),
    (NT.wasCreatedFromProvenanceTemplate, PROVENANCE_TEMPLATE),
    (NT.wasCreatedFromPubinfoTemplate, PUBINFO_TEMPLATE_1),
    (NT.wasCreatedFromPubinfoTemplate, PUBINFO_TEMPLATE_2),
):
    pubinfo.add((np_uri, predicate, template))

print(f"Pubinfo graph created with {len(pubinfo)} triples.")

Pubinfo graph created with 11 triples.


## Step 5: Serialize and Save

In [10]:
# Serialize the whole dataset to TriG and write it next to the notebook.
# The file name comes from the configuration; ".trig" is appended here.
output_filename = config['output']['filename'] + ".trig"
output_path = Path(output_filename)

trig_output = ds.serialize(format='trig')

# Write as UTF-8 explicitly: without an encoding, write_text() uses the
# platform's locale encoding, which can fail on or corrupt non-ASCII
# characters in labels and free-text fields.
output_path.write_text(trig_output, encoding='utf-8')

print(f"Nanopub saved to: {output_path}")
print(f"File size: {output_path.stat().st_size} bytes")

Nanopub saved to: quantum-biodiversity-search-execution.trig
File size: 6879 bytes


In [11]:
# Show the serialized TriG under a banner made of two 60-character rules
banner = "=" * 60
print(banner, "Generated TriG:", banner, trig_output, sep="\n")

Generated TriG:
@prefix dct: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix np: <http://www.nanopub.org/nschema#> .
@prefix npx: <http://purl.org/nanopub/x/> .
@prefix nt: <https://w3id.org/np/o/ntemplate/> .
@prefix orcid: <https://orcid.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix slv: <https://w3id.org/sciencelive/o/terms/> .
@prefix sub: <http://purl.org/nanopub/temp/np/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sub:provenance {
    sub:assertion prov:wasAttributedTo orcid:0000-0002-1784-2920 .
}

sub:assertion {
    sub:searchExecutionDataset a slv:SearchExecutionDataset ;
        rdfs:label "Quantum Computing for Biodiversity - Search Execution Results" ;
        dct:created "2025-12-26"^^xsd:date ;
        dct:isPartOf <https://w3id.org/np/RA8B3ptXUOsN7obpkFGtA0FBmsh0OnID53wOsUIpSKTcg> ;
        slv:hasExclusionBreakdown "Title/abstract exclusions: Not about q

## Step 6: Validate (Optional)

Load the generated file to verify it can be parsed by the nanopub library:

In [12]:
from nanopub import Nanopub, NanopubConf, load_profile

# Validation only (no signing): load the saved file with the nanopub library
# and report the size of each content graph. Any failure is reported, not raised.
try:
    np_obj = Nanopub(rdf=output_path)
    print("✅ Nanopub RDF structure is valid!")
    for graph_title, graph_attr in (
        ("Assertion", "assertion"),
        ("Provenance", "provenance"),
        ("Pubinfo", "pubinfo"),
    ):
        triple_count = len(getattr(np_obj, graph_attr))
        print(f"   {graph_title} triples: {triple_count}")
except Exception as err:
    print(f"❌ Validation error: {err}")

✅ Nanopub RDF structure is valid!
   Assertion triples: 60
   Provenance triples: 1
   Pubinfo triples: 11


## Step 7: Sign and Publish (Optional)

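Set `PUBLISH = True` in the cell below to sign and publish the nanopublication. Set `USE_TEST_SERVER = True` to publish to the test server instead of the production network.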

In [13]:
# Sign and publish, controlled by the two switches below.
# NOTE(review): as committed (PUBLISH = True, USE_TEST_SERVER = False) every
# "Run All" signs and publishes to the non-test server - confirm this is intended.
PUBLISH = True
USE_TEST_SERVER = False

if not PUBLISH:
    print("Publishing disabled. Set PUBLISH = True to enable.")
else:
    from nanopub import Nanopub, NanopubConf, load_profile

    # Profile loaded by the nanopub library; its name is echoed for confirmation
    profile = load_profile()
    print(f"Loaded profile: {profile.name}")

    # Reload the unsigned file with a configuration carrying the profile
    conf = NanopubConf(profile=profile, use_test_server=USE_TEST_SERVER)
    np_obj = Nanopub(rdf=output_path, conf=conf)

    np_obj.sign()
    print(f"✓ Signed")

    # Keep a local copy of the signed nanopub before publishing it
    signed_path = Path(f"{config['output']['filename']}.signed.trig")
    np_obj.store(signed_path)
    print(f"✓ Saved: {signed_path}")

    np_obj.publish()
    print(f"✓ Published: {np_obj.source_uri}")

Loaded profile: Anne Fouilloux
✓ Signed
✓ Saved: quantum-biodiversity-search-execution.signed.trig
✓ Published: https://w3id.org/np/RAMPy96eCLCXlGR9VvCVf6rJmpN_DlxxarMGm91_5n-O8


---

## JSON Configuration Schema

```json
{
  "author": {
    "orcid": "0000-0002-1784-2920",
    "name": "Anne Fouilloux"
  },
  "search_execution_dataset": {
    "label": "My Search Execution Dataset",
    "part_of": "https://w3id.org/np/...",  // URI of PICO/systematic review
    "creation_date": "2025-01-31",  // Date search was completed
    "db_searches": [  // URIs to database search nanopubs (optional, repeatable)
      "https://w3id.org/np/.../search"
    ],
    "deduplication_methodology": "Description of deduplication process",
    "review_methodology": "Number of reviewers, independence, disagreement resolution",
    "screening_methodology": "Title/abstract and full-text screening methods",
    "screened_record_count": "1000",  // Total records screened
    "fulltext_screened_record_count": "200",  // Records at full-text stage
    "final_included_study_count": "50",  // Final included studies
    "exclusion_breakdown": "Exclusion reasons with counts",
    "dataset_file_location": "https://zenodo.org/...",  // URL to deposited data
    "limitations": "Optional: search limitations and notes"  // Optional
  },
  "output": {
    "filename": "my-search-execution"  // .trig will be appended
  }
}
```