# Software Nanopublication Generator (Corrected)

Creates software description nanopublications from a JSON configuration file.

**Template:** [Software Description Template](https://w3id.org/np/RABBzVTxosLGT4YBCfdfNd6LyuOOTe2EVOTtWJMyOoZHk)

## Software Nanopublications
Document research software with:
- Title and description
- Repository/maintainer link
- Related publications (via cito:supports)
- Related resources (datasets, websites, other nanopubs)

---

## Changes from Original
- Uses `rdflib.Dataset` instead of string concatenation
- Uses temporary namespace `http://purl.org/nanopub/temp/np/` (replaced on signing)
- Proper signing workflow with nanopub library

---
# 📝 SECTION 1: INPUT FILE (EDIT THIS)
---

In [None]:
# Path to your Software JSON config file
# (parsed with json.load; its `metadata` and `nanopublications` keys are read below)
CONFIG_FILE = "../config/vbae208/vbae208_software.json"
# Directory that receives one generated .trig file per configured entry
# (created on demand, including missing parent directories)
OUTPUT_DIR = "../output/software"

---
# ⚙️ SECTION 2: SETUP
---

In [None]:
import json
import re
from pathlib import Path
from datetime import datetime, timezone
from rdflib import Dataset, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD, FOAF

# Namespaces
# Vocabulary prefixes used when building the nanopublication graphs.
NP = Namespace("http://www.nanopub.org/nschema#")  # nanopub structure (Head graph terms)
DCT = Namespace("http://purl.org/dc/terms/")  # title, license, created, creator, isPartOf
NT = Namespace("https://w3id.org/np/o/ntemplate/")  # wasCreatedFrom*Template links
NPX = Namespace("http://purl.org/nanopub/x/")  # wasCreatedAt, hasNanopubType
PROV = Namespace("http://www.w3.org/ns/prov#")  # wasAttributedTo in the provenance graph
ORCID = Namespace("https://orcid.org/")  # base IRI for the author identifier
CITO = Namespace("http://purl.org/spar/cito/")  # supports (related publications)
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")  # related (related resources)
SCHEMA = Namespace("https://schema.org/")  # maintainer (repository link)
DCMITYPE = Namespace("http://purl.org/dc/dcmitype/")  # Software type

# Template URIs
# Recorded in the pubinfo graph so the nanopub states which templates it was
# created from: one assertion template, one provenance template and three
# pubinfo templates.
SOFTWARE_TEMPLATE = URIRef("https://w3id.org/np/RABBzVTxosLGT4YBCfdfNd6LyuOOTe2EVOTtWJMyOoZHk")
PROV_TEMPLATE = URIRef("https://w3id.org/np/RA7lSq6MuK_TIC6JMSHvLtee3lpLoZDOqLJCLXevnrPoU")
PUBINFO_TEMPLATE_1 = URIRef("https://w3id.org/np/RA0J4vUn_dekg-U1kK3AOEt02p9mT2WO03uGxLDec1jLw")
PUBINFO_TEMPLATE_2 = URIRef("https://w3id.org/np/RAoTD7udB2KtUuOuAe74tJi1t3VzK0DyWS7rYVAq1GRvw")
PUBINFO_TEMPLATE_3 = URIRef("https://w3id.org/np/RAukAcWHRDlkqxk7H2XNSegc1WnHI569INvNr-xdptDGI")

def slugify(text):
    """Return a lowercase, hyphen-separated slug built from *text*.

    Characters other than word characters, whitespace and hyphens are
    dropped; every run of whitespace/underscores becomes a single hyphen;
    leading and trailing hyphens are trimmed. Hyphens already present in
    the input are kept, so adjacent hyphens are not collapsed.
    """
    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    return re.sub(r'[\s_]+', '-', cleaned).strip('-')

print("‚úì Setup complete")

---
# 📖 SECTION 3: LOAD & VALIDATE
---

In [None]:
# Load configuration
print(f"Loading: {CONFIG_FILE}")

with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
    config = json.load(f)

# `or {}` also covers an explicit JSON null, which .get() alone would
# hand back as None and break the attribute lookups below.
metadata = config.get('metadata') or {}
AUTHOR_ORCID = metadata.get('creator_orcid')
AUTHOR_NAME = metadata.get('creator_name')
IS_PART_OF = metadata.get('is_part_of') or {}

# Collect every problem first so the user can fix them all in one pass.
errors = []
if not AUTHOR_ORCID:
    errors.append("metadata.creator_orcid is required")
if not AUTHOR_NAME:
    errors.append("metadata.creator_name is required")

nanopub_entries = config.get('nanopublications')
if not nanopub_entries:
    errors.append("nanopublications list is required")
elif not isinstance(nanopub_entries, list):
    errors.append("nanopublications must be a list")
else:
    # Each entry needs `id` (output file name) and `title` (software URI
    # slug and label); without them generation fails later with a bare
    # KeyError that does not say which entry is at fault.
    for index, entry in enumerate(nanopub_entries):
        if not isinstance(entry, dict):
            errors.append(f"nanopublications[{index}] must be an object")
            continue
        for required_key in ('id', 'title'):
            if not entry.get(required_key):
                errors.append(f"nanopublications[{index}].{required_key} is required")

if errors:
    print("‚ùå Validation errors:")
    for e in errors:
        print(f"   - {e}")
    raise ValueError("Please fix the errors")

print(f"‚úì Loaded {len(config['nanopublications'])} software nanopubs to generate")
print(f"‚úì Author: {AUTHOR_NAME} ({AUTHOR_ORCID})")

---
# 🔨 SECTION 4: BUILD NANOPUBLICATIONS
---

In [None]:
def create_software_nanopub(np_config, metadata):
    """
    Create a Software nanopublication using rdflib Dataset.

    Builds the Head, assertion, provenance and pubinfo named graphs under
    the temporary namespace ``http://purl.org/nanopub/temp/np/``.

    Args:
        np_config: Dict describing one software entry. ``title`` is
            required. ``repository_uri``, ``license_uri``,
            ``related_publications`` (URLs or bare DOIs) and
            ``related_resources`` (URIs) are optional; a missing key,
            ``None`` or an empty value is skipped.
        metadata: Dict with ``creator_orcid`` (bare iD or full
            ``https://orcid.org/...`` URL) and ``creator_name``, plus an
            optional ``is_part_of`` dict whose ``uri`` is linked from the
            software via dct:isPartOf.

    Returns:
        Tuple ``(ds, label)``: the populated ``rdflib.Dataset`` and the
        rdfs:label string (``"Software: <title>"``).

    Raises:
        KeyError: if ``title``, ``creator_orcid`` or ``creator_name`` is
            missing.
        ValueError: if the title contains no characters usable in a slug,
            which would make the software URI equal to the nanopub URI.
    """
    TEMP_NP = Namespace("http://purl.org/nanopub/temp/np/")

    this_np = URIRef(str(TEMP_NP))
    head_graph = TEMP_NP["Head"]
    assertion_graph = TEMP_NP["assertion"]
    provenance_graph = TEMP_NP["provenance"]
    pubinfo_graph = TEMP_NP["pubinfo"]

    # Accept the ORCID either as a bare iD or as a full URL; appending a
    # full URL to the ORCID namespace would yield a doubled, invalid IRI.
    orcid_id = str(metadata['creator_orcid']).strip()
    for url_prefix in ("https://orcid.org/", "http://orcid.org/"):
        if orcid_id.startswith(url_prefix):
            orcid_id = orcid_id[len(url_prefix):]
            break
    author_uri = ORCID[orcid_id]

    # Create software URI from title slug
    title = np_config['title']
    software_slug = slugify(title)
    if not software_slug:
        # An empty slug would collapse the software URI onto this_np.
        raise ValueError(
            f"Cannot derive a software URI from title {title!r}: "
            "it contains no letters or digits"
        )
    software_uri = TEMP_NP[software_slug]

    # Create Dataset
    ds = Dataset()

    ds.bind("this", "http://purl.org/nanopub/temp/np/")
    ds.bind("sub", TEMP_NP)
    ds.bind("np", NP)
    ds.bind("dct", DCT)
    ds.bind("nt", NT)
    ds.bind("npx", NPX)
    ds.bind("xsd", XSD)
    ds.bind("rdfs", RDFS)
    ds.bind("orcid", ORCID)
    ds.bind("prov", PROV)
    ds.bind("foaf", FOAF)
    ds.bind("cito", CITO)
    ds.bind("skos", SKOS)
    ds.bind("schema", SCHEMA)
    # dcmitype:Software is used below; bind it so the serialized output
    # gets a readable prefix instead of an auto-generated one.
    ds.bind("dcmitype", DCMITYPE)

    # HEAD graph
    head = ds.graph(head_graph)
    head.add((this_np, RDF.type, NP.Nanopublication))
    head.add((this_np, NP.hasAssertion, assertion_graph))
    head.add((this_np, NP.hasProvenance, provenance_graph))
    head.add((this_np, NP.hasPublicationInfo, pubinfo_graph))

    # ASSERTION graph
    assertion = ds.graph(assertion_graph)
    assertion.add((software_uri, RDF.type, DCMITYPE.Software))
    assertion.add((software_uri, DCT.title, Literal(title)))

    # Repository (expressed as schema:maintainer)
    if np_config.get('repository_uri'):
        assertion.add((software_uri, SCHEMA.maintainer, URIRef(np_config['repository_uri'])))

    # License
    if np_config.get('license_uri'):
        assertion.add((software_uri, DCT.license, URIRef(np_config['license_uri'])))

    # Related publications (cito:supports); bare DOIs become doi.org URLs.
    # `or []` tolerates an explicit null in the JSON config.
    for pub in np_config.get('related_publications') or []:
        pub_uri = URIRef(pub if pub.startswith('http') else f"https://doi.org/{pub}")
        assertion.add((software_uri, CITO.supports, pub_uri))

    # Related resources (skos:related)
    for resource in np_config.get('related_resources') or []:
        assertion.add((software_uri, SKOS.related, URIRef(resource)))

    # isPartOf (in assertion for Software)
    is_part_of = metadata.get('is_part_of') or {}
    if is_part_of.get('uri'):
        assertion.add((software_uri, DCT.isPartOf, URIRef(is_part_of['uri'])))

    # PROVENANCE graph
    provenance = ds.graph(provenance_graph)
    provenance.add((assertion_graph, PROV.wasAttributedTo, author_uri))

    # PUBINFO graph
    pubinfo = ds.graph(pubinfo_graph)
    pubinfo.add((author_uri, FOAF.name, Literal(metadata['creator_name'])))

    # UTC creation time; the millisecond field is fixed at .000
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    pubinfo.add((this_np, DCT.created, Literal(now, datatype=XSD.dateTime)))
    pubinfo.add((this_np, DCT.creator, author_uri))
    pubinfo.add((this_np, DCT.license, URIRef("https://creativecommons.org/licenses/by/4.0/")))
    pubinfo.add((this_np, NPX.wasCreatedAt, URIRef("https://sciencelive4all.org/")))
    pubinfo.add((this_np, NPX.hasNanopubType, DCMITYPE.Software))

    label = f"Software: {title}"
    pubinfo.add((this_np, RDFS.label, Literal(label)))

    # Template references
    pubinfo.add((this_np, NT.wasCreatedFromTemplate, SOFTWARE_TEMPLATE))
    pubinfo.add((this_np, NT.wasCreatedFromProvenanceTemplate, PROV_TEMPLATE))
    pubinfo.add((this_np, NT.wasCreatedFromPubinfoTemplate, PUBINFO_TEMPLATE_1))
    pubinfo.add((this_np, NT.wasCreatedFromPubinfoTemplate, PUBINFO_TEMPLATE_2))
    pubinfo.add((this_np, NT.wasCreatedFromPubinfoTemplate, PUBINFO_TEMPLATE_3))

    return ds, label

print("‚úì Function defined")

In [None]:
# Make sure the destination directory exists (parents included)
out_dir = Path(OUTPUT_DIR)
out_dir.mkdir(parents=True, exist_ok=True)

# Build one dataset per configured entry and write it out as TriG,
# remembering every written path for the preview/publish cells.
generated_files = []

for np_config in config['nanopublications']:
    ds, label = create_software_nanopub(np_config, metadata)

    # File name is the entry's `id` with a .trig extension
    output_file = out_dir / "{}.trig".format(np_config['id'])
    ds.serialize(destination=str(output_file), format='trig')
    generated_files.append(output_file)

    print(f"‚úì Generated: {output_file}")

print(f"\nTotal generated: {len(generated_files)} nanopublications")

---
# 📄 SECTION 5: PREVIEW & OUTPUT
---

In [None]:
# Print the first generated file, if any were produced.
if generated_files:
    print(f"Preview of {generated_files[0]}:\n")
    print("=" * 80)
    # Decode explicitly as UTF-8 (same as the config load) instead of the
    # platform default, which can fail on non-ASCII titles or names.
    # NOTE(review): assumes rdflib wrote the file as UTF-8, its documented default.
    with open(generated_files[0], 'r', encoding='utf-8') as f:
        print(f.read())

---
# 🚀 SECTION 6: SIGN & PUBLISH (OPTIONAL)
---

In [None]:
# Set to True to sign, store and publish every generated file;
# while False, the publish cell only prints a notice.
PUBLISH = False
# Forwarded to NanopubConf(use_test_server=...) when publishing.
USE_TEST_SERVER = True

In [None]:
if not PUBLISH:
    print("Publishing disabled. Set PUBLISH = True to enable.")
else:
    # Imported lazily so the nanopub library is only needed when publishing.
    from nanopub import Nanopub, NanopubConf, load_profile

    profile = load_profile()
    print(f"Loaded profile: {profile.name}")

    conf = NanopubConf(profile=profile, use_test_server=USE_TEST_SERVER)

    for trig_file in generated_files:
        # Sign first: the signed form is what gets stored and published.
        np_obj = Nanopub(rdf=trig_file, conf=conf)
        np_obj.sign()

        # Keep a local copy next to the unsigned file (<name>.signed.trig)
        signed_path = trig_file.with_suffix('.signed.trig')
        np_obj.store(signed_path)
        print(f"‚úì Signed: {signed_path}")

        np_obj.publish()
        print(f"‚úì Published: {np_obj.source_uri}")