# JSON-LD Demonstration

In this notebook, we will explore the principles of JSON-LD using the example of a person. JSON-LD stands for "JSON for Linking Data" and it provides a method to enrich your JSON data with semantics.

An operational version of this notebook can be accessed [here](https://colab.research.google.com/drive/14XqRJPWs07RUQgZmDZEu3yb2m1xGvxEQ?usp=sharing).

In [None]:
# Install the required library for JSON schema validation
!pip install jsonschema
!pip install rdflib

Collecting rdflib
  Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 531.9/531.9 kB 6.6 MB/s eta 0:00:00
Collecting isodate<0.7.0,>=0.6.0 (from rdflib)
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.7/41.7 kB 4.3 MB/s eta 0:00:00
Installing collected packages: isodate, rdflib
Successfully installed isodate-0.6.1 rdflib-7.0.0


In [None]:
import jsonschema
from jsonschema import validate
import json
import rdflib

# JSON-LD representation of a person.
# The "@context" maps every short key used in the document to a schema.org
# property IRI, written compactly with the "schema:" prefix.
person_data = {
    "@context": {
        "schema": "https://schema.org/",
        "firstName": "schema:givenName",
        "lastName": "schema:familyName",    # schema.org has no "lastName" property
        "birthdate": "schema:birthDate",
        "institute": "schema:affiliation",
        "name": "schema:name",
        "street": "schema:streetAddress",
        "city": "schema:addressLocality",   # schema.org has no "locality" property
        "zip": "schema:postalCode"
    },
    "@id": "https://www.example.com/SimonClark",
    "@type": "schema:Person",
    "firstName": "Simon",
    "lastName": "Clark",
    "birthdate": "1987-04-23",
    # The affiliation is a nested node with its own identifier and type
    "institute": {
        "@id": "https://www.example.com/SINTEF",
        "@type": "schema:ResearchOrganization",
        "name": "SINTEF",
        "street": "Strindvegen 4",
        "city": "Trondheim",
        "zip": "7034"
    }
}


In the JSON-LD example above, we added an `@context` that maps the terms (keys) in our JSON data to URIs, typically taken from an established vocabulary such as schema.org. Because every key now resolves to a globally unique identifier, machines can interpret the meaning of the data unambiguously.


In [None]:
# JSON-LD representation of a person that relies on the public schema.org context.
# The remote context supplies the vocabulary for "Person", "birthDate", "gender"
# and "affiliation". "firstName" and "lastName" are not schema.org terms, so a
# small local context maps them to givenName / familyName. The keys themselves
# are kept because the JSON Schema in this notebook requires them.
person_data = {
    "@context": [
        "https://schema.org/",
        {
            # Full IRIs in the http://schema.org/ namespace, the one used by
            # the SPARQL queries in this notebook.
            "firstName": "http://schema.org/givenName",
            "lastName": "http://schema.org/familyName"
        }
    ],
    "@id": "https://orcid.org/0000-0002-8758-6109",
    "@type": "Person",
    "firstName": "Simon",
    "lastName": "Clark",
    # Modelled as a node typed "Male"; the SPARQL count query in this notebook
    # matches on that rdf:type.
    "gender": {"@type": "Male"},
    "birthDate": "1987-04-23",
    "affiliation": {
        "@id": "https://ror.org/01f677e56",
        "@type": "ResearchOrganization"
    }
}

# email to: simon.clark@sintef.no

In [None]:
# JSON Schema describing the structure a person record must have.
# It checks structure only; the meaning of the keys comes from the JSON-LD "@context".
person_schema = {
    "type": "object",
    "properties": {
        "firstName": {
            "type": "string",
            "minLength": 1   # same non-empty rule as lastName
        },
        "lastName": {
            "type": "string",
            "minLength": 1
        },
        "birthDate": {
            "type": "string",
            # "format" is only an annotation unless the validator is given a
            # format checker, so the pattern below is what enforces the shape.
            "format": "date",
            # YYYY-MM-DD with month 01-12 and day 01-31
            "pattern": "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$"
        },
        "gender": {
            "type": "object"
        },
        "affiliation": {
            "type": "object"
        }
    },
    # "gender" is optional; every other property must be present
    "required": ["firstName", "lastName", "birthDate", "affiliation"]
}

# Function to validate JSON data against the schema
def validate_json(data, schema):
    """Validate ``data`` against a JSON Schema.

    Args:
        data: The decoded JSON instance (e.g. a dict) to check.
        schema: The JSON Schema, as a dict, to validate against.

    Returns:
        tuple: ``(True, <success message>)`` when the instance conforms,
        otherwise ``(False, <message of the validation error that was raised>)``.
    """
    try:
        # "format" keywords such as "date" are ignored unless a format checker
        # is supplied, so pass one to have them enforced as well.
        validate(
            instance=data,
            schema=schema,
            format_checker=jsonschema.FormatChecker(),
        )
    except jsonschema.exceptions.ValidationError as ve:
        return False, ve.message
    return True, "JSON data is valid according to the schema."

# Validate the sample JSON data and report the outcome
is_valid, message = validate_json(person_data, person_schema)
print(message)

JSON data is valid according to the schema.


In [None]:
# Create a new graph
g = rdflib.Graph()

# Load the schema.org vocabulary into the graph.
# The "http" flavour of the file is used so that its terms live in the
# http://schema.org/ namespace that the SPARQL query below refers to.
g.parse("https://schema.org/version/latest/schemaorg-current-http.jsonld", format="json-ld")

# Add the person record to the same graph
person_data_str = json.dumps(person_data)
g.parse(data=person_data_str, format="json-ld")

# List schema:Organization together with all of its (transitive) subclasses.
# rdfs:subClassOf* is a zero-or-more path, so Organization itself is included.
# The rdfs prefix is declared explicitly instead of relying on the graph's
# default namespace bindings.
sparql_query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>
SELECT DISTINCT ?type WHERE {
  ?type rdfs:subClassOf* schema:Organization .
}
LIMIT 20
"""

# Execute the SPARQL query
results = g.query(sparql_query)

# Print the results (one result row per class)
for row in results:
    print(row)

(rdflib.term.URIRef('http://schema.org/Organization'),)
(rdflib.term.URIRef('http://schema.org/PerformingGroup'),)
(rdflib.term.URIRef('http://schema.org/TheaterGroup'),)
(rdflib.term.URIRef('http://schema.org/MusicGroup'),)
(rdflib.term.URIRef('http://schema.org/DanceGroup'),)
(rdflib.term.URIRef('http://schema.org/OnlineBusiness'),)
(rdflib.term.URIRef('http://schema.org/OnlineStore'),)
(rdflib.term.URIRef('http://schema.org/SportsOrganization'),)
(rdflib.term.URIRef('http://schema.org/SportsTeam'),)
(rdflib.term.URIRef('http://schema.org/Airline'),)
(rdflib.term.URIRef('http://schema.org/SearchRescueOrganization'),)
(rdflib.term.URIRef('http://schema.org/FundingScheme'),)
(rdflib.term.URIRef('http://schema.org/NewsMediaOrganization'),)
(rdflib.term.URIRef('http://schema.org/EducationalOrganization'),)
(rdflib.term.URIRef('http://schema.org/CollegeOrUniversity'),)
(rdflib.term.URIRef('http://schema.org/HighSchool'),)
(rdflib.term.URIRef('http://schema.org/Preschool'),)
(rdflib.term.U

In [None]:
# Find every resource whose rdf:type is schema:Organization or one of its subclasses
sparql_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>

SELECT ?instance WHERE {
    ?subclass rdfs:subClassOf* schema:Organization .
    ?instance rdf:type ?subclass .
}
LIMIT 10
"""

# Run the query against the graph
results = g.query(sparql_query)

# Show the identifier bound to ?instance in each result row
for row in results:
    print(row.instance)

https://ror.org/01f677e56


In [None]:
# Count the persons whose schema:gender value is a node of type schema:Male
sparql_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>

SELECT (COUNT(?subject) AS ?numMales) WHERE {
    ?subject rdf:type schema:Person .
    ?subject schema:gender ?gender .
    ?gender rdf:type schema:Male .
}
"""
# No LIMIT clause: an aggregate without GROUP BY yields a single row.

# Execute the SPARQL query
results = g.query(sparql_query)

# Print the count bound to ?numMales
for row in results:
    print(row.numMales)

1


In [None]:
# Look up the schema:birthDate of every resource typed as schema:Person
sparql_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>

SELECT ?bday WHERE {
    ?subject rdf:type schema:Person .
    ?subject schema:birthDate ?bday .
}
LIMIT 10
"""

# Run the query against the graph
results = g.query(sparql_query)

# Show the value bound to ?bday in each result row
for row in results:
    print(row.bday)

1987-04-23
