In [1]:
from marshmallow import Schema, fields

In [2]:
from sparql_queries import GolemQuery

The defined prefixes are stored on the `GolemQuery` class in `sparql_queries.py`. These are the canonical prefixes.

In [3]:
# Show the prefix list held on the GolemQuery class: one dict per entry,
# pairing a short 'prefix' with its namespace 'uri'.
GolemQuery.prefixes

[{'prefix': 'gd', 'uri': 'http://data.golemlab.eu/data/'},
 {'prefix': 'gt', 'uri': 'http://data.golemlab.eu/data/entity/type/'},
 {'prefix': 'crm', 'uri': 'http://www.cidoc-crm.org/cidoc-crm/'},
 {'prefix': 'owl', 'uri': 'http://www.w3.org/2002/07/owl#'},
 {'prefix': 'xsd', 'uri': 'http://www.w3.org/2001/XMLSchema#'},
 {'prefix': 'cls', 'uri': 'http://clscor.io/ontology/'},
 {'prefix': 'go', 'uri': 'http://golemlab.eu/ontology/'},
 {'prefix': 'rdfs', 'uri': 'http://www.w3.org/2000/01/rdf-schema#'},
 {'prefix': 'nif',
  'uri': 'http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#'}]

In [4]:
# Create a GolemQuery instance (no arguments); later cells use it for prefix lookups.
golem_query = GolemQuery()

In [5]:
# Resolve a short prefix to its namespace URI; "gd" is the namespace the
# corpus URI is built from further down.
golem_query.get_prefix_uri("gd")

'http://data.golemlab.eu/data/'

Schemas of response objects are stored in `schemas.py`. These should be used to validate the responses.

In [6]:
from corpus import Corpus

In [7]:
# ID of the demo corpus; reused below both as the Corpus id and as the tail of its URI.
test_id = "potter_corpus"

In [8]:
# Build a Corpus with every attribute filled in.
test_corpus = Corpus(
    id=test_id,
    # The URI is the "gd" namespace URI with the corpus ID appended.
    uri=golem_query.get_prefix_uri("gd") + test_id,
    name="Harry Potter Corpus",
    acronym="potter",
    description="Harry Potter Corpus derived form AO3.",
    licence={
        "name": "CC0",
        "uri": "https://creativecommons.org/publicdomain/zero/1.0",
    },
    repository={"url": "https://github.com/GOLEM-lab/potter_corpus"},
)

In [9]:
# Metrics entered by hand for the demo corpus (not computed from any data).
test_corpus_metrics = {
    "chapters": 500,
    "paragraphs": 9_000,
    "characters": 4_000,
    "male": 1_990,
    "female": 1_990,
    "nonbinary": 20,
    "comments": 7_000,
    "wordsInDocuments": 500_000,
    "wordsInComments": 20_000,
}

In [10]:
# Attach the hand-written metrics to the corpus so get_metadata(include_metrics=True)
# has something to report.
test_corpus.metrics = test_corpus_metrics

In [11]:
# Fetch the corpus metadata as a dict, with the metrics nested under 'metrics'.
# NOTE(review): validation=True presumably checks the result against the
# response schema in schemas.py — confirm in corpus.py.
test_corpus.get_metadata(include_metrics=True,validation=True)

{'id': 'potter_corpus',
 'uri': 'http://data.golemlab.eu/data/potter_corpus',
 'corpusName': 'Harry Potter Corpus',
 'acronym': 'potter',
 'corpusDescription': 'Harry Potter Corpus derived form AO3.',
 'licence': 'CC0',
 'licenceUrl': 'https://creativecommons.org/publicdomain/zero/1.0',
 'repository': 'https://github.com/GOLEM-lab/potter_corpus',
 'metrics': {'chapters': 500,
  'paragraphs': 9000,
  'characters': 4000,
  'male': 1990,
  'female': 1990,
  'nonbinary': 20,
  'comments': 7000,
  'wordsInDocuments': 500000,
  'wordsInComments': 20000}}

In [12]:
# Generate the graph for the corpus and print its serialization (Turtle in the
# output below). print() is deliberate: the serialization is multi-line text,
# which a bare expression would show as an escaped one-line repr.
print(test_corpus.generate_graph().serialize())

@prefix cls: <http://clscor.io/ontology/> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix gd: <http://data.golemlab.eu/data/> .
@prefix gt: <http://data.golemlab.eu/data/entity/type/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://data.golemlab.eu/data/potter_corpus/acronym> a crm:E41_Appellation ;
    crm:P1i_identifies gd:potter_corpus ;
    rdf:value "potter" .

<http://data.golemlab.eu/data/potter_corpus/corpus_name> a crm:E41_Appellation ;
    crm:P1i_identifies gd:potter_corpus ;
    crm:P2_has_type gt:corpus_name ;
    rdf:value "Harry Potter Corpus" .

<http://data.golemlab.eu/data/potter_corpus/dimension/chapters> a crm:E54_Dimension ;
    crm:P90_has_value "500"^^xsd:int .

<http://data.golemlab.eu/data/potter_corpus/dimension/characters> a crm:E54_Dimension ;
    crm:P90_has_value "4000"^^xsd:int .

<http://data.golemlab.eu/data/p