In [1]:
import rdflib
from marshmallow import Schema, fields

In [2]:
from sparql_queries import GolemQuery

The defined prefixes are stored in the GolemQuery class in `sparql_queries.py`. These are the canonical prefixes.

In [3]:
# Show the prefix/URI pairs defined on GolemQuery
GolemQuery.prefixes

[{'prefix': 'gd', 'uri': 'http://data.golemlab.eu/data/'},
 {'prefix': 'gt', 'uri': 'http://data.golemlab.eu/data/entity/type/'},
 {'prefix': 'crm', 'uri': 'http://www.cidoc-crm.org/cidoc-crm/'},
 {'prefix': 'owl', 'uri': 'http://www.w3.org/2002/07/owl#'},
 {'prefix': 'xsd', 'uri': 'http://www.w3.org/2001/XMLSchema#'},
 {'prefix': 'cls', 'uri': 'http://clscor.io/ontology/'},
 {'prefix': 'go', 'uri': 'http://golemlab.eu/ontology/'},
 {'prefix': 'rdfs', 'uri': 'http://www.w3.org/2000/01/rdf-schema#'},
 {'prefix': 'nif',
  'uri': 'http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#'}]

In [4]:
def get_prefix_uri(prefix, prefixes=None):
    """Get the URI for a prefix from GolemQuery.prefixes.

    Args:
        prefix: Namespace prefix to look up, e.g. ``"gd"``. It must match the
            stored prefix exactly.
        prefixes: Optional list of ``{"prefix": ..., "uri": ...}`` dicts to
            search instead of ``GolemQuery.prefixes``.

    Returns:
        The ``uri`` value of the first entry whose ``prefix`` equals ``prefix``.

    Raises:
        IndexError: If no entry has the requested prefix.
    """
    if prefixes is None:
        prefixes = GolemQuery.prefixes
    # Compare with ==, not `in`: on strings `in` is a substring test, which
    # would let "rdf" resolve to "rdfs" or "g" resolve to "gd".
    for item in prefixes:
        if item["prefix"] == prefix:
            return item["uri"]
    # Keep IndexError (what the old `[0]` lookup raised) but say what was missing.
    raise IndexError(f"Unknown prefix: {prefix!r}")

Schemas of response objects are stored in `schemas.py`. These should be used to validate the responses.

In [5]:
from corpus import Corpus

In [6]:
# Identifier of the example corpus; the corpus URI below is built from it
test_id = "potter_corpus"

In [7]:
# Create a Corpus with every attribute filled in.
# The URI is the "gd" namespace URI with the corpus ID appended.
test_corpus = Corpus(
    id=test_id,
    uri=get_prefix_uri("gd") + test_id,
    name="Harry Potter Corpus",
    acronym="potter",
    description="Harry Potter Corpus derived form AO3.",
    licence={
        "name": "CC0",
        "uri": "https://creativecommons.org/publicdomain/zero/1.0",
    },
    repository={"url": "https://github.com/GOLEM-lab/potter_corpus"},
)

In [9]:
# Hand-written metric values for the test corpus
test_corpus_metrics = {
    "chapters": 500,
    "paragraphs": 9000,
    "characters": 4000,
    "male": 1990,
    "female": 1990,
    "nonbinary": 20,
    "comments": 7000,
    "wordsInDocuments": 500000,
    "wordsInComments": 20000,
}

In [12]:
# Attach the hand-written metrics to the corpus instance
test_corpus.metrics = test_corpus_metrics

In [15]:
# Display the corpus metadata with the metrics included and validation turned off.
# NOTE(review): the recorded output omits wordsInDocuments and wordsInComments
# although both were set on test_corpus_metrics — confirm whether get_metadata
# drops them on purpose.
test_corpus.get_metadata(
    include_metrics=True,
    validation=False,
)

{'id': 'potter_corpus',
 'uri': 'http://data.golemlab.eu/data/potter_corpus',
 'corpusName': 'Harry Potter Corpus',
 'acronym': 'potter',
 'corpusDescription': 'Harry Potter Corpus derived form AO3.',
 'licence': 'CC0',
 'licenceUrl': 'https://creativecommons.org/publicdomain/zero/1.0',
 'repository': 'https://github.com/GOLEM-lab/potter_corpus',
 'metrics': {'chapters': 500,
  'paragraphs': 9000,
  'characters': 4000,
  'male': 1990,
  'female': 1990,
  'nonbinary': 20,
  'comments': 7000}}