In [1]:
from marshmallow import Schema, fields

In [2]:
from sparql_queries import GolemQuery

The defined prefixes are stored in the GolemQuery class in `sparql_queries.py`. These are the canonical prefixes.

In [3]:
# Show the prefix/URI pairs held on the GolemQuery class.
GolemQuery.prefixes

[{'prefix': 'gd', 'uri': 'http://data.golemlab.eu/data/'},
 {'prefix': 'gt', 'uri': 'http://data.golemlab.eu/data/entity/type/'},
 {'prefix': 'crm', 'uri': 'http://www.cidoc-crm.org/cidoc-crm/'},
 {'prefix': 'owl', 'uri': 'http://www.w3.org/2002/07/owl#'},
 {'prefix': 'xsd', 'uri': 'http://www.w3.org/2001/XMLSchema#'},
 {'prefix': 'cls', 'uri': 'http://clscor.io/ontology/'},
 {'prefix': 'go', 'uri': 'http://golemlab.eu/ontology/'},
 {'prefix': 'lrm', 'uri': 'http://www.cidoc-crm.org/cidoc-crm/lrmoo/'},
 {'prefix': 'rdfs', 'uri': 'http://www.w3.org/2000/01/rdf-schema#'},
 {'prefix': 'nif',
  'uri': 'http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#'}]

In [4]:
# Instantiate GolemQuery; the instance is used below to look up prefix URIs.
golem_query = GolemQuery()

In [5]:
# Look up the URI registered for the "gd" prefix.
golem_query.get_prefix_uri("gd")

'http://data.golemlab.eu/data/'

Schemas of response objects are stored in `schemas.py`. These should be used to validate the responses.

In [6]:
from corpus import Corpus

In [7]:
# ID of the test corpus; reused below for the corpus URI and in the characters' corpus_ids.
test_id = "potter_corpus"

In [8]:
# Build a Corpus instance with every attribute populated.
test_corpus = Corpus(
    id=test_id,
    # The URI is the "gd" prefix URI followed by the corpus ID.
    uri=golem_query.get_prefix_uri("gd") + test_id,
    name="Harry Potter Corpus",
    acronym="potter",
    description="Harry Potter Corpus derived form AO3.",
    licence={
        "name": "CC0",
        "uri": "https://creativecommons.org/publicdomain/zero/1.0",
    },
    repository={"url": "https://github.com/GOLEM-lab/potter_corpus"},
)

In [9]:
# Corpus metrics entered by hand rather than computed.
test_corpus_metrics = {
    "chapters": 500,
    "paragraphs": 9000,
    "characters": 4000,
    "male": 1990,
    "female": 1990,
    "nonbinary": 20,
    "comments": 7000,
    "wordsInDocuments": 500000,
    "wordsInComments": 20000,
}

In [10]:
# Attach the hand-written metrics to the corpus instance.
test_corpus.metrics = test_corpus_metrics

In [11]:
# Return the corpus metadata with the metrics included and validation switched on
# (presumably validated against the schemas in schemas.py — confirm).
test_corpus.get_metadata(include_metrics=True,validation=True)

{'id': 'potter_corpus',
 'uri': 'http://data.golemlab.eu/data/potter_corpus',
 'corpusName': 'Harry Potter Corpus',
 'acronym': 'potter',
 'corpusDescription': 'Harry Potter Corpus derived form AO3.',
 'licence': 'CC0',
 'licenceUrl': 'https://creativecommons.org/publicdomain/zero/1.0',
 'repository': 'https://github.com/GOLEM-lab/potter_corpus',
 'metrics': {'chapters': 500,
  'paragraphs': 9000,
  'characters': 4000,
  'male': 1990,
  'female': 1990,
  'nonbinary': 20,
  'comments': 7000,
  'wordsInDocuments': 500000,
  'wordsInComments': 20000}}

In [12]:
# Build the RDF graph for the corpus.
test_corpus_rdf = test_corpus.generate_graph()

In [13]:
# Print the serialized corpus graph (the output below is Turtle).
print(test_corpus_rdf.serialize())

@prefix cls: <http://clscor.io/ontology/> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix gd: <http://data.golemlab.eu/data/> .
@prefix gt: <http://data.golemlab.eu/data/entity/type/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://data.golemlab.eu/data/potter_corpus/acronym> a crm:E41_Appellation ;
    crm:P1i_identifies gd:potter_corpus ;
    crm:P2_has_type gt:corpus_acronym ;
    rdf:value "potter" .

<http://data.golemlab.eu/data/potter_corpus/corpus_name> a crm:E41_Appellation ;
    crm:P1i_identifies gd:potter_corpus ;
    crm:P2_has_type gt:corpus_name ;
    rdf:value "Harry Potter Corpus" .

<http://data.golemlab.eu/data/potter_corpus/dimension/chapters> a crm:E54_Dimension ;
    crm:P90_has_value "500"^^xsd:int .

<http://data.golemlab.eu/data/potter_corpus/dimension/characters> a crm:E54_Dimension ;
    crm:P90_has_value "4000"^^xs

In [14]:
# Write the corpus graph as Turtle to data/test_corpus.ttl. The call returns the
# Graph itself, which is why its repr appears as the cell output.
test_corpus_rdf.serialize(destination="data/test_corpus.ttl",format="ttl")

<Graph identifier=N44072b3d1dd84bffbaccde3b65e10497 (<class 'rdflib.graph.Graph'>)>

## Character data

In [15]:
from character import Character

In [16]:
# Canon character that belongs to the test corpus.
test_character_id = "C000000001"
test_character = Character(
    id=test_character_id,
    uri=golem_query.get_prefix_uri("gd") + test_character_id,
    name="Harry Potter",
    character_type="canon",
    gender="male",
    corpus_ids=[test_id],  # IDs of the corpora this character is included in
)
# Further attributes could be supplied here.

In [17]:
# Show the character's metadata; the output below contains only id and uri.
test_character.get_metadata()

{'id': 'C000000001', 'uri': 'http://data.golemlab.eu/data/C000000001'}

In [18]:
# Build the RDF graph for the character.
test_character_rdf = test_character.generate_graph()

In [19]:
# Print the serialized character graph (the output below is Turtle).
print(test_character_rdf.serialize())

@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix gd: <http://data.golemlab.eu/data/> .
@prefix go: <http://golemlab.eu/ontology/> .
@prefix gt: <http://data.golemlab.eu/data/entity/type/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

gd:C000000001 a go:C1_Character_Concept ;
    rdfs:label "Harry Potter" ;
    crm:P148i_is_component_of gd:potter_corpus ;
    crm:P1_is_identified_by <http://data.golemlab.eu/data/C000000001/character_name>,
        <http://data.golemlab.eu/data/C000000001/id> ;
    crm:P2_has_type gt:canon_character,
        <http://data.golemlab.eu/data/entity/type/gender/male> .

<http://data.golemlab.eu/data/C000000001/character_name> a crm:E41_Appellation ;
    crm:P2_has_type gt:character_name ;
    rdf:value "Harry Potter" .

<http://data.golemlab.eu/data/C000000001/id> a crm:E42_Identifier ;
    crm:P2_has_type gt:id ;
    rdf:value "C000000001" .

gd:potter_corpus crm:P148_has_c

In [20]:
# Second canon character in the same corpus.
hermione_id = "C000000002"
hermione = Character(
    id=hermione_id,
    uri=golem_query.get_prefix_uri("gd") + hermione_id,
    name="Hermione Granger",
    character_type="canon",
    gender="female",
    corpus_ids=[test_id],  # IDs of the corpora this character is included in
)
# Further attributes could be supplied here.

In [21]:
# Build the RDF graph for the second character and print its serialization.
hermione_rdf = hermione.generate_graph()
print(hermione_rdf.serialize())

@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix gd: <http://data.golemlab.eu/data/> .
@prefix go: <http://golemlab.eu/ontology/> .
@prefix gt: <http://data.golemlab.eu/data/entity/type/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

gd:C000000002 a go:C1_Character_Concept ;
    rdfs:label "Hermione Granger" ;
    crm:P148i_is_component_of gd:potter_corpus ;
    crm:P1_is_identified_by <http://data.golemlab.eu/data/C000000002/character_name>,
        <http://data.golemlab.eu/data/C000000002/id> ;
    crm:P2_has_type gt:canon_character,
        <http://data.golemlab.eu/data/entity/type/gender/female> .

<http://data.golemlab.eu/data/C000000002/character_name> a crm:E41_Appellation ;
    crm:P2_has_type gt:character_name ;
    rdf:value "Hermione Granger" .

<http://data.golemlab.eu/data/C000000002/id> a crm:E42_Identifier ;
    crm:P2_has_type gt:id ;
    rdf:value "C000000002" .

gd:potter_corpus crm:

## Work

In [22]:
from work import Work

In [23]:
# ID of the work, kept in a variable (like the character IDs) so that the `id`
# and `uri` arguments below cannot drift apart.
philosophers_stone_id = "W000000001"

# Characters linked to the work: each entry pairs a character ID with an "effect".
# In the serialized graph of the following cell the "created" entries appear as
# P94i_was_created_by triples pointing at the work's creation node.
philosophers_stone_character_data = [
    {"id": hermione_id, "effect": "created"},
    {"id": test_character_id, "effect": "created"},
]

philosophers_stone = Work(
    id=philosophers_stone_id,
    uri=golem_query.get_prefix_uri("gd") + philosophers_stone_id,
    title="Harry Potter and the Philosopher's Stone",
    characters=philosophers_stone_character_data,
)

In [24]:
# Build the RDF graph for the work and print its serialization.
# NOTE(review): the output uses auto-generated ns1/ns2 prefixes where the corpus
# and character graphs use crm (and lrm is defined for the lrmoo namespace) —
# presumably Work.generate_graph() does not bind the prefixes; confirm.
philosophers_stone_rdf = philosophers_stone.generate_graph()
print(philosophers_stone_rdf.serialize())

@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix ns2: <http://www.cidoc-crm.org/cidoc-crm/lrmoo/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<http://data.golemlab.eu/data/C000000001> ns1:P94i_was_created_by <http://data.golemlab.eu/data/W000000001/creation> .

<http://data.golemlab.eu/data/C000000002> ns1:P94i_was_created_by <http://data.golemlab.eu/data/W000000001/creation> .

<http://data.golemlab.eu/data/W000000001/id> a ns1:E42_Identifier ;
    ns1:P1i_identifies <http://data.golemlab.eu/data/W000000001> ;
    ns1:P2_has_type <http://data.golemlab.eu/data/entity/type/id> ;
    rdf:value "W000000001" .

<http://data.golemlab.eu/data/W000000001/title> a ns1:E35_Title ;
    ns1:P102i_is_title_of <http://data.golemlab.eu/data/W000000001> ;
    rdf:value "Harry Potter and the Philosopher's Stone" .

<http://data.golemlab.eu/data/W000000001> a ns2:F1_Work ;
    rdfs:label "Harry Potter and the Philos

# NOTE(review): this cell has no execution count and calls a lowercase `work(...)`
# with positional arguments. No `work` callable is defined or imported in the
# visible notebook (only the `Work` class is), so this looks like a leftover from
# an earlier API and would presumably raise NameError on a fresh run — confirm,
# then either port it to `Work(...)` or remove it.
jk_rowling_id = "A000000001"
hp_phil_stone = work(
    "W000000001",
    "Harry Potter and the Philosopher's Stone", 
    [jk_rowling_id], 
    [test_character_id,hermione_id], 
    "1997")
print(hp_phil_stone.serialize())

## All graphs combined

In [25]:
# Merge the corpus, both character graphs and the work graph with `+`
# (rdflib documents Graph addition as a set-theoretic union).
combined = test_corpus_rdf + test_character_rdf + hermione_rdf + philosophers_stone_rdf

In [26]:
# Write the merged graph as Turtle to data/testdata.ttl. The call returns the
# Graph itself, which is why its repr appears as the cell output.
combined.serialize(destination="data/testdata.ttl",format="ttl")

<Graph identifier=N590010d098884810bf8f7b64b45aa9e8 (<class 'rdflib.graph.Graph'>)>