In [1]:
# import a predefined query
from sparql_queries import CorpusMetrics

In [2]:
# Instantiate the predefined query; the following cells all work on this object
MyQuery = CorpusMetrics()

In [3]:
# explain() returns a short, human-readable description of the query
print(MyQuery.explain())

Corpus Metrics: 
    Get all metrics of a corpus identified by URI
    


In [4]:
# Show the raw SPARQL template; <$1> is a placeholder that still has to be filled in
print(MyQuery.template)


        SELECT ?dimensionURI ?value WHERE {
            <$1> crm:P43_has_dimension ?dimensionURI .
            ?dimensionURI crm:P90_has_value ?value .
            ?dimensionURI rdfs:label ?dimension .
        }
        


In [5]:
# The query has to be prepared before use: prepare() puts the namespace
# PREFIX declarations in front of the template
MyQuery.prepare()

In [6]:
# Show the prepared query: PREFIX declarations followed by the template body
print(MyQuery.query)

PREFIX gd: <http://golemlab.eu/data/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX cls: <http://clscor.io/ontology/>
PREFIX go: <http://golemlab.eu/ontology/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#>
        SELECT ?dimensionURI ?value WHERE {
            <$1> crm:P43_has_dimension ?dimensionURI .
            ?dimensionURI crm:P90_has_value ?value .
            ?dimensionURI rdfs:label ?dimension .
        }
        


In [7]:
# The query still contains the placeholder <$1>; inject() replaces it with the corpus URI.
# inject() is given a list -- presumably one value per $n placeholder (TODO confirm).
corpus_uri = "http://golemlab.eu/data/" + "potter_corpus" # only full URIs work
MyQuery.inject([corpus_uri])

True

In [8]:
# The <$1> placeholder should now be replaced by the injected corpus URI
print(MyQuery.query)

PREFIX gd: <http://golemlab.eu/data/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX cls: <http://clscor.io/ontology/>
PREFIX go: <http://golemlab.eu/ontology/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#>
        SELECT ?dimensionURI ?value WHERE {
            <http://golemlab.eu/data/potter_corpus> crm:P43_has_dimension ?dimensionURI .
            ?dimensionURI crm:P90_has_value ?value .
            ?dimensionURI rdfs:label ?dimension .
        }
        


In [9]:
from sparql import DB

In [10]:
# Set up the SPARQL connection to the triple store
virtuoso = DB(
    triplestore="virtuoso",
    protocol="http",
    url="localhost",
    port="8890",
)

In [11]:
# Execute the query; the database connection has to be passed in explicitly
MyQuery.execute(virtuoso)

True

In [12]:
# Show the raw results: a dict with 'head' (variable names) and 'results' (bindings)
MyQuery.results.dump()

{'head': {'link': [], 'vars': ['dimensionURI', 'value']},
 'results': {'distinct': False,
  'ordered': True,
  'bindings': [{'dimensionURI': {'type': 'uri',
     'value': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_male_characters'},
    'value': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#int',
     'value': '10'}},
   {'dimensionURI': {'type': 'uri',
     'value': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_chapters'},
    'value': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#int',
     'value': '700000'}},
   {'dimensionURI': {'type': 'uri',
     'value': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_characters'},
    'value': {'type': 'typed-literal',
     'datatype': 'http://www.w3.org/2001/XMLSchema#int',
     'value': '20'}},
   {'dimensionURI': {'type': 'uri',
     'value': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_comments'},
    'value': {'type'

In [13]:
# simplify() reduces each binding to a plain {variable: value} dict;
# the xsd:int literals show up as ints instead of strings
MyQuery.results.simplify()

[{'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_male_characters',
  'value': 10},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_chapters',
  'value': 700000},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_characters',
  'value': 20},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_comments',
  'value': 123},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_documents',
  'value': 200000},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_female_characters',
  'value': 8},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_nonbinary_characters',
  'value': 2},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_paragraphs',
  'value': 9000000},
 {'dimensionURI': 'http://golemlab.eu/data/potter_corpus/measurement/number_of_words-comments',
  'value': 

In [14]:
# Build {metric name: value}. The metric name is the last path segment of the
# dimension URI, e.g. ".../measurement/number_of_chapters" -> "number_of_chapters".
# rsplit with maxsplit=1 only splits off that last segment.
metrics = {
    row["dimensionURI"].rsplit("/", 1)[-1]: row["value"]
    for row in MyQuery.results.simplify()
}

In [15]:
# Wrap the metrics in a top-level dict and display it as the cell result
output = {"metrics": metrics}
output

{'metrics': {'number_of_male_characters': 10,
  'number_of_chapters': 700000,
  'number_of_characters': 20,
  'number_of_comments': 123,
  'number_of_documents': 200000,
  'number_of_female_characters': 8,
  'number_of_nonbinary_characters': 2,
  'number_of_paragraphs': 9000000,
  'number_of_words-comments': 123000,
  'number_of_words-text': 2000000000}}

## With the GolemCorpus class

In [16]:
from golem_corpus import GolemCorpus

In [17]:
# Create a corpus object for the same corpus URI, reusing the existing database connection
MyCorpus = GolemCorpus(uri=corpus_uri, database=virtuoso)

In [18]:
# Fetch the corpus metadata; with include_metrics=True the result also contains
# a 'metrics' dict with the same values as computed by hand above
MyCorpus.get_metadata(include_metrics=True)

{'uri': 'http://golemlab.eu/data/potter_corpus',
 'name': None,
 'title': None,
 'description': None,
 'acronym': None,
 'metrics': {'number_of_male_characters': 10,
  'number_of_chapters': 700000,
  'number_of_characters': 20,
  'number_of_comments': 123,
  'number_of_documents': 200000,
  'number_of_female_characters': 8,
  'number_of_nonbinary_characters': 2,
  'number_of_paragraphs': 9000000,
  'number_of_words-comments': 123000,
  'number_of_words-text': 2000000000}}