# Querying

This notebook demonstrates how to retrieve, query and search data using the Forge.

In [1]:
from kgforge.core import KnowledgeGraphForge

In [2]:
import getpass

In [3]:
token = getpass.getpass()

In [4]:
forge = KnowledgeGraphForge("../../configurations/demo-forge-nexus.yml", token=token)

## Imports

In [5]:
from kgforge.core import Resource

## Retrieval

In [6]:
# Build a Person resource locally; the next cell sends it to the store with forge.register.
# The email uses the reserved documentation domain example.org.
jane = Resource(type="Person", name="Jane Doe", email="jane.doe@example.org")

In [7]:
forge.register(jane)

<action> _register_one
<succeeded> True


In [8]:
resource = forge.retrieve(jane.id)

In [9]:
resource == jane

True

### specific version

In [10]:
jane = Resource(type="Person", name="Jane Doe")

In [11]:
forge.register(jane)

<action> _register_one
<succeeded> True


In [12]:
forge.tag(jane, "v1")

<action> _tag_one
<succeeded> True


In [13]:
jane.email = "jane.doe@epfl.ch"

In [14]:
forge.update(jane)

<action> _update_one
<succeeded> True


In [15]:
# Inspect the revision recorded in the store metadata attached to the resource.
# NOTE(review): the value shown is 3 after the register, tag and update calls above —
# presumably one revision per call; confirm against the store's versioning rules.
jane._store_metadata._rev

3

In [16]:
# Retrieve an earlier state of the resource by passing an integer `version`
# (presumably the store revision number — confirm in the Forge documentation).
jane_v1 = forge.retrieve(jane.id, version=1)

In [17]:
# `version` also accepts a string: here the "v1" tag set with forge.tag above.
jane_v1_tag = forge.retrieve(jane.id, version="v1")

In [18]:
jane_v1 == jane_v1_tag

True

In [19]:
print(jane_v1)

{
    context: http://context.example.org
    id: https://bbp.epfl.ch/dke/kgforge/bc72074b-4bac-471e-8d4f-62bd98afab8a
    type: Person
    name: Jane Doe
}


### error handling

In [20]:
# Retrieving an unknown identifier does not raise: the error is reported in the
# cell output and the call returns None (printed in the next cell).
resource = forge.retrieve("123")

<action> retrieve
<error> RetrievalError: not found



In [21]:
print(resource)

None


## Searching

In [22]:
jane = Resource(type="Person", name="Jane Doe")

In [23]:
john = Resource(type="Person", name="John Smith")

In [24]:
association_jane = Resource(type="Association", agent=jane)

In [25]:
association_john = Resource(type="Association", agent=john)

In [26]:
associations = [association_jane, association_john]

In [27]:
forge.register(associations)

<count> 2
<action> _register_many
<succeeded> True


`Association` is a type known to the Model, and the `paths` method loads the data structure for the given type. Refer to the `11 - Modeling.ipynb` notebook to learn about Modeling and Types.

In [28]:
p = forge.paths("Association")

You have autocompletion on `p` and this can be used to build a search.

In [29]:
resources = forge.search(p.type == "Association", limit=5)

In [30]:
type(resources)

list

In [31]:
len(resources)  

5

In [32]:
type(resources[0])

kgforge.core.resource.Resource

In [33]:
forge.as_dataframe(resources)

Unnamed: 0,id,type,agent.type,agent.gender.label,agent.name,distribution.type,distribution.contentSize.unitCode,distribution.contentSize.value,distribution.contentUrl,distribution.digest.algorithm,distribution.digest.value,distribution.encodingFormat,distribution.name,name
0,https://bbp.epfl.ch/dke/kgforge/a4cca69d-9cf1-...,Association,Person,female,Marie Curie,DataDownload,bytes,46.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...,text/plain,marie_curie.txt,Curie Association
1,https://bbp.epfl.ch/dke/kgforge/38d723d9-a9c9-...,Association,Person,male,Albert Einstein,DataDownload,bytes,50.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...,text/plain,albert_einstein.txt,Einstein Association
2,https://bbp.epfl.ch/dke/kgforge/a42bddb3-b202-...,Association,Person,male,Albert Einstein,DataDownload,bytes,50.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...,text/plain,albert_einstein.txt,Einstein Association
3,https://bbp.epfl.ch/dke/kgforge/df941e38-a58a-...,Association,Person,female,Marie Curie,DataDownload,bytes,46.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...,text/plain,marie_curie.txt,Curie Association
4,https://bbp.epfl.ch/dke/kgforge/20915f5f-b663-...,Association,Person,,Jane Doe,,,,,,,,,


In [34]:
forge.as_dataframe(resources, store_metadata=True)

Unnamed: 0,id,type,agent.type,agent.gender.label,agent.name,distribution.type,distribution.contentSize.unitCode,distribution.contentSize.value,distribution.contentUrl,distribution.digest.algorithm,...,_createdAt,_createdBy,_deprecated,_incoming,_outgoing,_project,_rev,_self,_updatedAt,_updatedBy
0,https://bbp.epfl.ch/dke/kgforge/a4cca69d-9cf1-...,Association,Person,female,Marie Curie,DataDownload,bytes,46.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,...,2020-05-24T06:24:59.463417Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...,False,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/proje...,1,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,2020-05-24T06:24:59.463417Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...
1,https://bbp.epfl.ch/dke/kgforge/38d723d9-a9c9-...,Association,Person,male,Albert Einstein,DataDownload,bytes,50.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,...,2020-05-24T06:24:59.463408Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...,False,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/proje...,1,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,2020-05-24T06:24:59.463408Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...
2,https://bbp.epfl.ch/dke/kgforge/a42bddb3-b202-...,Association,Person,male,Albert Einstein,DataDownload,bytes,50.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,...,2020-05-24T06:25:41.198385Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...,False,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/proje...,1,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,2020-05-24T06:25:41.198385Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...
3,https://bbp.epfl.ch/dke/kgforge/df941e38-a58a-...,Association,Person,female,Marie Curie,DataDownload,bytes,46.0,https://staging.nexus.ocp.bbp.epfl.ch/v1/files...,SHA-256,...,2020-05-24T06:25:41.198385Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...,False,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/proje...,1,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,2020-05-24T06:25:41.198385Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...
4,https://bbp.epfl.ch/dke/kgforge/20915f5f-b663-...,Association,Person,,Jane Doe,,,,,,...,2020-05-24T06:28:16.634822Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...,False,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,https://staging.nexus.ocp.bbp.epfl.ch/v1/proje...,1,https://staging.nexus.ocp.bbp.epfl.ch/v1/resou...,2020-05-24T06:28:16.634822Z,https://staging.nexus.ocp.bbp.epfl.ch/v1/realm...


### nested field querying

You have autocompletion on `p` and also on nested properties like `p.agent`.

In [35]:
# Several filters can be passed to one search; the second one targets a nested
# property (agent.name) through the `p` paths object.
# NOTE(review): the results below suggest the filters are combined with AND — confirm.
resources = forge.search(p.type == "Association", p.agent.name == "John Smith", limit=5)

In [36]:
len(resources)

5

In [37]:
forge.as_dataframe(resources)

Unnamed: 0,id,type,agent.type,agent.name,agent
0,https://bbp.epfl.ch/dke/kgforge/f9a236f8-aef6-...,Association,Person,John Smith,
1,https://bbp.epfl.ch/dke/kgforge/aa49682d-4ca2-...,Association,Person,John Smith,
2,https://bbp.epfl.ch/dke/kgforge/69ede0eb-98f9-...,Association,Person,John Smith,
3,https://bbp.epfl.ch/dke/kgforge/469b63b2-28b0-...,Association,Person,John Smith,
4,https://bbp.epfl.ch/dke/kgforge/78c956f1-79f1-...,Association,,,"[{'type': 'Person', 'name': 'John Smith'}, {'t..."


## Graph traversing

SPARQL is used to traverse the graph. The user can provide simplified queries: no prefixes, compacted URIs, or full URIs are required, only the terms (properties) available to the user in the Model. Refer to the `11 - Modeling.ipynb` notebook to learn about Templates.

In [38]:
jane = Resource(type="Person", name="Jane Doe")

In [39]:
john = Resource(type="Person", name="John Smith")

In [40]:
association = Resource(type="Association", agent=[jane, john])

In [41]:
forge.register(association)

<action> _register_one
<succeeded> True


In [42]:
forge.template("Association")

{
    id: ""
    type: Association
    agent:
    {
        type: Person
        address:
        {
            type: PostalAddress
            postalCode: ""
            streetAddress: ""
        }
        birthDate: 9999-12-31
        deathDate: 9999-12-31
        gender:
        [
            female
            male
        ]
        givenName: ""
        name: ""
    }
}


In [43]:
# Simplified SPARQL: bare Model terms (type, Association, agent, name) are used
# instead of prefixed or full URIs. Selects each Association together with the
# name of its agent(s).
query = """
    SELECT ?x ?name
    WHERE {
        ?x type Association ;
           agent ?agent .
        ?agent name ?name .
    }
"""

In [44]:
resources = forge.sparql(query, limit=5)

In [45]:
type(resources)

list

In [46]:
len(resources)

5

In [47]:
type(resources[0])

kgforge.core.resource.Resource

### rewritten query display

In [48]:
resources = forge.sparql(query, debug=True)

Submitted query:
   PREFIX nsg: <https://neuroshapes.org/>
   PREFIX owl: <http://www.w3.org/2002/07/owl#>
   PREFIX prov: <http://www.w3.org/ns/prov#>
   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
   PREFIX schema: <http://schema.org/>
   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
   
       SELECT ?x ?name
       WHERE {
           ?x rdf:type schema:Association ;
              schema:agent ?agent .
           ?agent schema:name ?name .
       }



## Downloading

In [49]:
jane = Resource(type="Person", name="Jane Doe")

In [50]:
! ls -p ../../data | egrep -v /$

associations.tsv
my_data.xwz
persons.csv


In [51]:
distribution = forge.attach("../../data")

In [52]:
association = Resource(type="Association", agent=jane, distribution=distribution)

In [53]:
forge.register(association)

<action> _register_one
<succeeded> True


In [54]:
# Download the files referenced by the resource's "distribution.contentUrl"
# property path into ./downloaded/ (listed in the next cell, then removed).
forge.download(association, "distribution.contentUrl", "./downloaded/")

In [55]:
! ls ./downloaded/

associations.tsv my_data.xwz      persons.csv


In [56]:
! rm -R ./downloaded/