# Querying external database sources of interest

* Enable users to integrate data from external databases of interest within BBP KG
* While using the Nexus Forge interface and BMO vocabulary as much as possible as
* While benefiting from out of the box (meta)data transformation to make them ready for BBP internal pipelines and applications
* Demo with Mouselight, NeuroElectro, UniProt

In [1]:
from kgforge.core import KnowledgeGraphForge
from kgforge.specializations.resources import Dataset

A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook [00-Initialization.ipynb](00%20-%20Initialization.ipynb).

In [2]:
endpoint = "https://staging.nise.bbp.epfl.ch/nexus/v1"
BUCKET = "neurosciencegraph/datamodels"
forge = KnowledgeGraphForge("../../configurations/database-sources/prod-nexus-sources.yml", endpoint=endpoint, bucket=BUCKET)

myname UniProt
store_config {'name': 'DemoStore'}
model_config {'origin': 'directory', 'source': '../../../tests/data/demo-model/'}
myname NeuroElectro
store_config {'endpoint': 'https://staging.nise.bbp.epfl.ch/nexus/v1', 'searchendpoints': {'sparql': {'endpoint': 'https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex'}, 'elastic': {'endpoint': 'https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-es/dataset', 'mapping': 'https://bbp.epfl.ch/neurosciencegraph/data/views/es/dataset', 'default_str_keyword_field': 'keyword'}}, 'vocabulary': {'metadata': {'iri': 'https://bluebrain.github.io/nexus/contexts/metadata.json', 'local_iri': 'https://bluebrainnexus.io/contexts/metadata.json'}, 'namespace': 'https://bluebrain.github.io/nexus/vocabulary/', 'deprecated_property': 'https://bluebrain.github.io/nexus/vocabulary/deprecated', 'project_property': 'https://bluebrain.github.io/nexus/vocabulary/project'}, 'max_connection': 50, 'token': 'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2

# List of Data sources

In [3]:
sources = forge.db_sources(pretty=False)

In [4]:
sources

{'UniProt': DatabaseSource(_last_action=None, _validated=False, _synchronized=False, _store_metadata=None, type='Database', _dirpath='/Users/cgonzale/Documents/code/nexus-forge/examples/database_sources/UniProt', _forge=<kgforge.core.forge.KnowledgeGraphForge object at 0x7f7cb8a7ad50>, _from_forge=True, _inner_sync=False, _model=DemoModel(service=<kgforge.specializations.models.demo_model.ModelLibrary object at 0x7f7cb8a93590>, source='../../../tests/data/demo-model/'), _store=DemoStore(context=None, bucket=None, endpoint=None, file_mapping=None, metadata_context=None, model_context=None, service=<kgforge.specializations.stores.demo_store.StoreLibrary object at 0x7f7cb607d2d0>, token=None, versioned_id_template=None), model={'origin': 'directory', 'source': '../../../tests/data/demo-model/'}, name='UniProt', store={'name': 'DemoStore'}),
 'NeuroElectro': DatabaseSource(_last_action=None, _validated=False, _synchronized=False, _store_metadata=None, type='Database', _dirpath='/Users/cgon

In [5]:

data = {
        'store':{
          'name': 'DemoStore'
        },
        'protocol': 'https://www.janelia.org/project-team/mouselight/resources', 
        'license': [{'id': 'https://creativecommons.org/licenses/by-nc/4.0', 
                     'label': 'CC BY-NC 4.0'}
                   ],
        'definition': {
            'origin': 'directory',
            'source': '/Users/cgonzale/Documents/code/nexus-forge/examples/database_sources/MouseLight/'
        },
        'model': { 
          'name': 'DemoModel',
          'origin': 'directory',
          'source': "../../../tests/data/demo-model/" 
        }
}


In [6]:
from kgforge.specializations.resources import DatabaseSource
ds = DatabaseSource(forge, name="MouseLight", from_forge=False, **data)

myname MouseLight
store_config {'name': 'DemoStore'}
model_config {'origin': 'directory', 'source': '../../../tests/data/demo-model/'}


In [7]:
print(ds)

{
    type: Database
    _store:
    {
        context: null
        bucket: null
        endpoint: null
        file_mapping: null
        metadata_context: null
        model_context: null
        service:
        {
            archives: {}
            records: {}
            tags: {}
        }
        token: null
        versioned_id_template: null
    }
    definition:
    {
        origin: directory
        source: /Users/cgonzale/Documents/code/nexus-forge/examples/database_sources/MouseLight/
    }
    license:
    [
        {
            id: https://creativecommons.org/licenses/by-nc/4.0
            label: CC BY-NC 4.0
        }
    ]
    model:
    {
        origin: directory
        source: ../../../tests/data/demo-model/
    }
    name: MouseLight
    protocol: https://www.janelia.org/project-team/mouselight/resources
    store:
    {
        name: DemoStore
    }
}


In [8]:
print(sources)

{'UniProt': DatabaseSource(_last_action=None, _validated=False, _synchronized=False, _store_metadata=None, type='Database', _dirpath='/Users/cgonzale/Documents/code/nexus-forge/examples/database_sources/UniProt', _forge=<kgforge.core.forge.KnowledgeGraphForge object at 0x7f7cb8a7ad50>, _from_forge=True, _inner_sync=False, _model=DemoModel(service=<kgforge.specializations.models.demo_model.ModelLibrary object at 0x7f7cb8a93590>, source='../../../tests/data/demo-model/'), _store=DemoStore(context=None, bucket=None, endpoint=None, file_mapping=None, metadata_context=None, model_context=None, service=<kgforge.specializations.stores.demo_store.StoreLibrary object at 0x7f7cb607d2d0>, token=None, versioned_id_template=None), model={'origin': 'directory', 'source': '../../../tests/data/demo-model/'}, name='UniProt', store={'name': 'DemoStore'}), 'NeuroElectro': DatabaseSource(_last_action=None, _validated=False, _synchronized=False, _store_metadata=None, type='Database', _dirpath='/Users/cgonz

# Data source metadata

In [9]:
mouselight= sources["MouseLight"]
print(mouselight)

{
    type: Database
    _store:
    {
        context: null
        bucket: null
        endpoint: null
        file_mapping: null
        metadata_context: null
        model_context: null
        service:
        {
            archives: {}
            records: {}
            tags: {}
        }
        token: null
        versioned_id_template: null
    }
    definition:
    {
        origin: directory
        source: /Users/cgonzale/Documents/code/nexus-forge/examples/database_sources/MouseLight/
    }
    license:
    [
        {
            id: https://creativecommons.org/licenses/by-nc/4.0
            label: CC BY-NC 4.0
        }
    ]
    model:
    {
        origin: directory
        source: ../../../tests/data/demo-model/
    }
    name: MouseLight
    protocol: https://www.janelia.org/project-team/mouselight/resources
    store:
    {
        name: DemoStore
    }
}


## Name, description, url, license, protocol => more can be added through configuration

In [10]:
print(mouselight.name)
print(mouselight.protocol)
print(mouselight.license)

MouseLight
https://www.janelia.org/project-team/mouselight/resources
[{'id': 'https://creativecommons.org/licenses/by-nc/4.0', 'label': 'CC BY-NC 4.0'}]


## Get data mappings (hold transformations logic) per data type

* Data mappings are used to transform results obtained from the external data sources so that they are ready for consumption by BBP tools
* Perform automatic ontology linking

In [11]:
forge.mappings("MouseLight", pretty=False)

{'NeuronMorphology': ['DictionaryMapping']}

In [12]:
forge.mappings('UniProt', pretty=True)

Managed mappings for the data source per entity type and mapping type:
   - NeuronElectrophysiologicalFeature:
        * DictionaryMapping


In [13]:
forge.mappings('NeuroElectro', pretty=True)

Managed mappings for the data source per entity type and mapping type:
   - ElectrophysiologicalFeatureAnnotation:
        * DictionaryMapping
   - ParameterAnnotation:
        * DictionaryMapping
   - ParameterBody:
        * DictionaryMapping
   - ScholarlyArticle:
        * DictionaryMapping
   - SeriesBody:
        * DictionaryMapping


In [14]:
from kgforge.specializations.mappings import DictionaryMapping
mapping = forge.mapping("NeuronMorphology", "MouseLight", type=DictionaryMapping)
direct_mapping = mouselight.mapping("NeuronMorphology", type=DictionaryMapping)

In [15]:
print(mapping)

{
    id: forge.format("identifier", "neuronmorphologies/mouselight", x.neurons[0]["idString"])
    type:
    [
        Dataset
        NeuronMorphology
    ]
    brainLocation:
    {
        type: BrainLocation
        brainRegion:
        {
            id: f"http://api.brain-map.org/api/v2/data/Structure/{x.neurons[0]['soma']['allenId']}"
            label: x.neurons[0]["allenLabel"]
        }
        coordinatesInBrainAtlas:
        {
            valueX: x.neurons[0]["soma"]["x"]
            valueY: x.neurons[0]["soma"]["y"]
            valueZ: x.neurons[0]["soma"]["z"]
        }
    }
    contribution:
    {
        type: Contribution
        agent:
        {
            id: https://www.grid.ac/institutes/grid.443970.d
            type: Organization
            label: Janelia Research Campus
        }
    }
    dateCreated: x.neurons[0]["sample"]["date"]
    description: x.neurons[0]["annotationSpace"]["description"]
    distribution: forge.attach(f"./mouselight/{x.neurons[0]['idSt

In [16]:
print(direct_mapping)

{
    id: forge.format("identifier", "neuronmorphologies/mouselight", x.neurons[0]["idString"])
    type:
    [
        Dataset
        NeuronMorphology
    ]
    brainLocation:
    {
        type: BrainLocation
        brainRegion:
        {
            id: f"http://api.brain-map.org/api/v2/data/Structure/{x.neurons[0]['soma']['allenId']}"
            label: x.neurons[0]["allenLabel"]
        }
        coordinatesInBrainAtlas:
        {
            valueX: x.neurons[0]["soma"]["x"]
            valueY: x.neurons[0]["soma"]["y"]
            valueZ: x.neurons[0]["soma"]["z"]
        }
    }
    contribution:
    {
        type: Contribution
        agent:
        {
            id: https://www.grid.ac/institutes/grid.443970.d
            type: Organization
            label: Janelia Research Campus
        }
    }
    dateCreated: x.neurons[0]["sample"]["date"]
    description: x.neurons[0]["annotationSpace"]["description"]
    distribution: forge.attach(f"./mouselight/{x.neurons[0]['idSt

In [17]:
forge.db_sources(with_datatype='NeuronMorphology', pretty=True)

Available Database sources:
MouseLight


In [18]:
props = {'origin': 'store', 'source': 'BlueBrainNexus', 'definition':{'iri': 'some_address'}}

In [None]:
# props = {'origin': 'store', 'source': 'BlueBrainNexus', 'definition':{'iri':}}
# new_db = DatabaseSource(forge, from_forge=False, name='new_db')

# Search and Access data from data source

* Mapping are automatically applied to search results
* takes a mn for now => working on making it faster 

In [None]:
# Type, source or target brain region, 
filters = {"type":"NeuronMorphology"} # More filters (brain regions, ...) will be added
#map=True, use_cache=True, # download=True
resources = forge.search(filters, db_source="MouseLight", limit=2) 
# ADd function for checking datsource health => reqsuire health url from db


In [None]:
len(resources)

0

In [None]:
print(resources[0])

IndexError: list index out of range

# Save in BBP KG (Nexus)

In [None]:
forge.register(resources)

<count> 2
<action> _register_many
<succeeded> False
<error> RegistrationError: resource already exists


## Access

### Set filters

In [None]:
_type = "NeuronMorphology"
filters = {"type": _type}

### Run Query

In [None]:
limit = 10  # You can limit the number of results, pass `None` to fetch all the results

data = forge.search(filters, limit=limit)

print(f"{str(len(data))} dataset(s) of type {_type} found")

10 dataset(s) of type NeuronMorphology found


### Display the results as pandas dataframe

In [None]:
property_to_display = ["id","name","subject","brainLocation.brainRegion.id","brainLocation.brainRegion.label","brainLocation.layer.id","brainLocation.layer.label", "contribution","brainLocation.layer.id","brainLocation.layer.label","distribution.name","distribution.contentUrl","distribution.encodingFormat"]
reshaped_data = forge.reshape(data, keep=property_to_display)

forge.as_dataframe(reshaped_data)

Unnamed: 0,id,brainLocation.brainRegion.id,brainLocation.brainRegion.label,brainLocation.layer,contribution.type,contribution.agent.id,contribution.agent.type,distribution.contentUrl,distribution.encodingFormat,distribution.name,name,subject.type,subject.age.period,subject.age.unitCode,subject.age.value,subject.identifier,subject.name,subject.sex,subject.species,subject.strain
0,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,VISp5,5,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,Scnn1a-Tg3-Cre;Ai14-172530.06.01.01,Subject,Post-natal,,,322489588,Scnn1a-Tg3-Cre;Ai14(GSL)-172530,,Mus musculus,Scnn1a-Tg3-Cre
1,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,MTG,2,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,H16.06.009.01.01.15.01,Subject,Post-natal,yrs,48.0,528574320,H16.06.009,Female,Homo Sapiens,
2,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,VISp4,4,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,Scnn1a-Tg3-Cre;Ai14-187849.06.01.01,Subject,Post-natal,,,475849748,Scnn1a-Tg3-Cre;Ai14(IVSCC)-187849,,Mus musculus,Scnn1a-Tg3-Cre
3,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,VISp4,4,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,Rorb-IRES2-Cre-D;Ai14-197330.06.01.01,Subject,Post-natal,,,479695183,Rorb-IRES2-Cre-D;Ai14-197330,,Mus musculus,Rorb-IRES2-Cre
4,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,VISpl4,4,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,Rorb-IRES2-Cre-D;Ai14-230822.04.02.01,Subject,Post-natal,,,502081962,Rorb-IRES2-Cre-D;Ai14-230822,,Mus musculus,Rorb-IRES2-Cre
5,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,AnG,2,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,H17.06.004.11.05.04,Subject,Post-natal,yrs,71.0,569008241,H17.06.004,Female,Homo Sapiens,
6,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,MTG,4,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,H17.06.009.11.04.02,Subject,Post-natal,yrs,52.0,595954915,H17.06.009,Male,Homo Sapiens,
7,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,MTG,3,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,H16.03.001.01.09.01,Subject,Post-natal,yrs,39.0,518641172,H16.03.001,Male,Homo Sapiens,
8,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,MFG,5,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,H17.06.007.11.08.01,Subject,Post-natal,yrs,42.0,576060516,H17.06.007,Female,Homo Sapiens,
9,https://bbp.epfl.ch/nexus/v1/resources/dke/kgf...,http://api.brain-map.org/api/v2/data/Structure...,VISp5,5,Contribution,https://www.grid.ac/institutes/grid.417881.3,Organization,https://bbp.epfl.ch/nexus/v1/files/dke/kgforge...,application/swc,reconstruction.swc,Cux2-CreERT2;Ai14-205530.03.02.01,Subject,Post-natal,,,485250100,Cux2-CreERT2;Ai14-205530,,Mus musculus,Cux2-CreERT2


### Download

In [None]:
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath)