# Get Atlas Release Dataset

## Imports

In [1]:
import json
import rdflib
import getpass
import pandas as pd
from rdflib import RDF, RDFS, XSD, OWL, URIRef, BNode, SKOS
import pprint
from kgforge.core import KnowledgeGraphForge
from kgforge.specializations.resources import Dataset

## Setup
Get an authentication token

For now, the [Nexus web application](https://bbp.epfl.ch/nexus/web) can be used to get a token. We are looking for other simpler alternatives.

- Step 1: From the opened web page, click on the login button on the right corner and follow the instructions.

![login-ui](./login-ui.png)

- Step 2: At the end you’ll see a token button on the right corner. Click on it to copy the token.

![login-ui](./copy-token.png)


In [2]:
TOKEN = ""

In [3]:
endpoint_staging = "https://staging.nise.bbp.epfl.ch/nexus/v1"
endpoint_prod = "https://bbp.epfl.ch/nexus/v1"
endpoint=endpoint_prod

forge = KnowledgeGraphForge("prod-forge-nexus.yml",
                            token=TOKEN,
                            endpoint=endpoint, 
                            bucket="bbp/atlas",
                            searchendpoints= {"sparql":{"endpoint": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"}})

In [4]:
# Install dependencies
!pip install jsonpath_ng



## Set the atlas release id
These atlas releases can be explored through the atlas web app:

* dev: https://bluebrainatlas.kcpdev.bbp.epfl.ch/atlas
* prod: https://bbp.epfl.ch/atlas


In [5]:

Prod_BBP_Mouse_Brain_Atlas_Release = "https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885" 
Prod_METype_Composition_Density= "https://bbp.epfl.ch/neurosciencegraph/data/celldensities/760b887c-759a-4ded-98e6-5d2712349eb2"
Staging_BBP_Mouse_Brain_Atlas_Release = "https://bbp.epfl.ch/neurosciencegraph/data/brainatlasrelease/c96c71a8-4c0d-4bc1-8a1a-141d9ed6693d" 
BRO = 'http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion'

atlas_release_id = Prod_BBP_Mouse_Brain_Atlas_Release

## Get the atlas release high level metadata

In [6]:
atlas_release = forge.retrieve(atlas_release_id)

In [7]:
print(atlas_release)

{
    context: https://bbp.neuroshapes.org
    id: https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885
    type:
    [
        AtlasRelease
        BrainAtlasRelease
    ]
    brainTemplateDataLayer:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/cce06e96-85a4-403d-8b87-634a1554cdb9
        type: BrainTemplateDataLayer
    }
    description: The official Atlas of the Blue Brain Project, derivated from AIBS Mouse CCF v3 (2017)
    hemisphereVolume:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/392ce23b-19de-471b-a5ef-c7744d7e450e
        type:
        [
            VolumetricDataLayer
            Dataset
            HemisphereAnnotationDataLayer
        ]
    }
    name: Blue Brain Atlas
    parcellationOntology:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/0518dd0b-cbc7-43ef-a75f-45631059c8c5
        type: ParcellationOntology
    }
    parcellationVolume:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/da

In [8]:
# Get the current revision of the Atlas release
atlas_release._store_metadata["_rev"]

3

## Get the atlas hierarchy

In [9]:
parcellation_ontology = forge.retrieve(atlas_release.parcellationOntology.id, cross_bucket=True)

In [10]:
print(parcellation_ontology)

{
    context: https://bbp.neuroshapes.org
    id: https://bbp.epfl.ch/neurosciencegraph/data/0518dd0b-cbc7-43ef-a75f-45631059c8c5
    type:
    [
        Ontology
        Entity
        ParcellationOntology
    ]
    label: BBP Mouse Brain region ontology
    distribution:
    [
        {
            type: DataDownload
            atLocation:
            {
                type: Location
                location: file:///gpfs/bbp.cscs.ch/data/project/proj39/nexus/bbp/atlas/b/3/0/5/6/1/c/a/hierarchy_l23split.json
                store:
                {
                    id: https://bbp.epfl.ch/neurosciencegraph/data/3806a8d9-0130-4535-bac6-3b1fecf855f5
                    type: RemoteDiskStorage
                    _rev: 1
                }
            }
            contentSize:
            {
                unitCode: bytes
                value: 3437057
            }
            contentUrl: https://bbp.epfl.ch/nexus/v1/files/bbp/atlas/b43a590b-c4b1-4956-84ad-6156eacb8841
           

In [11]:
parcellation_ontology_copy = Dataset.from_resource(forge, parcellation_ontology, store_metadata=True)
parcellation_ontology_copy.distribution = [d for d in parcellation_ontology.distribution if d.encodingFormat == "application/json"]

forge.download(parcellation_ontology_copy, "distribution.contentUrl", ".", overwrite=True, cross_bucket=True)

## Get a brain region metadata from it's name or accronym

In [12]:
name_accronym = "Medial septal nucleus"

In [13]:
from kgforge.core.commons.strategies import ResolvingStrategy
brain_region = forge.resolve(name_accronym, scope="ontology", target="terms", strategy=ResolvingStrategy.EXACT_CASEINSENSITIVE_MATCH)
print(brain_region)

{
    id: http://api.brain-map.org/api/v2/data/Structure/564
    type: Class
    label: Medial septal nucleus
    isDefinedBy: http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion
    notation: MS
    prefLabel: Medial septal nucleus
    subClassOf: nsg:BrainRegion
}


In [14]:
# Get accronym
brain_region.notation

'MS'

## Get hierarchy relations directly from the ontology

Each brain region in the ontology has their children listed in the `hasPart` property, and all the leaf nodes as `hasLeafRegionPart`

#### Get all leaf nodes from a region

In [15]:
target = 'FRP'
query_leaf = """
SELECT DISTINCT ?id ?acronym ?leaf
WHERE{
    ?id subClassOf* BrainRegion ;
    notation ?acronym ;
    hasLeafRegionPart ?leaf.
"""
query_leaf += f"FILTER (?acronym = \"{target}\") \n" + "}"

In [16]:
brs_leaves = forge.sparql(query_leaf, cross_bucket=True) 

In [17]:
forge.as_dataframe(brs_leaves)

Unnamed: 0,id,acronym,leaf
0,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
1,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
2,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
3,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure/68
4,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
5,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...


### Return only leaves that are represented in the atlas annotation

In [18]:
target = 'FRP'
query_leaf2 = """
SELECT DISTINCT ?id ?acronym ?leaf
WHERE{
    ?id subClassOf* BrainRegion ;
    notation ?acronym ;
    hasLeafRegionPart ?leaf.
    ?leaf representedInAnnotation true .
"""
query_leaf2 += f"FILTER (?acronym = \"{target}\") \n" + "}"

In [19]:
brs_leaves2 = forge.sparql(query_leaf2, cross_bucket=True) 

In [20]:
forge.as_dataframe(brs_leaves2)

Unnamed: 0,id,acronym,leaf
0,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure/68
1,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...
2,http://api.brain-map.org/api/v2/data/Structure...,FRP,http://api.brain-map.org/api/v2/data/Structure...


We can inspect the brain region "FRP"

In [21]:
FRP = forge.retrieve(brs_leaves[0].leaf, cross_bucket=True)

In [22]:
print(FRP)

{
    context: https://neuroshapes.org
    id: http://api.brain-map.org/api/v2/data/Structure/526157192
    type: Class
    label: Frontal pole, layer 5
    color_hex_triplet: 268F45
    graph_order: 9
    hasHierarchyView:
    [
        https://neuroshapes.org/BrainRegion
    ]
    hasLayerLocationPhenotype:
    [
        http://purl.obolibrary.org/obo/UBERON_0005394
    ]
    hemisphere_id: 3
    identifier: 526157192
    isDefinedBy: http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion
    isLayerPartOf:
    [
        http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion/Isocortex_L5
    ]
    isPartOf:
    [
        http://api.brain-map.org/api/v2/data/Structure/184
    ]
    notation: FRP5
    prefLabel: Frontal pole, layer 5
    representedInAnnotation: false
    st_level: 11
    subClassOf:
    [
        https://neuroshapes.org/BrainRegion
    ]
}


Similarly to the nodes, we can get the direct children of a brain region, present in the annotation, by querying using the `hasPart` property

In [23]:
target = 'MOp'
query_child = """
SELECT DISTINCT ?id ?acronym ?child
WHERE{
    ?id subClassOf* BrainRegion ;
    notation ?acronym ;
    hasPart ?child.
    ?child representedInAnnotation true.
"""
query_child += f"FILTER (?acronym = \"{target}\") \n" + "}"

In [24]:
brs_children = forge.sparql(query_child)

In [25]:
forge.as_dataframe(brs_children)

Unnamed: 0,id,acronym,child
0,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
1,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
2,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
3,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...
4,http://api.brain-map.org/api/v2/data/Structure...,MOp,http://api.brain-map.org/api/v2/data/Structure...


If we know the exact brain region, instead of using sparql one can simply resolve teh brain region and get the children directly from the attribute `hasPart`

In [26]:
mop = forge.resolve('MOp', scope='ontology', strategy='EXACT_MATCH')

In [27]:
print(mop)

{
    id: http://api.brain-map.org/api/v2/data/Structure/985
    type: Class
    label: Primary motor area
    isDefinedBy: http://bbp.epfl.ch/neurosciencegraph/ontologies/core/brainregion
    notation: MOp
    prefLabel: Primary motor area
    subClassOf: nsg:BrainRegion
}


In [28]:
full_mop = forge.retrieve(mop.id, cross_bucket=True)

## Get parcellation (annotation) volume

In [29]:
parcellation_volume = forge.retrieve(atlas_release.parcellationVolume.id)

In [30]:
print(parcellation_volume)

{
    context: https://bbp.neuroshapes.org
    id: https://bbp.epfl.ch/neurosciencegraph/data/231f6e2e-6366-4ddc-94b6-35ab50c076c0
    type:
    [
        VolumetricDataLayer
        BrainParcellationDataLayer
        Dataset
    ]
    atlasRelease:
    {
        id: https://bbp.epfl.ch/neurosciencegraph/data/4906ab85-694f-469d-962f-c0174e901885
    }
    brainLocation:
    {
        atlasSpatialReferenceSystem:
        {
            id: https://bbp.epfl.ch/neurosciencegraph/data/allen_ccfv3_spatial_reference_system
            type:
            [
                AtlasSpatialReferenceSystem
                BrainAtlasSpatialReferenceSystem
            ]
        }
        brainRegion:
        {
            id: http://api.brain-map.org/api/v2/data/Structure/997
            label: root
        }
    }
    bufferEncoding: gzip
    componentEncoding: uint32
    contribution:
    {
        type: Contribution
        agent:
        {
            id: https://ror.org/02s376052
            type:


In [31]:
forge.download(parcellation_volume, "distribution.contentUrl", ".", overwrite=True)

## Get orientation field and Direction vectors volumes

In [32]:
query = {
          "type":"CellOrientationField", 
          "atlasRelease":{"@id":atlas_release_id},
          "brainLocation":{"brainRegion":{"id":"http://api.brain-map.org/api/v2/data/Structure/997"}} # root brain region
        }
cell_orientation_field = forge.search(query)
print(f"{len(cell_orientation_field)} found")

2 found


In [33]:
print(cell_orientation_field)

[Resource(_last_action=Action(error=None, message=None, operation='search', succeeded=True), _validated=False, _synchronized=False, _store_metadata={'id': 'https://bbp.epfl.ch/neurosciencegraph/data/a6b1f799-7b2c-470e-8668-3f15148965db', '_constrainedBy': 'https://neuroshapes.org/dash/volumetricdatalayer', '_createdAt': '2022-07-13T13:39:02.570Z', '_createdBy': 'https://bbp.epfl.ch/nexus/v1/realms/bbp/users/lurie', '_deprecated': False, '_incoming': 'https://bbp.epfl.ch/nexus/v1/resources/bbp/atlas/datashapes:volumetricdatalayer/a6b1f799-7b2c-470e-8668-3f15148965db/incoming', '_outgoing': 'https://bbp.epfl.ch/nexus/v1/resources/bbp/atlas/datashapes:volumetricdatalayer/a6b1f799-7b2c-470e-8668-3f15148965db/outgoing', '_project': 'https://bbp.epfl.ch/nexus/v1/projects/bbp/atlas', '_rev': 4, '_schemaProject': 'https://bbp.epfl.ch/nexus/v1/projects/neurosciencegraph/datamodels', '_self': 'https://bbp.epfl.ch/nexus/v1/resources/bbp/atlas/datashapes:volumetricdatalayer/a6b1f799-7b2c-470e-8668

In [34]:
forge.download(cell_orientation_field, "distribution.contentUrl", ".", overwrite=True)

## Get the released metype densities

In [35]:
metype_density_release = forge.retrieve(id=Prod_METype_Composition_Density,
                                     cross_bucket=True)

In [36]:
forge.download(metype_density_release,path=".", follow="distribution.contentUrl", cross_bucket=True, overwrite=True) # downloads a json file containing released metype densities along with their version

In [37]:
with open(f"./{metype_density_release.distribution.name}", "r") as f:
    metype_density_release_json= json.load(f)

In [38]:
from jsonpath_ng import jsonpath, parse

jsonpath_expr = parse('hasPart[*].hasPart[*].hasPart[*].@id')
metype_density_release_volume_ids = [(match.value, match.context.value["_rev"]) for match in jsonpath_expr.find(metype_density_release_json)] # yields [(metype volume id, revision)]
metype_density_release_volumes = [forge.retrieve(id=volume_id[0], version=volume_id[1], cross_bucket=True) for volume_id in metype_density_release_volume_ids] # metype density volumes

len(metype_density_release_volumes)

245

In [39]:
# Collect metadata as pandas dataframe 
reshaped_resources = forge.reshape(metype_density_release_volumes, keep=["id","type", "annotation.hasBody.id", "annotation.hasBody.label",
                     "brainLocation.brainRegion.id", "brainLocation.brainRegion.label", "distribution.atLocation.location"])
df = forge.as_dataframe(reshaped_resources, nesting=".")

df["mtype"] = df.apply(lambda row: (row.annotation[0]["hasBody"]["label"],row.annotation[0]["hasBody"]["id"]) , axis=1) 
df["etype"] = df.apply(lambda row: (row.annotation[1]["hasBody"]["label"],row.annotation[1]["hasBody"]["id"]) , axis=1) 
type_column = df.pop('type')
mtype_column = df.pop('mtype')
etype_column = df.pop('etype')

df.insert(0, 'type', type_column)
df.insert(1, 'mtype', mtype_column)
df.insert(2, 'etype', etype_column)
df.drop(columns="annotation")

df.head(100)

Unnamed: 0,type,mtype,etype,id,annotation,brainLocation.brainRegion.id,brainLocation.brainRegion.label,distribution.atLocation.location
0,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(dSTUT, http://uri.interlex.org/base/ilx_0738202)",https://bbp.epfl.ch/neurosciencegraph/data/88e...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
1,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(cNAC, http://uri.interlex.org/base/ilx_0738201)",https://bbp.epfl.ch/neurosciencegraph/data/9ab...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
2,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(bAC, http://uri.interlex.org/base/ilx_0738199)",https://bbp.epfl.ch/neurosciencegraph/data/2df...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
3,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(bIR, http://uri.interlex.org/base/ilx_0738206)",https://bbp.epfl.ch/neurosciencegraph/data/6e2...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
4,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L23_BP, http://uri.interlex.org/base/ilx_0383...","(bNAC, http://uri.interlex.org/base/ilx_0738203)",https://bbp.epfl.ch/neurosciencegraph/data/a40...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
...,...,...,...,...,...,...,...,...
95,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(bAC, http://uri.interlex.org/base/ilx_0738199)",https://bbp.epfl.ch/neurosciencegraph/data/104...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
96,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(cNAC, http://uri.interlex.org/base/ilx_0738201)",https://bbp.epfl.ch/neurosciencegraph/data/699...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
97,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(cAC, http://uri.interlex.org/base/ilx_0738197)",https://bbp.epfl.ch/neurosciencegraph/data/045...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...
98,"[NeuronDensity, VolumetricDataLayer, CellDensi...","(L4_LBC, http://uri.interlex.org/base/ilx_0383...","(bSTUT, http://uri.interlex.org/base/ilx_0738200)",https://bbp.epfl.ch/neurosciencegraph/data/0cb...,[{'hasBody': {'id': 'http://uri.interlex.org/b...,http://api.brain-map.org/api/v2/data/Structure...,root,file:///gpfs/bbp.cscs.ch/data/project/proj39/n...


In [40]:
forge.download(metype_density_release_volumes, path="./test", follow="distribution.contentUrl", cross_bucket=True, overwrite=True)