Pierre-Alexandre v17.09.2019

In [None]:
! python -V

**Installations**

In [None]:
! pip install kgforge

In [None]:
! pip install allensdk

In [None]:
! pip install git+https://github.com/BlueBrain/TMD

**Imports**

In [None]:
import json

In [None]:
from kgforge.core import KnowledgeGraphForge, Resource
from kgforge.core.transforming import Mapping
from kgforge.specializations.mappers import DictionaryMapper

In [None]:
from allensdk.core.cell_types_cache import CellTypesCache
from allensdk.api.queries.cell_types_api import CellTypesApi

In [None]:
import tmd

**Setup**

In [None]:
# Placeholder access token; it is passed as `token=` when the forge is configured below.
# Replace it locally and avoid committing a real token with the notebook.
TOKEN = "<token>"

In [None]:
# Forge session used by every later cell, built from the YAML configuration file,
# the target bucket and the access token.
forge = KnowledgeGraphForge.from_config(
    "bbp_prod_session.yml",
    bucket="bbp/<project>",
    token=TOKEN,
)

---

## User Journey A - Data Integration

1. Retrieve human neuron morphologies from the Allen Cell Types Database
2. Integrate these neuron morphologies into the Blue Brain Knowledge Graph
    - Retrieve the complete metadata of the selection of neuron morphologies
    - Integrate the Subject entities
    - Integrate the Patched Cell entities
    - Integrate the Neuron Morphology entities
3. Save the mappings to later share them and track their changes

### 1 - Retrieve human neuron morphologies from the Allen Cell Types Database

In [None]:
# Directory that holds the cache manifest and the cells.json file read in later cells.
ALLEN_DIR = "allen_cell_types_db"

In [None]:
# Upper bound on how many human cell ids are kept (used as a slice bound below).
LIMIT = 2

In [None]:
# Cell types cache whose manifest file lives under ALLEN_DIR.
ctc = CellTypesCache(manifest_file=f"{ALLEN_DIR}/manifest.json")

In [None]:
# Query the cache for human cells, restricted to those with a reconstruction.
human_cells = ctc.get_cells(species=[CellTypesApi.HUMAN], require_reconstruction=True)

In [None]:
# Collect the "id" of every returned cell, then keep the first LIMIT of them.
human_cell_ids = [cell["id"] for cell in human_cells][:LIMIT]

In [None]:
# Fetch one reconstruction per selected cell id through the cache.
human_cell_reconstructions = [ctc.get_reconstruction(cell_id) for cell_id in human_cell_ids]

### 2 - Integrate these neuron morphologies into the Blue Brain Knowledge Graph

Data integrated into the Blue Brain Knowledge Graph follow the Blue Brain Knowledge Graph Schema.

In the current case, this means that the data from the Allen Cell Types Database should be represented this way:

![Provenance](https://docs.google.com/uc?id=1Hoz3wK3vNkLxdhKNZXK53NOE6qKuBh7o)

**Retrieve the complete metadata of the selection of neuron morphologies**

In [None]:
# Load the complete cell metadata from the cache directory.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform's default locale encoding.
with open(f"{ALLEN_DIR}/cells.json", encoding="utf-8") as f:
    allen_cell_types_metadata = json.load(f)

In [None]:
# Keep only the metadata records whose "specimen__id" is one of the selected cell ids.
human_cell_metadata = [
    record
    for record in allen_cell_types_metadata
    if record["specimen__id"] in human_cell_ids
]

**Integrate the Subject entities**

In [None]:
# Mapping from one cell metadata record (`x`) to a Subject resource.
# The Subject id is produced by forge.identifiers.format("subjects", ...) from the donor id;
# sex and species go through forge.ontologies.resolve.
subject_mapping = Mapping("""
    type: Subject
    id: forge.identifiers.format("subjects", x.donor__id)
    identifier: x.donor__id
    name: x.donor__name
    sex: forge.ontologies.resolve("sex", x.donor__sex)
    species: forge.ontologies.resolve("species", x.donor__species)
""")

In [None]:
# Apply the Subject mapping to the selected metadata records with the DictionaryMapper.
subjects = forge.transforming.map(human_cell_metadata, DictionaryMapper, subject_mapping)

In [None]:
# Validate the mapped Subject resources before they are registered in the next cell.
forge.modeling.validate(subjects)

In [None]:
# Register the mapped Subject resources through the forge's storing interface.
forge.storing.register(subjects)

**Integrate the Patched Cell entities**

In [None]:
# Mapping from one cell metadata record (`x`) to a PatchedCell resource.
# Links to the Subject use the same forge.identifiers.format("subjects", ...) expression
# as subject_mapping, so they carry the id under which the Subject is registered
# rather than the raw donor id.
patchedcell_mapping = Mapping("""
    type: PatchedCell
    id: forge.identifiers.format("patchedcells", x.specimen__id)
    brainLocation:
    {
        type: BrainLocation
        brainRegion:
        {
            id: f"http://api.brain-map.org/api/v2/data/Structure/{x.structure__id}"
            label: x.structure__acronym
        }
    }
    contribution:
    {
        type: Contribution
        agent:
        {
            # 'Organization' is a subclass of 'Agent'.
            type: Organization
            id: https://www.grid.ac/institutes/grid.417881.3
        }
    }
    derivation:
    {
        type: Derivation
        entity:
        {
            # 'Subject' is a subclass of 'Entity'.
            type: Subject
            id: forge.identifiers.format("subjects", x.donor__id)
        }
    }
    identifier: x.specimen__id
    name: x.specimen__name
    # This property is not part of the PatchedCell shape at the moment (09.08.2019).
    subject:
    {
        type: Subject
        id: forge.identifiers.format("subjects", x.donor__id)
    }
""")

In [None]:
# Apply the PatchedCell mapping to the selected metadata records with the DictionaryMapper.
patchedcells = forge.transforming.map(human_cell_metadata, DictionaryMapper, patchedcell_mapping)

In [None]:
# Validate the mapped PatchedCell resources before they are registered in the next cell
# (same `forge.modeling.validate` call as used for the Subject and NeuronMorphology resources).
forge.modeling.validate(patchedcells)

In [None]:
# Register the mapped PatchedCell resources through the forge's storing interface.
forge.storing.register(patchedcells)

**Integrate the Neuron Morphology entities**

In [None]:
# Mapping from one cell metadata record (`x`) to a NeuronMorphology resource.
# Links to the Subject and to the PatchedCell are built with the same
# forge.identifiers.format(...) expressions that subject_mapping and patchedcell_mapping
# use for their own `id`, so the links carry the registered ids rather than raw
# donor / specimen ids.
neuronmorphology_mapping = Mapping("""
    # There is no NeuronMorphologyShape nor NeuronMorphology type in Neuroshapes at the moment (09.08.2019).
    # Using ReconstructedPatchedCellShape meanwhile.
    type: NeuronMorphology
    id: forge.identifiers.format("neuronmorphologies", x.specimen__id)
    # This property is not part of the ReconstructedPatchedCell shape at the moment (09.08.2019).
    apicalDendrite: x.tag__apical
    brainLocation:
    {
        type: BrainLocation
        brainRegion:
        {
            id: f"http://api.brain-map.org/api/v2/data/Structure/{x.structure__id}"
            label: x.structure__acronym
        }
        coordinatesInBrainAtlas:
        {
            valueX: x.csl__x
            valueY: x.csl__y
            valueZ: x.csl__z
        }
        layer: forge.ontologies.resolve("brain region", x.structure__layer, "layer")
    }
    contribution:
    {
        type: Contribution
        agent:
        {
            # 'Organization' is a subclass of 'Agent'.
            type: Organization
            id: https://www.grid.ac/institutes/grid.417881.3
        }
    }
    derivation:
    [
        {
            type: Derivation
            entity:
            {
                # 'Subject' is a subclass of 'Entity'.
                type: Subject
                id: forge.identifiers.format("subjects", x.donor__id)
            }
        }
        {
            type: Derivation
            entity:
            {
                # 'PatchedCell' is a subclass of 'Entity'.
                type: PatchedCell
                id: forge.identifiers.format("patchedcells", x.specimen__id)
            }
        }
    ]
    distribution: forge.files.as_resource(f"./allen_cell_types_db/specimen_{x.specimen__id}/reconstruction.swc")
    identifier: x.specimen__id
    name: x.specimen__name
    # This property is not part of the ReconstructedPatchedCell shape at the moment (09.08.2019).
    subject:
    {
        type: Subject
        id: forge.identifiers.format("subjects", x.donor__id)
    }
""")

In [None]:
# Apply the NeuronMorphology mapping to the selected metadata records with the DictionaryMapper.
neuronmorphologies = forge.transforming.map(human_cell_metadata, DictionaryMapper, neuronmorphology_mapping)

In [None]:
# Validate the mapped NeuronMorphology resources before they are registered in the next cell.
forge.modeling.validate(neuronmorphologies)

In [None]:
# Register the mapped NeuronMorphology resources through the forge's storing interface.
forge.storing.register(neuronmorphologies)

### [NEW] 3 - Save the mappings to later share them and track their changes

In [None]:
# Folder under which the three mappings are saved by the following cells.
MAPPINGS_FOLDER = "./neuroshapes/mappings/allen_cell_types_database_v2019-08-08"

In [None]:
# Save the Subject mapping as subject.hjson under MAPPINGS_FOLDER.
subject_mapping.save(f"{MAPPINGS_FOLDER}/subject.hjson")

In [None]:
# Save the PatchedCell mapping as patchedcell.hjson under MAPPINGS_FOLDER.
patchedcell_mapping.save(f"{MAPPINGS_FOLDER}/patchedcell.hjson")

In [None]:
# Save the NeuronMorphology mapping as neuronmorphology.hjson under MAPPINGS_FOLDER.
neuronmorphology_mapping.save(f"{MAPPINGS_FOLDER}/neuronmorphology.hjson")

---

## User Journey B - Data Exploration

1. Discover which neuron morphologies are in the Blue Brain Knowledge Graph
2. Regroup as a dataset a selection of these neuron morphologies
    - Select neuron morphologies in the cortical layer V and with intact apical dendrites
    - Register the selected neuron morphologies as a dataset
    - Give this first revision of the dataset a human-friendly name

In [None]:
# Paths object for the "NeuronMorphology" type; its attributes (p.type, p.brainLocation.layer, ...)
# are used to express the search filters in the cells below.
p = forge.modeling.paths("NeuronMorphology")

### [NEW] 1 - Discover which neuron morphologies are in the Blue Brain Knowledge Graph

In [None]:
# Search with a single filter on the type; the result is not assigned,
# so in a notebook it is shown as the cell output.
forge.querying.search(p.type == "NeuronMorphology")

### 2 - Regroup as a dataset a selection of these neuron morphologies

**Select neuron morphologies in the cortical layer V and with intact apical dendrites**

In [None]:
# Search with three filters: type "NeuronMorphology", layer "5" and apical dendrite "intact".
# `resolving` and `lookup` are forwarded to the search unchanged.
# This rebinds `neuronmorphologies` from the mapped resources to the search result.
neuronmorphologies = forge.querying.search(
    p.type == "NeuronMorphology",
    p.brainLocation.layer == "5",
    p.apicalDendrite == "intact",
    resolving="fuzzy",
    lookup="children",
)

**Register the selected neuron morphologies as a dataset**

In [None]:
# Placeholder id of the contributing agent; used as `agent=` in the Contribution resources below.
AGENT_ID = "<contributor ID>"

In [None]:
# Contribution resource attached to the dataset created below.
contribution = Resource(type="Contribution", agent=AGENT_ID)

In [None]:
# Reduce each selected morphology to the listed properties (with versioned=True)
# so it can be attached to the dataset as a part.
has_part = forge.transforming.reshape(
    neuronmorphologies,
    keep=["id", "type", "name", "distribution.contentUrl"],
    versioned=True,
)

In [None]:
# Dataset resource grouping the selected morphologies.
# Per Neuroshapes on 09.08.2019, 'type', 'subject' and 'brainLocation' are also required.
dataset = Resource(
    type="Dataset",
    name="All layer 5 morphologies with intact apical dendrites",
    contribution=contribution,
    description="Neuron morphologies to be used for Topological Morphology Descriptor analysis",
    # 'hasPart' is not part of the Dataset shape at the moment (09.08.2019).
    hasPart=has_part,
)

In [None]:
# Register the dataset resource through the forge's storing interface.
forge.storing.register(dataset)

**Give this first revision of the dataset a human-friendly name**

In [None]:
# Version name given to the dataset; also reused for the download folder name
# and as `version=` when the dataset is retrieved in User Journey C.
TAG = "v2019-08-20"

In [None]:
# Name the current version of the dataset with TAG.
# The session object is `forge`, as in every other cell of this notebook.
forge.storing.name_version(dataset, TAG)

---

## User Journey C - Data Analytics

1. Retrieve a specific dataset from the Blue Brain Knowledge Graph
    - Retrieve the dataset entity
    - Download the reconstruction files of the neuron morphologies of the dataset
2. Perform a topological analysis of the neuron morphologies from the dataset
    - Visualize the persistence diagram
    - Visualize the persistence barcode
    - Visualize and save the persistence image
3. Register the analysis result with its provenance into the Blue Brain Knowledge Graph

In [None]:
# Folder passed to the download call and loaded as a population below; its name embeds TAG.
DOWNLOAD_FOLDER = f"./reconstructions_{TAG}/"

In [None]:
# Folder passed as `output_path` when the persistence image is plotted.
ANALYSIS_FOLDER = "./analysis/"

In [None]:
# Base name (without extension) passed as `output_name` when the persistence image is plotted.
OUTPUT_NAME = "persistence_image"

### 1 - Retrieve a specific dataset from the Blue Brain Knowledge Graph

**Retrieve the dataset entity**

In [None]:
# Retrieve the dataset by id at the version named TAG; replace the placeholder with the real dataset id.
dataset = forge.querying.retrieve(id="<dataset ID>", version=TAG)

**Download the reconstruction files of the neuron morphologies of the dataset**

In [None]:
# Download what the "hasPart.distribution.contentUrl" path of the dataset points to,
# with DOWNLOAD_FOLDER as the last argument.
# The path is passed positionally because Python does not allow a positional
# argument (DOWNLOAD_FOLDER) after a keyword argument.
forge.querying.download(dataset, "hasPart.distribution.contentUrl", DOWNLOAD_FOLDER)

### 2 - Perform a topological analysis of the neuron morphologies from the dataset

In [None]:
from pathlib import Path

In [None]:
# Create DOWNLOAD_FOLDER; exist_ok=True means no error is raised if it already exists.
# NOTE(review): this cell comes after the download cell above — confirm whether the
# folder should be created before downloading.
Path(DOWNLOAD_FOLDER).mkdir(exist_ok=True)

In [None]:
# Load the contents of DOWNLOAD_FOLDER as a tmd population.
pop = tmd.io.load_population(DOWNLOAD_FOLDER)

In [None]:
# One persistence diagram per neuron of the population, computed on the first
# entry of the neuron's `apical` sequence.
phs = [tmd.methods.get_persistence_diagram(neuron.apical[0]) for neuron in pop.neurons]

In [None]:
# Collapse the per-neuron persistence diagrams into the single structure plotted below.
phs_flattened = tmd.analysis.collapse(phs)

**Visualize the persistence diagram**

In [None]:
from tmd.view import plot

In [None]:
# Plot the persistence diagram of the collapsed data.
plot.diagram(phs_flattened)

**Visualize the persistence barcode**

In [None]:
# Plot the persistence barcode of the collapsed data.
plot.barcode(phs_flattened)

**Visualize and save the persistence image**

In [None]:
# Plot the persistence image, passing ANALYSIS_FOLDER and OUTPUT_NAME as the output location.
# The registration cell below reads "<OUTPUT_NAME>.png" from ANALYSIS_FOLDER.
plot.persistence_image(phs_flattened, output_path=ANALYSIS_FOLDER, output_name=OUTPUT_NAME)

### 3 - Register the analysis result with its provenance into the Blue Brain Knowledge Graph

In [None]:
# Reduce the dataset to its 'id' and 'type' (with versioned=True); used as the derivation entity below.
derived = forge.transforming.reshape(dataset, keep=["id", "type"], versioned=True)

In [None]:
# Analysis resource describing the persistence image, derived from the dataset.
# According to Neuroshapes on 09.08.2019, 'used' (2 times) and 'generated' are also required.
# The image path is joined with pathlib: ANALYSIS_FOLDER already starts with "./" and ends
# with "/", so plain string interpolation produced "././analysis//persistence_image.png".
analysis = Resource(type="Analysis",
                    name="Persistence image",
                    derivation=Resource(type="Derivation", entity=derived),
                    distribution=forge.files.as_resource(str(Path(ANALYSIS_FOLDER) / f"{OUTPUT_NAME}.png")),
                    contribution=Resource(type="Contribution", agent=AGENT_ID))

In [None]:
# Register the analysis resource through the forge's storing interface.
forge.storing.register(analysis)