# Blue Brain Nexus Workshop - Toronto 2019.11

Objective: Collect, map, ingest, find and download data from the Allen Cell Types Database

<img src="../ingest-allen-celltypes-db-in-nexus/assets/nexus_workshop_data_pipeline.png" width="800">

## Configure: Blue Brain Nexus environment

Install the required python packages

In [None]:
# !pip install allensdk
# !pip install -U nexus-sdk
# !pip install rdflib
# !pip install SPARQLWrapper

Import the required python packages

In [None]:
import requests
import json
import getpass
import pandas as pd
import os
import matplotlib.pyplot as plt

from allensdk.core.cell_types_cache import CellTypesCache
from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import ReporterStatus as RS
from allensdk.core.swc import Marker

from sparqlendpointhelper import SparqlViewHelper
import nexussdk as nexus
import Nexus.Mapper as mapper
import Nexus.Utils as utils
import Nexus.Payload as payload

Set up the Blue Brain Nexus sandbox environment

In [None]:
DEPLOYMENT = "https://sandbox.bluebrainnexus.io/v1"

In [None]:
TOKEN = getpass.getpass() # Paste your token here

In [None]:
nexus.config.set_environment(DEPLOYMENT)

In [None]:
nexus.config.set_token(TOKEN)

In [None]:
ORGANIZATION = "tutorialnexus" # For the purpose of this workshop, we will be working in the tutorialnexus organization

In [None]:
PROJECT = "akkaufma" # Paste your project name here

## Collect and explore: Allen Cell Types Database electrophysiology and neuron morphology data (files and metadata)

We will be working with human and mouse neuron morphology and electrophysiology data from the [Allen Cell Types Database](https://celltypes.brain-map.org/). The [AllenSDK](https://allensdk.readthedocs.io/en/latest/) can be used to download the data.

In [None]:
ctc = CellTypesCache(manifest_file="./allen_cell_types_db/manifest.json")

We will select all cells for which there is a reconstructed neuron morphology available

In [None]:
allen_cells = ctc.get_cells(require_reconstruction = True)

In [None]:
print("Total number of cells in the Allen Cell Types Database which have ephys and reconstruction data: %d" % len(allen_cells))

We will be downloading a subset of the data from the Allen Cell Types Database (the first 20 cells)

In [None]:
# Keep only the ids of the first 20 selected cells.
# TODO: change to all cells with reconstruction?
allen_cells_ids = [cell["id"] for cell in allen_cells][:20]

Download the reconstructed neuron morphology files (file format: swc)

In [None]:
allen_cells_reconstruction = [ctc.get_reconstruction(i) for i in allen_cells_ids]

Download the trace collection files (file format: nwb)

In [None]:
allen__cells_electrophysiology = [ctc.get_ephys_data(i) for i in allen_cells_ids]

Access the cells.json metadata file

In [None]:
allen_cells_metadata = utils.load_json("./allen_cell_types_db/cells.json")

Display the first element from the cells.json file

In [None]:
allen_cells_metadata[0]

Plot a reconstructed neuron morphology

In [None]:
# Plot the first downloaded reconstruction as two side-by-side line drawings.
morphology = allen_cells_reconstruction[0]
fig, (ax_xy, ax_zy) = plt.subplots(1, 2, sharey=True, sharex=True)
for ax in (ax_xy, ax_zy):
    ax.set_aspect('equal')

# Draw one black segment per parent -> child compartment pair:
# left panel is x against y, right panel is z against y.
for parent in morphology.compartment_list:
    for child in morphology.children_of(parent):
        ys = [parent['y'], child['y']]
        ax_xy.plot([parent['x'], child['x']], ys, color='black')
        ax_zy.plot([parent['z'], child['z']], ys, color='black')

ax_xy.set_ylabel('y')
ax_xy.set_xlabel('x')
ax_zy.set_xlabel('z')
plt.show()

## Ingest: Store electrophysiology and neuron morphology files in Blue Brain Nexus

Store the neuron morphologies using the Blue Brain Nexus DefaultStorage

In [None]:
# TODO: avoid pushing the same data twice

In [None]:
morph_files_meta = dict()

In [None]:
# Store the reconstruction file of every selected cell; the metadata dict is
# passed in and rebound to the helper's return value on each iteration.
for cell_id in allen_cells_ids:
    morph_files_meta = utils.store_allen_files(
        nexus,
        cell_id=cell_id,
        data_type="reconstruction",
        metadata_dict=morph_files_meta,
        org_label=ORGANIZATION,
        project_label=PROJECT,
    )

In [None]:
utils.save_json(morph_files_meta, "./morphs_files_meta.json") # TODO: remove?

Store the electrophysiology using the Blue Brain Nexus DefaultStorage

In [None]:
ephys_files_meta = dict()

In [None]:
# Store the electrophysiology file of every selected cell; the metadata dict is
# passed in and rebound to the helper's return value on each iteration.
for cell_id in allen_cells_ids:
    ephys_files_meta = utils.store_allen_files(
        nexus,
        cell_id=cell_id,
        data_type="ephys",
        metadata_dict=ephys_files_meta,
        org_label=ORGANIZATION,
        project_label=PROJECT,
    )

In [None]:
utils.save_json(ephys_files_meta, "./ephys_files_meta.json") # TODO: remove?

## Map: Allen Cell Types Database metadata to Neuroshapes

In [None]:
# TODO: remove

# Reload the file metadata saved by the ingestion cells. The morphology file
# name must match the one passed to utils.save_json ("morphs_files_meta.json").
ephys_files_metadata = utils.load_json("ephys_files_meta.json")
morphs_files_metadata = utils.load_json("morphs_files_meta.json")

Select the metadata of your subset of cells

In [None]:
# Keep only the cells whose specimen id (as a string) is a key of the stored
# electrophysiology file metadata.
subset_allen_cells_metadata = [
    cell_metadata
    for cell_metadata in allen_cells_metadata
    if str(cell_metadata["specimen__id"]) in ephys_files_metadata
]

In [None]:
mapping = mapper.Mapper()

Map the metadata provided by the Allen Cell Types Database to Neuroshapes

In [None]:
metadata_entities = mapping.allencelltypesdb2neuroshapes(subset_allen_cells_metadata)

Add experimental protocol information to the metadata entities

In [None]:
payload = payload.Experiment()

In [None]:
ephys_experimental_protocol = payload.experimentalprotocol(name="Technical White Paper: Electrophysiology",
                                                            at_id="http://help.brain-map.org/download/attachments/8323525/CellTypes_Ephys_Overview.pdf?version=2&modificationDate=1508180425883&api=v2",
                                                            author_id="https://www.grid.ac/institutes/grid.417881.3",
                                                            date_published="2017-10-00T00:00:00",
                                                            description="Protocol used to generate Allen Cell Types Database")

In [None]:
metadata_entities["@graph"].append(ephys_experimental_protocol)

In [None]:
reconstruction_experimental_protocol = payload.experimentalprotocol(name="Technical White Paper: Cell Morphology and Histology",
                                                            at_id="http://help.brain-map.org/download/attachments/8323525/CellTypes_Morph_Overview.pdf?version=4&modificationDate=1528310097913&api=v2",
                                                            author_id="https://www.grid.ac/institutes/grid.417881.3",
                                                            date_published="2017-10-00T00:00:00",
                                                            description="Protocol used to generate Allen Cell Types Database")

In [None]:
metadata_entities["@graph"].append(reconstruction_experimental_protocol)

In [None]:
utils.save_json(metadata_entities, "./metadata_entities.json") # TODO: remove?

## Ingest: Store mapped Allen Cell Types Database metadata in Blue Brain Nexus

In [None]:
# TODO: what should be the context?

In [None]:
utils.store_allen_metadata(nexus, ORGANIZATION, PROJECT, metadata_entities, ephys_files_metadata, morphs_files_metadata)

## Find and download: Filter by metadata using SPARQL

Define the properties you want to filter by

In [None]:
# Filter values for the query; the layer and apical dendrite values carry
# their own double quotes as part of the string.
data_type = 'nsg:ReconstructedNeuronMorphology'
brain_region_layer = '"layer 5"'
brain_region = ''  # TODO: Add option to filter by brain region
apical_dendrite = '"intact"'

Provide the SPARQL query

In [None]:
sparqlview_endpoint = f"{DEPLOYMENT}/views/{ORGANIZATION}/{PROJECT}/graph/sparql"

In [None]:
nexus_df = utils.query_data(sparqlview_endpoint, data_type, brain_region_layer, apical_dendrite, TOKEN)

In [None]:
# Report the query outcome: either a notice that nothing matched, or summary
# statistics, the first five rows and the number of distinct entities.
if nexus_df is None:
    print("No result was found")
else:
    print("Results stats: ")
    display(nexus_df.describe())
    print("Results : ")
    display(nexus_df.head(5))
    entities = set(nexus_df["entity"])
    print(" : %s" % (len(entities)))

Download the selected reconstructed neuron morphologies

In [None]:
data_dir ="./Download/"

In [None]:
# Create the download directory if it is missing. exist_ok=True replaces the
# separate existence check, so an already existing directory is not an error.
os.makedirs(data_dir, exist_ok=True)

In [None]:
# Collect the distinct download links. The results cell treats a None
# dataframe as "no result", so fall back to an empty list in that case.
download_urls = list(set(nexus_df["downloadUrl"])) if nexus_df is not None else []

In [None]:
print(f"Number of download links: {len(download_urls)}")

In [None]:
# Fetch every selected file into data_dir. On an HTTP error, report which link
# failed and the error payload, then continue with the next link.
for url in download_urls:
    try:
        response = nexus.files.fetch(ORGANIZATION, PROJECT, file_id=url, out_filepath=data_dir)
    except nexus.HTTPError as e:
        print(e)
        # Show the failing link; the name previously printed here was never defined.
        print(f"Failed to download: {url}")
        print("----")
        nexus.tools.pretty_print(e.response.json())

In [None]:
# TODO: Download
#- get one and plot it
#- add the file extension?