# Blue Brain Nexus Workshop - Toronto 2019.11

## Configure: environment and pre-requisites

In [None]:
!pip install allensdk
!pip install -U nexus-sdk
!pip install rdflib
!pip install SPARQLWrapper

In [1]:
import requests
import json
from collections import defaultdict
import getpass

from allensdk.core.cell_types_cache import CellTypesCache
from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import ReporterStatus as RS
import nexussdk as nexus

import Nexus.Mapper as nm
import Nexus.Utils as nu

In [2]:
# Base URL of the Blue Brain Nexus deployment that the SDK is configured with below.
DEPLOYMENT = "https://sandbox.bluebrainnexus.io/v1"

In [16]:
# Prompt for the Nexus access token without echoing it, instead of hard-coding it here.
TOKEN = getpass.getpass()

 ·······································································································································································································································································································································································································································································································································································································································································································································································································································································································································································································································

In [17]:
# Point the Nexus SDK at the deployment URL stored in DEPLOYMENT.
nexus.config.set_environment(DEPLOYMENT)

In [18]:
# Hand the token to the SDK (presumably used to authenticate the following requests).
nexus.config.set_token(TOKEN)

In [6]:
# Label of the Nexus organization, passed as `org_label` to the SDK calls below.
ORGANIZATION = "tutorialnexus"

In [7]:
# Label of the Nexus project, passed as `project_label` to the SDK calls below.
PROJECT = "akkaufma" # TODO: tell them to use their project label here

## Collect and explore: Download Allen Cell Types Database data

We will be working with human and mouse neuron morphology and electrophysiology data from the [Allen Cell Types Database](https://celltypes.brain-map.org/). The [AllenSDK](https://allensdk.readthedocs.io/en/latest/) can be used to download the data.

In [8]:
# Cache handle for the Allen Cell Types Database, configured with a manifest file
# located under ./allen_cell_types_db/.
ctc = CellTypesCache(manifest_file="./allen_cell_types_db/manifest.json")

In [9]:
# Retrieve the metadata of the cells that come with a morphology reconstruction.
allen_cells = ctc.get_cells(require_reconstruction=True)

In [11]:
# Report how many cells were returned by the query above.
print("Total number of cells with reconstruction and ephys: %d" % len(allen_cells))

Total number of cells with reconstruction and ephys: 637


In [12]:
# Keep only the ids of the first 20 cells.
# TODO: change to all cells with reconstruction?
allen_cells_ids = [cell["id"] for cell in allen_cells[:20]]

In [13]:
# Fetch the morphology reconstruction of every selected cell through the cache.
allen_reconstruction = [
    ctc.get_reconstruction(cell_id) for cell_id in allen_cells_ids
]

In [14]:
# Fetch the electrophysiology data of every selected cell through the cache.
allen_electrophysiology = [
    ctc.get_ephys_data(cell_id) for cell_id in allen_cells_ids
]

In [None]:
# TODO: explore --> show the directories and the data (ls etc): metadata and files
# let's explore the data ... (display the first elements of the data)
# plot a morphology etc.
# include a figure what you get (same figure as on the website : what you get)

## Ingest: Store the ephys and reconstruction files into Blue Brain Nexus

Push all the neuron morphologies

In [None]:
# TODO: take out the distribution
# TODO: take out the save JSON
# TODO: avoid pushing the same data twice

In [None]:
# Maps a cell id to the distribution metadata of its uploaded morphology file.
morph_files_meta = {}

In [None]:
# TODO: put a little text and add ... defaultnexusstorage

In [15]:
# Display the ids of the selected cells.
allen_cells_ids

[485909730,
 323865917,
 476135066,
 502614426,
 519832676,
 480087928,
 596020931,
 569095789,
 528706755,
 473611755,
 475549334,
 501799874,
 531520637,
 314804042,
 555241040,
 488679042,
 502267531,
 616647103,
 530737765,
 502367941]

In [None]:
# TODO catch the error
# TODO add def to push files and get distribution
# function should just get file format (all swc, nwb)

In [None]:
# Upload the SWC reconstruction of every selected cell to Nexus and keep, per cell id,
# a "DataDownload" description built from the metadata returned for the stored file.
for cell_id in allen_cells_ids:
    file_path = f"./allen_cell_types_db/specimen_{cell_id}/reconstruction.swc"
    response = nexus.files.create(org_label=ORGANIZATION, project_label=PROJECT, filepath=file_path)
    morph_files_meta[cell_id] = {
        "@type": "DataDownload",
        "contentUrl": response["_self"],
        "contentSize": {
            "unitCode": "bytes",
            "value": response["_bytes"],
        },
        "digest": {
            "algorithm": "SHA-256",
            "value": response["_digest"]["_value"],
        },
        # The property is spelled "encodingFormat"; the previous "encodingFOrmat"
        # key was a typo.
        "encodingFormat": "application/swc",
        "name": response["_filename"],
    }

In [None]:
# Persist the morphology distribution metadata to disk. `save_json` is not defined
# in this notebook, so the standard json module is used instead.
# Note: the integer cell ids become string keys in the JSON file.
with open("./morphs_files_meta.json", "w") as morph_meta_file:
    json.dump(morph_files_meta, morph_meta_file, sort_keys=True, indent=4)

Push all the electrophysiology recordings

In [None]:
# Maps a cell id to the distribution metadata of its uploaded electrophysiology file.
ephys_files_meta = {}

In [None]:
# Upload the NWB electrophysiology file of every selected cell to Nexus and keep, per
# cell id, a "DataDownload" description built from the metadata returned for the file.
for cell_id in allen_cells_ids:
    file_path = f"./allen_cell_types_db/specimen_{cell_id}/ephys.nwb"
    response = nexus.files.create(org_label=ORGANIZATION, project_label=PROJECT, filepath=file_path)
    ephys_files_meta[cell_id] = {
        "@type": "DataDownload",
        "contentUrl": response["_self"],
        "contentSize": {
            "unitCode": "bytes",
            "value": response["_bytes"],
        },
        "digest": {
            "algorithm": "SHA-256",
            "value": response["_digest"]["_value"],
        },
        # The property is spelled "encodingFormat"; the previous "encodingFOrmat"
        # key was a typo.
        "encodingFormat": "application/nwb",
        "name": response["_filename"],
    }

In [None]:
# Persist the electrophysiology distribution metadata to disk. `save_json` is not
# defined in this notebook, so the standard json module is used instead.
# Note: the integer cell ids become string keys in the JSON file.
with open("./ephys_files_meta.json", "w") as ephys_meta_file:
    json.dump(ephys_files_meta, ephys_meta_file, sort_keys=True, indent=4)

## Map: Map the Allen metadata to the Neuroshapes data model

In [None]:
# Load the cell metadata file found in the Allen cache directory. The `nsu` alias
# used previously is never imported, so the file is read with the json module.
# NOTE(review): assumes cells.json is plain JSON holding a list of cell records - confirm.
with open("./allen_cell_types_db/cells.json") as cells_file:
    neurons_allen = json.load(cells_file)  # TODO change the same

In [None]:
# Keep only the Allen cell records whose specimen id has an uploaded ephys file.
neuron_morphs_allen = [
    cell_record
    for cell_record in neurons_allen
    if cell_record["specimen__id"] in ephys_files_meta
]

In [None]:
# Display the first selected cell record to inspect its fields.
neuron_morphs_allen[0] # TODO: change the naming

In [None]:
# TODO: create Allen utility and abstract out

In [None]:
# Load the vocabulary handed to the mapper below. The `nsu` alias used previously is
# never imported, so the file is read with the json module.
with open("./vocabulary.json") as vocabulary_file:
    vocabulary = json.load(vocabulary_file)

In [None]:
# Instantiate the mapper. `Nexus.Mapper` is imported as `nm` at the top of the
# notebook; the previous `nsm` alias was never defined.
mapper = nm.Mapper()

In [None]:
# Map the selected Allen cell records to the Neuroshapes data model using the vocabulary.
# The result is indexed with "@graph" further down, so it is expected to be dict-like.
at_graph_allen = mapper.allencelltypesdb2neuroshapes(neuron_morphs_allen, vocabulary) # TODO: naming

In [None]:
# TODO: add the provenance information regarding contribution & protocol and update the entities which point to the protocol def

In [None]:
# TODO: you can save...

In [None]:
# Write the mapped metadata graph to disk as indented JSON with sorted keys.
filename = "./at_graph_allen.json"
with open(filename, "w") as filehandle:
    json.dump(
        at_graph_allen,
        filehandle,
        indent=4,
        sort_keys=True,
    )

## Ingest: Ingest the metadata graph

In [None]:
# JSON-LD context assigned to the "@context" key of every entity before it is pushed.
# TODO: change to the context from the atgraph at_graph_allen["@context"]
context = "https://bbp.neuroshapes.org"

In [None]:
# The entities to ingest are the members of the graph's "@graph" entry.
entities = at_graph_allen["@graph"]

In [None]:
# Create one Nexus resource per entity of the metadata graph. Morphology entities are
# first linked to the file uploaded for the same cell through a "distribution" entry.
# TODO: fix the cell id to get the right metadata
# TODO: store the url to the allen website?
for entity in entities:
    entity["@context"] = context
    if "nsg:NeuronMorphology" in entity["@type"]:
        # The cell id is taken from the part of "@id" after the last underscore.
        distribution = morph_files_meta[int(entity["@id"].split("_")[-1])] # TODO: use the identifier
        entity["distribution"] = distribution
    try:
        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT, data=entity)
    except nexus.HTTPError as e:
        # The SDK is imported as `nexus`; the previous `nxs` alias was undefined, so
        # any HTTP failure surfaced as a NameError instead of being reported.
        # Print the error details and continue with the remaining entities.
        print(e)
        print("---")
        nexus.tools.pretty_print(e.response.json())

# TODO: Download
- get one and plot it
- download through nexus sdk directly 
- sparql by brain region etc... --> they should get data frame with metadata and have the option to download result and metadata
- perform this potentially on a pre-set project with more data