# Store, manage and access neuroscience datasets from different sources with MINDS metadata and provenance

## Initialize and configure

In [None]:
!pip install nexusforge

In [None]:
!pip install allensdk

In [None]:
!pip install neurom

### Get an authentication token

For now, the [Nexus sandbox application](https://sandbox.bluebrainnexus.io/web) can be used to get a token. We are looking for other simpler alternatives.

- Step 1: From the opened web page, click on the login button on the right corner and follow the instructions.

![login-ui](./login-ui.png)

- Step 2: At the end you’ll see a token button on the right corner. Click on it to copy the token.

![copy-token](./copy-token.png)


Once you have obtained a token, paste it below.

In [None]:
import getpass

In [None]:
# Read the token interactively with getpass so it is not echoed while typed or pasted.
TOKEN = getpass.getpass()

### Configure a forge client to store, manage and access datasets

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
# Organization and project names; they are joined as "<ORG>/<PROJECT>" to form the forge bucket.
ORG = "github-users"
PROJECT = ""  # Provide here the automatically created project name with your login when you logged in the Nexus sandbox instance.

In [None]:
from kgforge.core import KnowledgeGraphForge

# Create the forge session from the local "forge.yml" configuration file,
# pointing it at the "<ORG>/<PROJECT>" bucket of the Nexus sandbox endpoint
# and authenticating with TOKEN.
forge = KnowledgeGraphForge(
    "forge.yml",
    bucket=f"{ORG}/{PROJECT}",
    endpoint="https://sandbox.bluebrainnexus.io/v1",
    token=TOKEN,
)

#### Configuration

In [None]:
import json
import requests
import uuid
import base64

from kgforge.specializations.mappings import DictionaryMapping

from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import CellTypesCache

## Download datasets from Allen Cell Types Database and from MouseLight

### Download mouse neuron morphologies from the Allen Cell Types Database

### Select one mouse neuron along with its metadata

In [None]:
# Local directory that holds the AllenSDK manifest and the cells.json metadata file read in this notebook.
ALLEN_DIR = "allen_cell_types_database"

In [None]:
# Cell Types cache whose manifest file is kept under ALLEN_DIR.
ctc = CellTypesCache(manifest_file=f"{ALLEN_DIR}/manifest.json")

In [None]:
# Selection parameters: how many cells to keep and which species to request from the cache.
MAX_CELLS = 1 # Increase to include more cells
SPECIES = CellTypesApi.MOUSE

In [None]:
# Collect the ids of the cells of the requested species that have a
# reconstruction, then keep at most MAX_CELLS of them.
nm_allen_identifiers = [
    cell["id"]
    for cell in ctc.get_cells(species=[SPECIES], require_reconstruction=True)
][:MAX_CELLS]
print(f"Selected a mouse neuron with identifier: {nm_allen_identifiers}")

In [None]:
# Load the cell metadata file found under ALLEN_DIR.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding (JSON text is UTF-8).
with open(f"{ALLEN_DIR}/cells.json", encoding="utf-8") as f:
    allen_cell_types_metadata = json.load(f)

In [None]:
# Keep only the metadata records whose "specimen__id" is one of the selected identifiers.
nm_allen_metadata = [
    cell_record
    for cell_record in allen_cell_types_metadata
    if cell_record["specimen__id"] in nm_allen_identifiers
]

In [None]:
# Print a header, then leave the metadata as the cell's last expression so the notebook displays it.
print(f"Metadata of the neuron {nm_allen_identifiers}:")
nm_allen_metadata

### Download one mouse neuron morphology reconstructed from the selected single neuron

We will be downloading one mouse neuron morphology data from the [Allen Cell Types Database](https://celltypes.brain-map.org/) using the [AllenSDK](https://allensdk.readthedocs.io/en/latest/).

In [None]:
# Request the reconstruction of every selected cell through the cache; the return value is not used here.
for identifier in nm_allen_identifiers:
    ctc.get_reconstruction(identifier)

### Download one mouse neuron electrophysiology recordings from the selected single neuron

In [None]:
# Request the electrophysiology data of every selected cell through the cache; the return value is not used here.
for identifier in nm_allen_identifiers:
    ctc.get_ephys_data(identifier)

## Transform Allen Metadata to [Neuroshapes' MINDS](https://bbp-nexus.epfl.ch/datamodels/class-schemadataset.html) metadata

Transform Allen Metadata to [Neuroshapes' MINDS](https://bbp-nexus.epfl.ch/datamodels/class-schemadataset.html).

### Map the Allen Cell Types Database neuron morphologies metadata to Neuroshapes

In [None]:
# Load the morphology mapping file and apply it to the selected Allen metadata.
# NOTE(review): na='' is presumably the value to be treated as "not available" in the source data — confirm against forge.map.
allen_nm_mapping = DictionaryMapping.load("./mappings/allen_morphology_dataset.hjson")
nm_allen_resources = forge.map(nm_allen_metadata, allen_nm_mapping, na='')

### Map the Allen Cell Types Database neuron electrophysiology recordings to Neuroshapes

In [None]:
# Load the electrophysiology mapping file and apply it to the same Allen metadata used for the morphologies.
# NOTE(review): na='' is presumably the value to be treated as "not available" in the source data — confirm against forge.map.
allen_ephys_mapping = DictionaryMapping.load("./mappings/allen_ephys_dataset.hjson")
nephys_allen_resources = forge.map(nm_allen_metadata, allen_ephys_mapping, na='')

## Register

In [None]:
import uuid
import base64

### Register the Allen Cell Types Database neuron morphologies

In [None]:
# Give every mapped morphology resource its own freshly generated identifier.
# nm_allen_metadata is a list, so the mapping result may be a list of resources
# rather than a single one; handle both shapes so that each resource gets a
# distinct uuid (also when MAX_CELLS is increased).
# NOTE(review): confirm the return type of forge.map() for list input.
_nm_resources = nm_allen_resources if isinstance(nm_allen_resources, list) else [nm_allen_resources]
for _nm_resource in _nm_resources:
    _nm_resource.id = forge.format("identifier", "neuronmorphologies", str(uuid.uuid4()))


In [None]:
# Register the mapped morphology resources through the forge.
forge.register(nm_allen_resources)

### Register the Allen Cell Types Database neuron electrophysiology recordings

In [None]:
# Give every mapped electrophysiology resource its own freshly generated identifier.
# nm_allen_metadata is a list, so the mapping result may be a list of resources
# rather than a single one; handle both shapes so that each resource gets a
# distinct uuid (also when MAX_CELLS is increased).
# NOTE(review): confirm the return type of forge.map() for list input.
_nephys_resources = nephys_allen_resources if isinstance(nephys_allen_resources, list) else [nephys_allen_resources]
for _nephys_resource in _nephys_resources:
    _nephys_resource.id = forge.format("identifier", "traces", str(uuid.uuid4()))

In [None]:
# Register the mapped electrophysiology resources through the forge.
forge.register(nephys_allen_resources)

## Access

### Set filters

In [None]:
# Search criteria: resource type, brain region label and layer.
_type = "NeuronMorphology"
brainRegion = "MTG"
# NOTE(review): this variable used to hold "layer 4" but was never used, while
# the filter below hard-coded "2". The value actually sent in the query ("2")
# is kept; confirm which layer is intended.
layer = "2"

# Nested dictionary whose structure mirrors the properties to match.
filters = {
    "type": _type,
    "brainLocation": {
        "brainRegion": {
            "label": brainRegion,
        },
        "layer": layer,
    },
}

### Run Query

In [None]:
# Upper bound on the number of resources requested from the search.
number_of_results = 2

# Run the search with the filters defined earlier in the notebook.
data = forge.search(filters, limit=number_of_results)

print(f"{len(data)} dataset of type '{_type}' found.")

### Display the results as pandas dataframe

In [None]:
# Maximum number of reshaped resources shown in the dataframe.
DISPLAY_LIMIT = 10
# Property paths to keep when reshaping the search results. Each path is
# listed once (the layer id/label paths were previously repeated).
property_to_display = [
    "id",
    "name",
    "subject",
    "brainLocation.brainRegion.id",
    "brainLocation.brainRegion.label",
    "brainLocation.layer.id",
    "brainLocation.layer.label",
    "contribution",
    "distribution.name",
    "distribution.contentUrl",
    "distribution.encodingFormat",
]
reshaped_data = forge.reshape(data, keep=property_to_display)

# Last expression of the cell, so the notebook renders the dataframe.
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])

### Download

In [None]:
# Target directory for the downloaded files.
dirpath = "./downloaded/"

# NOTE(review): this sets a private attribute of the forge, presumably to get
# more detailed error output during the download — confirm.
forge._debug = True

# Download the files referenced by "distribution.contentUrl" into dirpath.
forge.download(data, "distribution.contentUrl", dirpath, overwrite=False)

In [None]:
ls ./downloaded/

### Display a result as a 3D neuron morphology

In [None]:
from neurom import load_neuron
from neurom.view.plotly import draw
# Load the file downloaded for the first search result and draw it.
# dirpath already ends with "/", so the resulting path contains a double slash.
# NOTE(review): assumes data[0].distribution is a single object exposing `name` — confirm.
neuron = load_neuron(f"{dirpath}/{data[0].distribution.name}")
draw(neuron, inline=False)

## Version the dataset
Tagging a dataset is equivalent to creating a git tag: it lets you version the dataset.

In [None]:
# Tag the resources returned by the search with the value "releaseV112".
forge.tag(data, value="releaseV112")

In [None]:
# The version argument can be specified to retrieve the dataset at a given tag.
tagged_data = forge.retrieve(id=data[0].id, version="releaseV112")

In [None]:
# Show the resource retrieved at the tag as a dataframe.
forge.as_dataframe(tagged_data)

In [None]:
# Set a description on the first search result; this only modifies the local object.
data[0].description="Neuron Morphology from Allen"

In [None]:
# Send the modified first result to the forge as an update.
forge.update(data[0])

In [None]:
# Retrieve the same resource again, this time without a version argument.
non_tagged_data = forge.retrieve(id=data[0].id)

In [None]:
# Show the resource retrieved without a version as a dataframe, for comparison with the tagged one.
forge.as_dataframe(non_tagged_data)