# TODO

- Some molecular types are not available in the BMO: Serpinf1, Sncg

## Context

See also: https://bbpteam.epfl.ch/project/issues/browse/DKE-961

## Imports

In [None]:
import uuid
import getpass
import rdflib
import pandas as pd
from rdflib import RDF, RDFS, XSD, OWL, URIRef, BNode, SKOS
from kgforge.core import KnowledgeGraphForge
from kgforge.specializations.mappings import DictionaryMapping
from kgforge.specializations.mappers import DictionaryMapper

In [None]:
# Show the installed kgforge version.
from kgforge.version import __version__
print(__version__)

## Setup

Get your TOKEN from Nexus Web (https://bbp.epfl.ch/nexus/web):
* log in using your Gaspar user name and password
* click on "Copy token" in the top right corner
* paste copied token into the cell below

In [None]:
# Prompt for the Nexus token; getpass keeps the pasted value from being echoed.
TOKEN = getpass.getpass()

In [None]:
# Forge session configured from the public prod-forge-nexus.yml file and bound
# to the "neurosciencegraph/datamodels" bucket.
# NOTE(review): the endpoint below is a *staging* URL while the setup text
# points to the production Nexus Web for the token — confirm this is intended
# before registering anything.
forge = KnowledgeGraphForge("https://raw.githubusercontent.com/BlueBrain/nexus-forge/master/examples/notebooks/use-cases/prod-forge-nexus.yml",
                            token=TOKEN,
                            endpoint="https://staging.nise.bbp.epfl.ch/nexus/v1",
                            bucket="neurosciencegraph/datamodels",
                            debug=True)

## Source data

In [None]:
# Local working directory: the ontology files are downloaded into it and the
# probability CSV is read from it.
BASE = "/Users/akkaufma/Desktop/probs" # TODO: machine-specific path, adjust before running

### Cell Types Ontology

This ontology is used to look up the e-type and m-type identifiers from their labels, which are stored in the spreadsheet with the probabilities.

In [None]:
# Identifier of the Cell Types Ontology resource to retrieve.
cell_types_id = "http://bbp.epfl.ch/neurosciencegraph/ontologies/core/celltypes"

In [None]:
# Fetch the ontology resource (metadata including its distributions) by id.
cell_types_resource = forge.retrieve(cell_types_id)

In [None]:
# Download the files referenced by the resource's distribution.contentUrl into BASE.
forge.download(cell_types_resource, "distribution.contentUrl", BASE)

In [None]:
# Pick the Turtle serialisation among the downloaded distributions.
# If several names contain ".ttl", the last one wins.
for distribution in cell_types_resource.distribution:
    if ".ttl" not in distribution.name:
        continue
    cell_types_ttl = distribution.name

In [None]:
# Load the downloaded Turtle file into an in-memory RDF graph.
cell_types_ontology = rdflib.Graph()
cell_types_ontology.parse(f"{BASE}/{cell_types_ttl}")

### Brain Modeling Ontology

This ontology is used to look up the molecular type identifiers from their labels, which are stored in the spreadsheet with the probabilities.

In [None]:
# Identifier of the Brain Modeling Ontology (BMO) resource to retrieve.
bmo_id = "https://bbp.epfl.ch/ontologies/core/bmo"

In [None]:
# Fetch the BMO resource (metadata including its distributions) by id.
bmo_resource = forge.retrieve(bmo_id)

In [None]:
# Download the files referenced by the resource's distribution.contentUrl into BASE.
forge.download(bmo_resource, "distribution.contentUrl", BASE)

In [None]:
# Pick the Turtle serialisation among the downloaded distributions.
# If several names contain ".ttl", the last one wins.
for distribution in bmo_resource.distribution:
    if ".ttl" not in distribution.name:
        continue
    bmo_ttl = distribution.name

In [None]:
# Load the downloaded BMO Turtle file into an in-memory RDF graph.
bmo = rdflib.Graph()
bmo.parse(f"{BASE}/{bmo_ttl}")

### Probabilities

In [None]:
# Probability spreadsheet (CSV); it is expected to already be present in BASE.
probability_file = f"{BASE}/P(BBPmarker_metype)_L26_(Gouw+pseq_BBP)April_16_2021.csv"

In [None]:
# Read the probabilities into a DataFrame; the label column has no header in
# the CSV and is therefore addressed as "Unnamed: 0" below.
p_df = pd.read_csv(probability_file)

In [None]:
# Preview the table indexed by its label column. The returned frame is not
# assigned, so `p_df` itself keeps "Unnamed: 0" as a regular column (the cells
# below rely on that).
p_df.set_index("Unnamed: 0")

In [None]:
# List the molecular-type labels of the "Unnamed: 0" column, one per line.
for row_position, moltype_label in p_df["Unnamed: 0"].items():
    print(moltype_label)

**Note:** The labels in the spreadsheet don't fully match the labels from the BMO. Hence the labels and ids have been pasted as a mapping below.

In [None]:
# Spreadsheet row label -> {"id", "label"} of the matching molecular type.
# Every id is the BMO namespace followed by the BMO-side label, so the table
# is derived from (spreadsheet label, BMO label) pairs.
molecular_types = {
    spreadsheet_label: {
        "id": f"https://bbp.epfl.ch/ontologies/core/bmo/{bmo_label}",
        "label": bmo_label,
    }
    for spreadsheet_label, bmo_label in (
        ("Vip", "VIP+"),
        ("Lamp5", "Lamp+"),
        ("Pvalb", "PV+"),
        ("Sst", "SST+"),
        ("Sncg", "Sncg+"),
        ("Serpinf1", "Serpinf1+"),
    )
}

In [None]:
# Dry run: resolve the m-type and e-type identifier of every me-type column
# header against the Cell Types Ontology. The ids are only assigned to
# variables here, not collected.
for column in p_df.columns[1:]:
    parts = column.split("_")
    # Header layout: the first two "_"-separated tokens form the m-type label,
    # the last token is the e-type label.
    mtype_label = parts[0] + "_" + parts[1]
    etype_label = parts[-1]
    mtype_literal = rdflib.Literal(mtype_label, lang="en")
    etype_literal = rdflib.Literal(etype_label, lang="en")
    # If several subjects carry the label, the last one yielded wins; if none
    # does, the variable keeps whatever it held before.
    for subject in cell_types_ontology.subjects(RDFS.label, mtype_literal):
        mtype_id = subject
    for subject in cell_types_ontology.subjects(RDFS.label, etype_literal):
        etype_id = subject

In [None]:
# Accumulator for the flat probability records built in the next cell.
resources = []

In [None]:
def _cell_type_id(label):
    """Return, as a string, the id of the cell type whose English rdfs:label is `label`.

    The lookup is done in `cell_types_ontology`. When several subjects carry
    the label, the last one yielded is used.

    Raises:
        ValueError: if no subject in the ontology carries this label. Failing
            loudly avoids silently reusing the id resolved for a previous column.
    """
    literal = rdflib.term.Literal(label, lang="en")
    subjects = [s for s, p, o in cell_types_ontology.triples((None, RDFS.label, literal))]
    if not subjects:
        raise ValueError(f"No cell type with label {label!r} found in the Cell Types Ontology")
    return str(subjects[-1])


# Build one flat record per (molecular type, me-type) probability cell.
for row_index, row in p_df.iterrows():
    row = dict(row)
    moltype = molecular_types[row["Unnamed: 0"]]
    # Skip the first entry: it is the molecular-type label, not a probability.
    for column, probability in list(row.items())[1:]:
        # Header layout: the first two "_"-separated tokens form the m-type
        # label, the last token is the e-type label.
        fragments = column.split("_")
        mtype_label = f"{fragments[0]}_{fragments[1]}"
        etype_label = fragments[-1]
        # A fresh dict per cell: appending one shared dict for the whole row
        # would make every record of that row show the last column's values.
        resources.append({
            "moltype_label": moltype["label"],
            "moltype_id": moltype["id"],
            "etype_label": etype_label,
            "etype_id": _cell_type_id(etype_label),
            "mtype_label": mtype_label,
            "mtype_id": _cell_type_id(mtype_label),
            "value": probability,
            "name": f"{moltype['label']}_{mtype_label}_{etype_label}",
        })

## Annotation resources

### Mapping

In [None]:
# Load the mapping rules that turn a flat probability record into an annotation.
# The path is relative to the notebook's working directory.
mapping = DictionaryMapping.load("../data/mappings/ProbabilityAnnotation.hjson")

### Transformation

In [None]:
# Contribution attached to every annotation below; the agent is the Nexus
# user "yroussel".
contribution = forge.from_json({
    "type": "Contribution",
    "agent": {
        "id": "https://bbp.epfl.ch/nexus/v1/realms/bbp/users/yroussel",
        "type": ["Agent", "Person"]
    }
})

In [None]:
# Software agent describing the me-types-mapper package; it is referenced by
# the generation activity defined in the next cell.
softwareagent = forge.from_json(
        {
            "type": ["Agent", "SoftwareAgent"],
            "softwareSourceCode": {
                "type": "SoftwareSourceCode",
                "codeRepository": {
                    "id": "https://github.com/BlueBrain/me-types-mapper"
                },
                "programmingLanguage": "Python"

            },
            "name": "me-types-mapper",
            "description": "me-types-mapper is a python package that propose a probabilistic mapping between cell types from two different datasets based on shared morpho-electrical features."
        }
    )

In [None]:
# Generation provenance attached to every annotation below: an activity
# spanning 2021-04-16 that was associated with the software agent.
# NOTE(review): the timestamps carry no timezone — confirm that is acceptable.
generation = forge.from_json({
    "type": "Generation",
    "activity": {
        "type": "Activity",
        "startedAtTime": {
            "@value": "2021-04-16T00:00:00",
            "@type": "xsd:dateTime"
        },
        "endedAtTime": {
            "@value": "2021-04-16T23:59:00",
            "@type": "xsd:dateTime"
        },
        "wasAssociatedWith": softwareagent
    }
})

In [None]:
# Apply the dictionary mapping to every record in `resources`.
annotations = forge.map(resources, mapping, DictionaryMapper)

In [None]:
# Attach the shared provenance and mint a fresh UUID-based identifier for
# each mapped annotation.
for annotation in annotations:
    annotation_uuid = str(uuid.uuid4())
    annotation.contribution = contribution
    annotation.generation = generation
    annotation.id = forge.format("identifier", "annotations", annotation_uuid)

### Registration

In [None]:
# Inspect the first annotation before registering anything.
print(annotations[0])

In [None]:
# Register all annotations in the configured bucket, passing
# "datashapes:annotation" as the schema.
forge.register(annotations, "datashapes:annotation")