In [133]:
import math

from string import Template

import matplotlib.pyplot as plt
import numpy as np

import getpass
from collections import namedtuple
from urllib.parse import quote_plus

import nexussdk as nxs
from kgforge.core import KnowledgeGraphForge

## Helpers

In [134]:
FORMULAS = {
    "cosine": "doc['embedding'].size() == 0 ? 0 : (cosineSimilarity(params.query_vector, doc['embedding']) + 1.0) / 2",
    "euclidean": "doc['embedding'].size() == 0 ? 0 : (1 / (1 + l2norm(params.query_vector, doc['embedding'])))",
    "poincare": "float[] v = doc['embedding'].vectorValue; if (doc['embedding'].size() == 0) { return 0; } double am = doc['embedding'].magnitude; double bm = 0; double dist = 0; for (int i = 0; i < v.length; i++) { bm += Math.pow(params.query_vector[i], 2); dist += Math.pow(v[i] - params.query_vector[i], 2); } bm = Math.sqrt(bm); dist = Math.sqrt(dist); double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  double d = Math.log(x + Math.sqrt(Math.pow(x, 2) - 1)); return 1 / (1 + d);"
}

In [135]:
SIMILARITY_VIEW_MAPPING = {
    "properties": {
      "@id": {
        "type": "keyword"
      },
      "_self": {
          "type": "keyword"
      },
      "@type": {
        "type": "keyword"
      },
      "derivation": {
        "properties": {
          "entity": {
            "properties": {
              "@id": {
                "type": "keyword"
              }
            },
            "type": "nested"
          }
        },
        "type": "nested"
      },
      "embedding": {
        "type": "dense_vector"
      },
      "generation": {
        "properties": {
          "activity": {
            "properties": {
              "used": {
                "properties": {
                  "@id": {
                    "type": "keyword"
                  }
                },
                "type": "nested"
              }
            },
            "type": "nested"
          }
        },
        "type": "nested"
      }
    }
}


BOOSTING_VIEW_MAPPING = {
    "properties": {
      "@id": {
        "type": "keyword"
      },
      "_self": {
          "type": "keyword"
      },
      "@type": {
        "type": "keyword"
      },
      "value": {
        "type": "float"
      },
      "scriptScore": {
        "type": "keyword"
      },
      "vectorParameter": {
        "type": "keyword"
      },
      "derivation": {
        "properties": {
          "entity": {
            "properties": {
              "@id": {
                "type": "keyword"
              }
            },
            "type": "nested"
          }
        },
        "type": "nested"
      },
      "generation": {
        "properties": {
          "activity": {
            "properties": {
              "used": {
                "properties": {
                  "@id": {
                    "type": "keyword"
                  }
                },
                "type": "nested"
              }
            },
            "type": "nested"
          }
        },
        "type": "nested"
      }
    }
}

STATS_VIEW_MAPPING = {
    "properties": {
        "@id": {
            "type": "keyword"
        },
          "_self": {
              "type": "keyword"
          },
        "@type": {
            "type": "keyword"
        }, 
        "boosted": {
            "type": "boolean"
        },
        "scriptScore": {
            "type": "keyword"
        },
        "vectorParameter": {
            "type": "keyword"
        },
        "derivation": {
            "properties": {
              "entity": {
                "properties": {
                  "@id": {
                    "type": "keyword"
                  }
                },
                "type": "nested"
              }
            },
            "type": "nested"
        },
        "series": {
            "properties": {
                "statistic": {
                    "type": "keyword"
                },
                "value": {
                    "type": "float"
                }
            },
            "type": "nested"
        }
    } 
}

In [221]:
def set_elastic_view(forge, view):
    views_endpoint = "/".join((
        ENDPOINT,
        "views",
        quote_plus(forge._store.bucket.split("/")[0]),
        quote_plus(forge._store.bucket.split("/")[1])))
    forge._store.service.elastic_endpoint["endpoint"] = "/".join(
        (views_endpoint, quote_plus(view), "_search"))

    
def get_all_vectors(forge):
    return forge.elastic("""
        {
          "query": {
              "term": {
                  "_deprecated": false
                }
            }
        }
    """)


def get_es_view_mappings(dimension):
    mapping = copy.deepcopy(SIMILARITY_VIEW_MAPPING)
    mapping["properties"]["embedding"]["dims"] = dimension
    return mapping


def vectors_to_resources(forge, vectors, resources, model_id):
    jsons = []
    for i, v in enumerate(vectors):
        json_repr = {
            "@type": ["Entity", "Embedding"],
            "derivation": {
                "@type": "Derivation",
                "entity": {
                  "@id": resources[i],
                  "@type": "Entity"
                }
            },
            "embedding": v.tolist(),
            "generation": {
                "@type": "Generation",
                "activity": {
                  "@type": [
                    "Activity",
                    "EmbeddingActivity"
                  ],
                  "used": {
                    "@id": model_id,
                    "@type": "EmbeddingModel",
                  }
                }
            },
            "name": f"Embedding of {resources[i]}"
        }
        jsons.append(json_repr)
    return forge.from_json(jsons)



def compute_statistics(forge, view_id, score_formula, boosting=None):
    set_elastic_view(forge, view_id)
    all_vectors = get_all_vectors(forge)
    scores = []
    for vector_resource in all_vectors:
        vector = vector_resource._source["embedding"]
        vector_id = vector_resource._id
        neighbors = get_neighbors(
            forge, vector, vector_id,  k=len(all_vectors), score_formula=score_formula)

        for score, el in neighbors:
            boost_factor = 1
            if boosting:
                boost_factor = boosting[vector_id]
            scores.append(score * boost_factor)

    scores = np.array(list(scores))
    Statistics = namedtuple('Statistics', 'min max mean std')
    return len(scores), Statistics(
        scores.min(), scores.max(),
        scores.mean(), scores.std())


def register_stats(forge, view_id, sample_size, stats, formula,
                   tag, boosted=False):
    
    stat_values = [
        {
          "statistic": "min",
          "unitCode": "dimensionless",
          "value": stats.min
        },
        {
          "statistic": "max",
          "unitCode": "dimensionless",
          "value": stats.max
        },
        {
          "statistic": "mean",
          "unitCode": "dimensionless",
          "value": stats.mean
        },
        {
          "statistic": "standard deviation",
          "unitCode": "dimensionless",
          "value": stats.std
        },
        {
          "statistic": "N",
          "unitCode": "dimensionless",
          "value": sample_size
        }
    ]

    stats = forge.search({
        "type": "ElasticSearchViewStatistics",
        "boosted": boosted,
        "derivation": {
            "entity": {
                "id": view_id
            }
        }
    })
    
    if len(stats) > 0:
        stats_resource = stats[0]
        stats_resource.series = forge.from_json(stat_values)
        forge.update(stats_resource)
    else:    
        json_data = {
            "type": "ElasticSearchViewStatistics",
            "boosted": boosted,
            "scriptScore": formula,
            "series": stat_values,
            "derivation": {
                "type": "Derivation",
                "entity": {
                    "id": view_id
                }
            }
        }
        stats_resource = forge.from_json(json_data)
        forge.register(stats_resource)
    forge.tag(stats_resource, tag)
    return stats_resource

def get_score_deviation(forge, point_id, vector, score_min, score_max, k, formula):
    query = f"""{{
      "size": {k},
      "query": {{
        "script_score": {{
          "query": {{
                "exists": {{
                    "field": "embedding"
                }}
          }},
          "script": {{
            "source": "{FORMULAS[formula]}",
            "params": {{
              "query_vector": {vector}
            }}
          }}
        }}
      }}
    }}"""

    result = forge.elastic(query)
    scores = set()
    for el in result:
        if point_id != el._id:
            # Min/max normalization of the score
            score = (el._score - score_min) / (score_max - score_min)
            scores.add(score)
    scores = np.array(list(scores))
    return math.sqrt(((1 - scores)**2).mean())


def compute_boosting_factors(forge, view_id, stats, formula, neighborhood_size=10):
    boosting_factors = dict()
    # Compute local similarity deviations for points
    set_elastic_view(forge, view_id)
    all_vectors = get_all_vectors(forge)
    for vector_resource in all_vectors:
        point_id = vector_resource._id
        vector = vector_resource._source["embedding"]
        boosting_factors[point_id] = 1 + get_score_deviation(
            forge, point_id, vector, stats.min, stats.max,
            neighborhood_size, formula)

    return boosting_factors


def register_boosting_data(forge, view_id, boosting_factors,
                           formula, tag):
    generation_resource = forge.from_json({
        "type": "Generation",
        "activity": {
            "type": "Activity",
            "used": {
                "id": view_id,
                "type": "AggregateElasticSearchView"
            }
        }
    })
    resources = []
    for k, v in boosting_factors.items():
        existing_data = forge.search({
            "type": "SimilarityBoostingFactor",
            "derivation": {
                "entity": {
                    "id": k
                }
            }
        })
        if len(existing_data) > 0:
            boosting_resource = existing_data[0]
            boosting_resource.value = v
            boosting_resource.generation = generation_resource
            forge.update(boosting_resource)
            resources = existing_data
        else:       
            json_data = {
                "type": "SimilarityBoostingFactor",
                "value": v,
                "unitCode": "dimensionless",
                "scriptScore": FORMULAS[formula],
                "vectorParameter": "query_vector",
                "derivation": {
                    "type": "Derivation",
                    "entity": {
                        "id": k,
                        "type": "Embedding"
                    }
                },
            }
            boosting_resource = forge.from_json(json_data)
            boosting_resource.generation = generation_resource
            resources.append(boosting_resource)
            forge.register(boosting_resource)
        forge.tag(boosting_resource, tag)
    return resources


def get_neighbors(forge, vector, vector_id, k, score_formula="euclidean",
                  result_filter=None):
    """Get nearest neighbors of the provided vector."""
    # Preprocess result filter
    if result_filter:
        if parameters:
            result_filter = Template(result_filter).substitute(parameters)
        result_filter = ",\n" + result_filter
    else:
        result_filter = ""

    similarity_query = """
        {
          "size": $_k,
          "query": {
            "script_score": {
                "query": {
                    "bool" : {
                      "must_not" : {
                        "term" : { "_self": "$_vectorId" }
                      },
                      "must": { "exists": { "field": "embedding" } }
                      $_resultFilter
                    }
                },
                "script": {
                    "source": "$_formula",
                    "params": {
                      "query_vector": $_vector
                    }
                }
            }
          }
    }
    """
    similarity_query = Template(similarity_query).substitute({
        "_vectorId": vector_id,
        "_vector": vector,
        "_k": k,
        "_formula": FORMULAS[score_formula],
        "_resultFilter": result_filter
    })

    return [
        (el._score, el._source)
        for el in forge.elastic(similarity_query, limit=1000)
    ]

In [152]:
TOKEN = getpass.getpass()

········


In [131]:
ENDPOINT = "https://staging.nexus.ocp.bbp.epfl.ch/v1"

In [132]:
forge = KnowledgeGraphForge(
    "../../configs/new-forge-config.yaml",
    endpoint=ENDPOINT,
    token=TOKEN, 
    bucket="dke/inference-test")

## Populate brain regions

In [None]:
brain_region = {
  "@context": "https://neuroshapes.org",
  "@id": "https://neuroshapes.org/BrainRegion",
  "@type": "Class",
  "altLabel": "regional part of the brain",
  "definition": "Anatomical divisons of the brain according to one or more criteria, e.g. cytoarchitectural, gross anatomy. Parts may be contiguous in 3D or not, e.g., basal ganglia.",
  "isDefinedBy": "https://bbp.epfl.ch/ontologies/core/bmo",
  "label": "Brain Region",
}
isocortex = {
  "@id": "http://api.brain-map.org/api/v2/data/Structure/315",
  "@type": "Class",
  "atlas_id": 746,
  "color_hex_triplet": "70FF71",
  "graph_order": 5,
  "hemisphere_id": 3,
  "identifier": "315",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mba",
  "isPartOf": "mba:695",
  "label": "Isocortex",
  "notation": "Isocortex",
  "prefLabel": "Isocortex",
  "st_level": 5,
  "subClassOf": "https://neuroshapes.org/BrainRegion"
}

ILA = {
  "@id": "http://api.brain-map.org/api/v2/data/Structure/44",
  "@type": "Class",
  "atlas_id": 146,
  "color_hex_triplet": "59B363",
  "graph_order": 245,
  "hemisphere_id": 3,
  "identifier": "44",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mba",
  "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315",
  "label": "Infralimbic area",
  "notation": "ILA",
  "prefLabel": "Infralimbic area",
  "st_level": 8,
  "subClassOf": "nsg:BrainRegion"
}

PTLp = {
  "@id": "http://api.brain-map.org/api/v2/data/Structure/22",
  "@type": "Class",
  "atlas_id": 285,
  "color_hex_triplet": "009FAC",
  "graph_order": 339,
  "hemisphere_id": 3,
  "identifier": "22",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mba",
  "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315",
  "label": "Posterior parietal association areas",
  "notation": "PTLp",
  "prefLabel": "Posterior parietal association areas",
  "st_level": 6,
  "subClassOf": "nsg:BrainRegion"
}

ACA = {
  "@id": "http://api.brain-map.org/api/v2/data/Structure/31",
  "@type": "Class",
  "atlas_id": 3,
  "color_hex_triplet": "40A666",
  "graph_order": 220,
  "hemisphere_id": 3,
  "identifier": "31",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mba",
  "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315",
  "label": "Anterior cingulate area",
  "notation": "ACA",
  "prefLabel": "Anterior cingulate area",
  "st_level": 8,
  "subClassOf": "nsg:BrainRegion"
}

ACAv = {
  "@id": "http://api.brain-map.org/api/v2/data/Structure/48",
  "@type": "Class",
  "atlas_id": 5,
  "color_hex_triplet": "40A666",
  "graph_order": 232,
  "hemisphere_id": 3,
  "identifier": "48",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mba",
  "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/31",
  "label": "Anterior cingulate area, ventral part",
  "notation": "ACAv",
  "prefLabel": "Anterior cingulate area, ventral part",
  "st_level": 9,
  "subClassOf": "nsg:BrainRegion"
}

thalamus = {
  "@id": "http://api.brain-map.org/api/v2/data/Structure/549",
  "@type": "Class",
  "atlas_id": 351,
  "color_hex_triplet": "FF7080",
  "graph_order": 641,
  "hemisphere_id": 3,
  "identifier": "549",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mba",
  "isPartOf": "mba:1129",
  "label": "Thalamus",
  "notation": "TH",
  "prefLabel": "Thalamus",
  "st_level": 5,
  "subClassOf": "nsg:BrainRegion"
}

In [None]:
# brain_region_resources = [brain_region, thalamus, isocortex, ILA, PTLp, ACA, ACAv]
# forge.register(forge.from_json(brain_region_resources))

In [None]:
model_id = "https://bbp.epfl.ch/neurosciencegraph/data/BrainRegionEmbeddingModel"
brain_region_model = {
    "@id": model_id,
    "@type": "EmbeddingModel",
    "name": "Brain Region Hierarchical Embedding Model",
    "prefLabel": "Brain region model",
    "similarity": "euclidean",
    "vectorDimension": 2
}

In [None]:
forge.register(forge.from_json(brain_region_model))

In [None]:
regions = [
    "https://neuroshapes.org/BrainRegion",
    "http://api.brain-map.org/api/v2/data/Structure/315",
    "http://api.brain-map.org/api/v2/data/Structure/44", 
    "http://api.brain-map.org/api/v2/data/Structure/22",
    "http://api.brain-map.org/api/v2/data/Structure/31",
    "http://api.brain-map.org/api/v2/data/Structure/48",
    "http://api.brain-map.org/api/v2/data/Structure/549"
]
region_embeddings = np.array([
    [2, 1],
    [3, 2],
    [3.2, 2.8], 
    [3.8, 1.8],
    [2.8, 3.2],
    [2.6, 3],
    [1, 2]
])
plt.scatter(region_embeddings[:, 0], region_embeddings[:, 1])
plt.show()

In [None]:
resources = vectors_to_resources(
    forge, region_embeddings, regions, model_id)
forge.register(resources)
forge.tag(resources, model_id.split("/")[-1])

In [None]:
# Create an ES view
view_id = "https://bbp.epfl.ch/neurosciencegraph/data/brain-region-embedding-view"
# similarity_view = nxs.views.create_es(
#     "dke", "inference-test",
#     view_id="https://bbp.epfl.ch/neurosciencegraph/data/brain-region-embedding-view",
#     mapping=get_es_view_mappings(2),
#     tag=model_id.split("/")[-1],
#     resource_types=[f"https://neuroshapes.org/Embedding"],
#     source_as_text=False,
#     include_metadata=True, 
#     include_deprecated=False)

In [76]:
view_id

'https://bbp.epfl.ch/neurosciencegraph/data/brain-region-embedding-view'

## Populate MTypes

In [None]:
# Create statistics resources and boosting factors
view_id = "https://bbp.epfl.ch/neurosciencegraph/data/brain-region-embedding-view"
sample_size, stats = compute_statistics(forge, view_id, "euclidean", boosting=boosting_factors)
register_stats(
    forge, view_id, sample_size, stats, "euclidean",
    model_id.split("/")[-1], boosted=True)

In [None]:
mtype = {
  "@id": "https://neuroshapes.org/MType",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "Neuron Morphological Type",
}

pc = {
  "@id": "https://neuroshapes.org/PyramidalNeuron",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "Pyramidal neuron",
  "notation": "Pyr",
  "prefLabel": "Pyramidal neuron",
  "subClassOf": "https://neuroshapes.org/MType"
}

tpc = {
  "@id": "https://neuroshapes.org/TufterdPyramidalNeuron",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "Tufterd Pyramidal Neuron",
  "notation": "TPyr",
  "prefLabel": "Tufterd Pyramidal Neuron",
  "subClassOf": "https://neuroshapes.org/PyramidalNeuron"
}

upc = {
  "@id": "https://neuroshapes.org/UntufterdPyramidalNeuron",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "Untufterd Pyramidal Neuron",
  "notation": "UPyr",
  "prefLabel": "Untufterd Pyramidal Neuron",
  "subClassOf": "https://neuroshapes.org/PyramidalNeuron"
}

l5_upc = {
  "@id": "http://uri.interlex.org/base/ilx_0381371",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "L5_UPC",
  "notation": "L5_UPC",
  "prefLabel": "Layer 5 Untufted Pyramidal Cell",
  "subClassOf": "https://neuroshapes.org/UntufterdPyramidalNeuron"
}

l6_upc = {
  "@id": "http://uri.interlex.org/base/ilx_0381377",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "L6_UPC",
  "notation": "L6_UPC",
  "prefLabel": "Layer 6 Untufted Pyramidal Cell",
  "subClassOf": "https://neuroshapes.org/UntufterdPyramidalNeuron"
}


mc = {
  "@id": "https://neuroshapes.org/MartinottiCell",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "Martinotti Cell",
  "notation": "MC",
  "prefLabel": "Martinotti Cell",
  "subClassOf": "https://neuroshapes.org/MType"
}

vpl_in = {
  "@id": "http://uri.interlex.org/base/ilx_0738235",
  "@type": "Class",
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/mtypes",
  "label": "VPL_IN",
  "notation": "VPL_IN",
  "prefLabel": "Ventral posterolateral nucleus of the thalamus interneuron",
  "subClassOf": "https://neuroshapes.org/MType"
}

In [None]:
# mtype_resources = [mtype, pc, mc, upc, tpc, l5_upc, l6_upc]
# forge.register(forge.from_json(mtype_resources))

In [172]:
model_id = "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel"
mtype_model = {
    "@id": model_id,
    "@type": "EmbeddingModel",
    "name": "MType Embedding Model",
    "prefLabel": "MType model",
    "similarity": "euclidean",
    "vectorDimension": 2
}

In [None]:
forge.register(forge.from_json(mtype_model))

In [173]:
model_id2 = "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel2"
mtype_model2 = {
    "@id": model_id2,
    "@type": "EmbeddingModel",
    "name": "MType Embedding Model 2",
    "prefLabel": "MType model 2",
    "similarity": "euclidean",
    "vectorDimension": 2
}

In [None]:
forge.register(forge.from_json(mtype_model2))

In [None]:
mtypes = [
    "https://neuroshapes.org/MType",
    "https://neuroshapes.org/PyramidalNeuron",
    "https://neuroshapes.org/TufterdPyramidalNeuron", 
    "https://neuroshapes.org/UntufterdPyramidalNeuron",
    "http://uri.interlex.org/base/ilx_0381371",
    "http://uri.interlex.org/base/ilx_0381377",
    "https://neuroshapes.org/MartinottiCell",
    "http://uri.interlex.org/base/ilx_0738235"
]
mtype_embeddings = np.array([
    [0, 0],
    [0.5, 2.5],
    [1, 4],
    [0.5, 3.8],
    [0.4, 4.3],
    [0.6, 4.5],
    [1, 1.5],
    [2, 0]
])
plt.scatter(mtype_embeddings[:, 0], mtype_embeddings[:, 1])
plt.show()

In [None]:
resources = vectors_to_resources(
    forge, mtype_embeddings, mtypes, model_id)
forge.register(resources)
forge.tag(resources, model_id.split("/")[-1])

In [None]:
mtype_embeddings2 = np.array([
    [1.5, 1],
    [1.5, 2],
    [2.5, 2],
    [2, 2.5],
    [1, 4],
    [4, 3.5],
    [3, 1],
    [3.5, 0.5]
])
plt.scatter(mtype_embeddings2[:, 0], mtype_embeddings2[:, 1])
plt.show()

In [None]:
resources = vectors_to_resources(
    forge, mtype_embeddings2, mtypes, model_id2)
forge.register(resources)
forge.tag(resources, model_id2.split("/")[-1])

In [228]:
resources = forge.search({
    "type": "SimilarityBoostingFactor"
})

In [235]:
# Create an ES view
view_id = "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view"
# similarity_view = nxs.views.create_es(
#     "dke", "inference-test",
#     view_id=view_id,
#     mapping=get_es_view_mappings(2),
#     tag=model_id.split("/")[-1],
#     resource_types=[f"https://neuroshapes.org/Embedding"],
#     source_as_text=False,
#     include_metadata=True, 
#     include_deprecated=False)

In [236]:
MODEL_TAG = model_id.split("/")[-1] + "?rev=3"

In [237]:
# Create statistics resources and boosting factors
sample_size, stats = compute_statistics(forge, view_id, "euclidean")
register_stats(
    forge,view_id, sample_size, stats, "euclidean",
    MODEL_TAG, boosted=False)
boosting_factors = compute_boosting_factors(
    forge, view_id, stats, "euclidean")
register_boosting_data(
    forge, view_id, boosting_factors,
    "euclidean", MODEL_TAG)

<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True


[Resource(_last_action=Action(error=None, message=None, operation='_tag_one', succeeded=True), _validated=False, _synchronized=True, _store_metadata={'id': 'https://bbp.epfl.ch/neurosciencegraph/data/9fa5ad41-2c46-49a2-bd46-c0251c27c667', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2022-01-27T16:50:48.836Z', '_createdBy': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/realms/bbp/users/oshurko', '_deprecated': False, '_incoming': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/9fa5ad41-2c46-49a2-bd46-c0251c27c667/incoming', '_outgoing': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/9fa5ad41-2c46-49a2-bd46-c0251c27c667/outgoing', '_project': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_rev': 5, '_schemaProject': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_self': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test

In [238]:
# boosting_view_id = "https://bbp.epfl.ch/neurosciencegraph/data/mtype-boosting-view"
# boosting_view = nxs.views.create_es(
#     "dke", "inference-test",
#     view_id=boosting_view_id,
#     mapping=BOOSTING_VIEW_MAPPING,
#     tag=model_id.split("/")[-1],
#     resource_types=[
#         f"https://neuroshapes.org/SimilarityBoostingFactor"],
#     source_as_text=False,
#     include_metadata=True, 
#     include_deprecated=False)

In [248]:
stats

Statistics(min=1.0130500762442376, max=1.5284653544768418, mean=1.2021944611540314, std=0.1222850303718459)

In [239]:
# Create statistics resources and boosting factors
view_id = "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view"
sample_size, stats = compute_statistics(forge, view_id, "euclidean", boosting=boosting_factors)
register_stats(
    forge, view_id, sample_size, stats, "euclidean",
    MODEL_TAG, boosted=True)

<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True


Resource(_last_action=Action(error=None, message=None, operation='_tag_one', succeeded=True), _validated=False, _synchronized=True, _store_metadata={'id': 'https://bbp.epfl.ch/neurosciencegraph/data/a379d1e1-d4da-4501-af90-edf6c6a73408', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2022-01-27T13:48:55.965Z', '_createdBy': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/realms/bbp/users/oshurko', '_deprecated': False, '_incoming': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/a379d1e1-d4da-4501-af90-edf6c6a73408/incoming', '_outgoing': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/a379d1e1-d4da-4501-af90-edf6c6a73408/outgoing', '_project': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_rev': 16, '_schemaProject': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_self': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test

Create a view view serving statistics

In [123]:
stat_view = nxs.views.create_es(
    "dke", "inference-test",
    view_id="https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view",
    mapping=STATS_VIEW_MAPPING,
    tag=model_id.split("/")[-1],
    resource_types=["https://neuroshapes.org/ElasticSearchViewStatistics"],
    source_as_text=False,
    include_metadata=True, 
    include_deprecated=False)

In [124]:
stat_view["@id"]

'https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view'

In [242]:
# Create an ES view
view_id = "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view2"
# similarity_view = nxs.views.create_es(
#     "dke", "inference-test",
#     view_id=view_id,
#     mapping=get_es_view_mappings(2),
#     tag=model_id.split("/")[-1],
#     resource_types=[f"https://neuroshapes.org/Embedding"],
#     source_as_text=False,
#     include_metadata=True, 
#     include_deprecated=False)

In [243]:
MODEL_TAG = model_id2.split("/")[-1] + "?rev=3"

In [244]:
# Create statistics resources and boosting factors
sample_size, stats = compute_statistics(forge, view_id, "cosine")
register_stats(
    forge,view_id, sample_size, stats, "cosine",
    MODEL_TAG, boosted=False)
boosting_factors = compute_boosting_factors(
    forge, view_id, stats, "cosine")
register_boosting_data(
    forge, view_id, boosting_factors,
    "cosine", MODEL_TAG)

<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True
<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True


[Resource(_last_action=Action(error=None, message=None, operation='_tag_one', succeeded=True), _validated=False, _synchronized=True, _store_metadata={'id': 'https://bbp.epfl.ch/neurosciencegraph/data/68dcab30-996a-4e03-b406-80f60a5866df', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2022-01-27T16:52:15.951Z', '_createdBy': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/realms/bbp/users/oshurko', '_deprecated': False, '_incoming': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/68dcab30-996a-4e03-b406-80f60a5866df/incoming', '_outgoing': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/68dcab30-996a-4e03-b406-80f60a5866df/outgoing', '_project': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_rev': 7, '_schemaProject': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_self': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test

In [245]:
# boosting_view_id = "https://bbp.epfl.ch/neurosciencegraph/data/mtype-boosting-view2"
# boosting_view = nxs.views.create_es(
#     "dke", "inference-test",
#     view_id=boosting_view_id,
#     mapping=BOOSTING_VIEW_MAPPING,
#     tag=model_id2.split("/")[-1],
#     resource_types=[
#         f"https://neuroshapes.org/SimilarityBoostingFactor"],
#     source_as_text=False,
#     include_metadata=True, 
#     include_deprecated=False)

In [246]:
# Create statistics resources and boosting factors
view_id = "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view2"
sample_size, stats = compute_statistics(forge, view_id, "cosine", boosting=boosting_factors)
register_stats(
    forge, view_id, sample_size, stats, "cosine",
    MODEL_TAG, boosted=True)

<action> _update_one
<succeeded> True
<action> _tag_one
<succeeded> True


Resource(_last_action=Action(error=None, message=None, operation='_tag_one', succeeded=True), _validated=False, _synchronized=True, _store_metadata={'id': 'https://bbp.epfl.ch/neurosciencegraph/data/3464f95d-f20a-47fb-95e7-a27739674377', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2022-01-27T13:49:42.377Z', '_createdBy': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/realms/bbp/users/oshurko', '_deprecated': False, '_incoming': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/3464f95d-f20a-47fb-95e7-a27739674377/incoming', '_outgoing': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test/_/3464f95d-f20a-47fb-95e7-a27739674377/outgoing', '_project': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_rev': 14, '_schemaProject': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/projects/dke/inference-test', '_self': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/inference-test

In [125]:
# stat_view = nxs.views.create_es(
#     "dke", "inference-test",
#     view_id="https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view2",
#     mapping=STATS_VIEW_MAPPING,
#     tag=model_id2.split("/")[-1],
#     resource_types=["https://neuroshapes.org/ElasticSearchViewStatistics"],
#     source_as_text=False,
#     include_metadata=True, 
#     include_deprecated=False)

## Add species

In [None]:
species = {
  "@id": "https://neuroshapes.org/Species",
  "@type": "Class",
  "http://purl.obolibrary.org/obo/ncbitaxon#has_rank": {
    "@id": "http://purl.obolibrary.org/obo/NCBITaxon_species"
  },
  "isDefinedBy": "http://bbp.epfl.ch/neurosciencegraph/ontologies/speciestaxonomy/",
  "label": "Species",
  "subClassOf": "prov:Entity"
}

mus = {
  "@id": "http://purl.obolibrary.org/obo/NCBITaxon_10090",
  "@type": "Class",
  "isDefinedBy": "https://bbp.epfl.ch/ontologies/core/molecular-systems",
  "label": "Mus musculus",
  "subClassOf": "nsg:Species"
}
forge.register(forge.from_json([species]))

## Add traces and morphologies

In [None]:
trace_config = [
    ("http://uri.interlex.org/base/ilx_0381371", "L5_UPC", "http://api.brain-map.org/api/v2/data/Structure/48", "ACAv", "http://purl.obolibrary.org/obo/NCBITaxon_10090", "Mus musculus"),
    ("http://uri.interlex.org/base/ilx_0381377",  "L6_UPC", "http://api.brain-map.org/api/v2/data/Structure/44", "ILA", "http://purl.obolibrary.org/obo/NCBITaxon_10090", "Mus musculus"),
    ("https://neuroshapes.org/TufterdPyramidalNeuron", "TPC", "http://api.brain-map.org/api/v2/data/Structure/22", "PTLp", "http://purl.obolibrary.org/obo/NCBITaxon_10090", "Mus musculus"),
    ("http://uri.interlex.org/base/ilx_0738235",  "VPL_IN", "http://api.brain-map.org/api/v2/data/Structure/549", "Thalamus", "http://purl.obolibrary.org/obo/NCBITaxon_10090", "Mus musculus"),
    ("https://neuroshapes.org/MartinottiCell", "Martinotti Cell", "http://api.brain-map.org/api/v2/data/Structure/22", "PTLp", "http://purl.obolibrary.org/obo/NCBITaxon_10090", "Mus musculus")
]

In [None]:
trace_resource_ids = []
for mtype_id, mtype_label, region_id, region_label, species_id, species_label in trace_config:
    trace = {
      "@type": [
        "Trace",
        "Dataset",
        "Entity"
      ],
      "annotation": {
        "@type": [
          "Annotation",
          "MType:Annotation"
        ],
        "hasBody": {
          "@id": mtype_id,
          "@type": [
            "MType",
            "AnnotationBody"
          ],
          "label": mtype_label
        },
        "name": "M-type Annotation"
      },
      "brainLocation": {
        "@type": "BrainLocation",
        "brainRegion": {
          "@id": region_id,
          "label": region_label
        }
      },
      "subject": {
        "@type": "Subject",
        "species": {
          "@id": species_id,
          "label": species_label
        }
      }
    }
    resource = forge.from_json(trace)
    forge.register(resource)
    trace_resource_ids.append(resource.id)

In [None]:
model_id = "https://bbp.epfl.ch/neurosciencegraph/data/TraceEmbeddingModel"
trace_model = {
    "@id": model_id,
    "@type": "EmbeddingModel",
    "name": "Trace Embedding Model",
    "prefLabel": "Trace model",
    "similarity": "euclidean",
    "vectorDimension": 2
}

In [None]:
forge.register(forge.from_json(trace_model))

In [None]:
trace_embeddings = np.array([
    [1, 1],
    [1, 2],
    [2, 3],
    [6, 1],
    [4, 3]
])
plt.scatter(trace_embeddings[:, 0], trace_embeddings[:, 1])
plt.show()

In [None]:
resources = vectors_to_resources(
    forge, trace_embeddings, trace_resource_ids, model_id)
forge.register(resources)
forge.tag(resources, model_id.split("/")[-1])

In [None]:
morph_resource_ids = []
for mtype_id, mtype_label, region_id, region_label, species_id, species_label in trace_config:
    trace = {
      "@type": [
        "NeuronMorphology",
        "Dataset",
        "Entity"
      ],
      "annotation": {
        "@type": [
          "Annotation",
          "MType:Annotation"
        ],
        "hasBody": {
          "@id": mtype_id,
          "@type": [
            "MType",
            "AnnotationBody"
          ],
          "label": mtype_label
        },
        "name": "M-type Annotation"
      },
      "brainLocation": {
        "@type": "BrainLocation",
        "brainRegion": {
          "@id": region_id,
          "label": region_label
        }
      },
      "subject": {
        "@type": "Subject",
        "species": {
          "@id": species_id,
          "label": species_label
        }
      }
    }
    resource = forge.from_json(trace)
    forge.register(resource)
    morph_resource_ids.append(resource.id)

In [None]:
model_id = "https://bbp.epfl.ch/neurosciencegraph/data/NeuronMorphologyEmbeddingModel"
morph_model = {
    "@id": model_id,
    "@type": "EmbeddingModel",
    "name": "NeuronMorphology Embedding Model",
    "prefLabel": "NeuronMorphology model",
    "similarity": "euclidean",
    "vectorDimension": 2
}

In [None]:
forge.register(forge.from_json(morph_model))

In [None]:
resources = vectors_to_resources(
    forge, trace_embeddings, morph_resource_ids, model_id)
forge.register(resources)
forge.tag(resources, model_id.split("/")[-1])