In [None]:
import getpass
import math
from collections import namedtuple

import numpy as np
import matplotlib.pyplot as plt

from urllib.parse import quote_plus

from kgforge.core import KnowledgeGraphForge

# Helpers

In [None]:
# Summary statistics of a collection of scores: minimum, maximum, mean and standard deviation.
Statistics = namedtuple('Statistics', 'min max mean std')

In [None]:
def set_elastic_view(forge, view):
    """Point the forge's ElasticSearch endpoint at the `_search` URL of `view`.

    The view identifier is URL-encoded with `quote_plus` and appended to the
    module-level `VIEWS_ENDPOINT`. The forge's store service configuration is
    modified in place; nothing is returned.
    """
    search_url = "/".join([VIEWS_ENDPOINT, quote_plus(view), "_search"])
    forge._store.service.elastic_endpoint["endpoint"] = search_url


def get_all_vectors(forge, resource_limit):
    """Fetch the embeddings of non-deprecated documents from the current view.

    Parameters
    ----------
    forge : object exposing `elastic(query)`
        The query is sent to whatever ElasticSearch endpoint the forge is
        currently configured with.
    resource_limit : int
        Value used for the query's "size" field.

    Returns
    -------
    dict
        Maps each hit's `_source["@id"]` to its `_source["embedding"]`.
    """
    search_body = f"""{{
        "from" : 0,
        "size" : {resource_limit},
        "query": {{
            "term": {{"_deprecated": false}}
        }}
    }}
    """
    embeddings_by_id = {}
    for hit in forge.elastic(search_body):
        doc_id = hit._source["@id"]
        embeddings_by_id[doc_id] = hit._source["embedding"]
    return embeddings_by_id


def get_all_scores(forge, vectors, formula, param_name, resource_limit=200, boosting=None):
    """Collect the distinct similarity scores between every vector and the other documents.

    For each `(id, vector)` pair in `vectors` a `script_score` query is run that
    excludes the document whose `@id` equals `id`, requires an `embedding`
    field, and scores hits with `formula`, passing `vector` as the script
    parameter `param_name`. The query size is `len(vectors)`.

    Parameters
    ----------
    forge : object exposing `elastic(query)`
    vectors : dict
        Maps document ids to embedding vectors; each vector is interpolated
        into the query text as-is.
    formula : str
        Script source inserted into the query.
    param_name : str
        Name of the script parameter that carries the query vector.
    resource_limit : int, optional
        Accepted for signature compatibility; not used by this function.
    boosting : dict, optional
        When truthy, each hit's score is multiplied by
        `1 + boosting[hit_id]`.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of the distinct (possibly boosted) scores.

    Notes
    -----
    NOTE(review): scores are gathered in a set, so equal values coming from
    different pairs are kept only once — confirm this is intended before
    relying on the array length as a sample size.
    """
    distinct_scores = set()
    for resource_id, embedding in vectors.items():
        query = f"""{{
          "size": {len(vectors)},
          "query": {{
            "script_score": {{
                "query": {{
                    "bool" : {{
                      "must_not" : {{
                        "term" : {{ "@id": "{resource_id}" }}
                      }},
                      "must": {{ "exists": {{ "field": "embedding" }} }}
                    }}
                }},
                "script": {{
                    "source": "{formula}",
                    "params": {{
                      "{param_name}": {embedding}
                    }}
                }}
            }}
          }}
        }}"""

        for hit in forge.elastic(query):
            if boosting:
                multiplier = 1 + boosting[hit._source["@id"]]
            else:
                multiplier = 1
            distinct_scores.add(hit._score * multiplier)
    return np.array(list(distinct_scores))


def get_view_stats(forge, vectors, formula, param_name, resource_limit=200, boosting=None):
    """Compute the score array of a view together with its summary statistics.

    All arguments are forwarded unchanged to `get_all_scores`.

    Returns
    -------
    tuple
        `(scores, Statistics(min, max, mean, std))`, where `scores` is the
        array returned by `get_all_scores` and the statistics are computed
        with the array's own `min`/`max`/`mean`/`std` methods.
    """
    scores = get_all_scores(
        forge, vectors, formula, param_name, resource_limit, boosting)
    summary = Statistics(
        min=scores.min(),
        max=scores.max(),
        mean=scores.mean(),
        std=scores.std())
    return scores, summary


def register_stats(forge, view_id, sample_size, stats, formula, param_name, boost=False, boosted=None):
    """Register an `ElasticSearchViewStatistics` resource describing a view's scores.

    Parameters
    ----------
    forge : object exposing `from_json(dict)` and `register(resource)`
    view_id : str
        Identifier of the view the statistics were derived from.
    sample_size : int
        Number of scores the statistics were computed on (stored as "N").
    stats : object with `min`, `max`, `mean` and `std` attributes
    formula : str
        Script-score source used to produce the scores.
    param_name : str
        Name of the script parameter carrying the query vector.
    boost : bool, optional
        Whether the scores were boosted.
    boosted : bool, optional
        Alias of `boost`; when given (not None) it takes precedence. Both
        spellings are accepted because callers use `boosted=` while the
        original signature declared `boost`.
    """
    # The body used to read an undefined name `boosted`; resolve the flag
    # from whichever keyword the caller supplied.
    is_boosted = boost if boosted is None else boosted

    def series_entry(statistic, value):
        # Every statistic is stored as a dimensionless quantity.
        return {
            "statistic": statistic,
            "unitCode": "dimensionless",
            "value": value
        }

    stats_resource = forge.from_json({
        "type": "ElasticSearchViewStatistics",
        "boosted": is_boosted,
        "scriptScore": formula,
        "vectorParameter": param_name,
        "series": [
            series_entry("min", stats.min),
            series_entry("max", stats.max),
            series_entry("mean", stats.mean),
            series_entry("standard deviation", stats.std),
            series_entry("N", sample_size)
        ],
        "derivation": {
            "type": "Derivation",
            "entity": {
                "id": view_id
            }
        },
        # TODO: add a "generation" entry (Generation -> activity -> used -> id)
        # once the id of the used entity is known.
    })
    forge.register(stats_resource)
    
    
def get_score_deviation(forge, point_id, vector, k, formula, param_name):
    """Root-mean-square gap between 1 and the scores of a point's top hits.

    Runs a `script_score` query of size `k` over documents that have an
    `embedding` field, scoring them with `formula` and passing `vector` as
    the script parameter `param_name`. Hits whose `@id` equals `point_id`
    are ignored.

    Returns
    -------
    float
        `sqrt(mean((1 - score) ** 2))` over the distinct remaining scores.

    Notes
    -----
    NOTE(review): scores are collected in a set, so hits sharing the same
    score count once — confirm this is intended.
    """
    query = f"""{{
      "size": {k},
      "query": {{
        "script_score": {{
          "query": {{
                "exists": {{
                    "field": "embedding"
                }}
          }},
          "script": {{
            "source": "{formula}",
            "params": {{
              "{param_name}": {vector}
            }}
          }}
        }}
      }}
    }}"""

    hits = forge.elastic(query)
    neighbour_scores = {
        hit._score for hit in hits if point_id != hit._source["@id"]
    }
    score_array = np.array(list(neighbour_scores))
    mean_squared_gap = ((1 - score_array) ** 2).mean()
    return math.sqrt(mean_squared_gap)


def register_boosting_data(forge, view_id, deviation, formula, param_name):
    """Register a `SimilarityBoostingSeries` resource for a view.

    Parameters
    ----------
    forge : object exposing `from_json(dict)` and `register(resource)`
    view_id : str
        Identifier of the view the boosting factors were derived from.
    deviation : dict
        Maps each entity id to its deviation; the stored boosting factor is
        `1 + deviation`.
    formula : str
        Script-score source the factors apply to.
    param_name : str
        Name of the script parameter carrying the query vector.
    """
    # Read the factors from the `deviation` argument. The previous body
    # iterated a module-level `data` variable, which only worked when a loop
    # elsewhere happened to leave that name bound to the right mapping.
    factors = [
        {"entity": {"@id": entity_id}, "value": 1 + value, "unitCode": "dimensionless"}
        for entity_id, value in deviation.items()
    ]
    resource = forge.from_json({
        "type": "SimilarityBoostingSeries",
        "scriptScore": formula,
        "vectorParameter": param_name,
        "series": factors,
        "derivation": {
            "type": "Derivation",
            "entity": {
                "id": view_id
            }
        }
    })
    forge.register(resource)

# Configure forge

In [None]:
# Ask for the access token interactively so it is not written into the notebook source.
TOKEN = getpass.getpass()

In [None]:
# Nexus deployment and the bucket (organisation / project) used throughout the notebook.
ENDPOINT = "https://staging.nexus.ocp.bbp.epfl.ch/v1"
ORG = "dke"
PROJECT = "seu-embeddings"
# Base URL of the project's views; organisation and project are URL-encoded.
VIEWS_ENDPOINT = "/".join([ENDPOINT, "views", quote_plus(ORG), quote_plus(PROJECT)])

In [None]:
# Forge session for the ORG/PROJECT bucket, configured from the nexus-forge example YAML.
forge = KnowledgeGraphForge(
    "https://raw.githubusercontent.com/BlueBrain/nexus-forge/master/examples/notebooks/use-cases/prod-forge-nexus.yml",
    token=TOKEN,
    endpoint=ENDPOINT,
    bucket="/".join((ORG, PROJECT)),
)

# Get all embedding vectors

In [None]:
# Name of the script parameter that carries the query vector in script_score queries.
vector_parameter = "query_vector"

In [None]:
# Maps each ElasticSearch view id to the key of the similarity formula (see `formulas`)
# used to score documents in that view.
views = {
    "https://bbp.epfl.ch/neurosciencegraph/data/views/es/dendrite-coprojection-embeddings-view": "cosine_similarity",
    "https://bbp.epfl.ch/neurosciencegraph/data/views/es/axon-coprojection-embeddings-view": "cosine_similarity",
    "https://bbp.epfl.ch/neurosciencegraph/data/views/es/neurite-features-view": "l2_similarity"
}

In [None]:
# Script-score sources keyed by similarity name. Both read the query vector from
# `params.query_vector`, so they only work when the script parameter is named "query_vector".
formulas = {
    # Cosine similarity shifted and halved: (cos + 1) / 2.
    "cosine_similarity": "(cosineSimilarity(params.query_vector, doc['embedding']) + 1.0) / 2",
    # L2 distance turned into a similarity: 1 / (1 + distance).
    "l2_similarity": "1 / (1 + l2norm(params.query_vector, doc['embedding']))"
}

In [None]:
# Embeddings of every view, keyed by view id; at most 200 documents are requested per view.
vectors = {}
for view_id in views:
    set_elastic_view(forge, view_id)
    view_vectors = get_all_vectors(forge, 200)
    vectors[view_id] = view_vectors

# Compute and register stats of ES indices

In [None]:
# Draft shape of a RecommenderConfiguration resource: a list of view -> model mappings.
# The original sketch was not valid Python (missing comma, `{"@id"}` set literals).
# TODO: replace the None placeholders with real view and model identifiers.
recommender_configuration_draft = {
    "type": "RecommenderConfiguration",
    "mappings": [
        {
            "view": {"@id": None},
            "model": {"@id": None}
        }
    ]
}
recommender_configuration_draft

In [None]:
# Unboosted score array of every view, keyed by view id.
global_stats = {}
for view, similarity_name in views.items():
    set_elastic_view(forge, view)
    formula = formulas[similarity_name]
    values, stats = get_view_stats(
        forge, vectors[view], formula, vector_parameter, 200)
    global_stats[view] = values
    # Registration of the unboosted statistics is currently switched off:
#     register_stats(forge, view, values.shape[0], stats, formula, vector_parameter)

In [None]:
# One 100-bin histogram of the unboosted scores per view.
for view in views:
    print("View ", view)
    view_scores = global_stats[view]
    plt.hist(view_scores, bins=100)
    plt.title("Distribution of similarities")
    plt.show()

# Compute and register boosting factors for different points

In [None]:
# Score deviation of every point in every view, computed over a query of size 10:
# deviations[view][point_id] -> float.
deviations = {}
for view in views:
    set_elastic_view(forge, view)
    view_deviations = {}
    deviations[view] = view_deviations
    for point_id, vector in vectors[view].items():
        view_deviations[point_id] = get_score_deviation(
            forge, point_id, vector, 10, formulas[views[view]], vector_parameter)

In [None]:
# Register one boosting series per view from the per-point deviations computed above.
for view, data in deviations.items():
    register_boosting_data(forge, view, data, formulas[views[view]], vector_parameter)

In [None]:
# Boosted score array of every view, keyed by view id; the statistics of each view are
# registered as they are computed.
boosted_global_stats = {}
for view in views:
    set_elastic_view(forge, view)
    formula = formulas[views[view]]
    boosted_values, boosted_stats = get_view_stats(
        forge, vectors[view], formula, vector_parameter, 200, deviations[view])
    boosted_global_stats[view] = boosted_values
    # The sample size must come from this view's boosted scores; the previous code read
    # `values`, a leftover from the unboosted cell that holds the last view's array.
    # The flag is passed as `boost`, the keyword register_stats declares.
    register_stats(
        forge, view, boosted_values.shape[0], boosted_stats,
        formula, vector_parameter, boost=True)

In [None]:
# One 100-bin histogram of the boosted scores per view.
for view in views:
    print("View ", view)
    view_scores = boosted_global_stats[view]
    plt.hist(view_scores, bins=100)
    plt.title("Distribution of similarities")
    plt.show()

In [None]:
import rdflib
from rdflib.paths import OneOrMore

In [None]:
# Load the ontology from an RDF/XML file into an in-memory graph.
# NOTE(review): the path is an absolute location on one machine; point it at your own copy.
g = rdflib.Graph()
g.parse("/Users/oshurko/Desktop/uberon.owl", format="xml")

In [None]:
from bmo_tools.ontologies import subontology_from_term

In [None]:
# Term the exploration below starts from (presumably the brain term — confirm).
entry_point = rdflib.URIRef("http://purl.obolibrary.org/obo/UBERON_0000955")
# Property used as the "part of" relation in the cells below.
part_of = rdflib.URIRef("http://purl.obolibrary.org/obo/BFO_0000050")

In [None]:
# Accumulates (source, target) pairs found by the extraction loop below.
part_of_edges = set()

In [None]:
# For every node that has `part_of` as the object of some triple, pair the labelled node
# pointing at it (source) with its owl:someValuesFrom value (target).
for node, _, _ in g.triples((None, None, part_of)):
    # Keep the last owl:someValuesFrom object of the node (None when there is none).
    target = None
    for filler in g.objects(node, rdflib.OWL.someValuesFrom):
        target = filler

    # The last labelled referrer becomes the source; unlabelled referrers are dumped
    # with all their outgoing triples for inspection.
    source = None
    for referrer, link in g.subject_predicates(node):
        if not g.label(referrer):
            for pred, obj in g.predicate_objects(referrer):
                print("\t\t", referrer, pred, obj)
            print(referrer, link, node)
        else:
            source = referrer

    if source and target:
        part_of_edges.add((source, target))
    print()

In [None]:
# Extract everything that is part of the brain

In [None]:
# Walk two levels up from the entry point: print each node that references it, then,
# for every node referencing those, its outgoing triples and its own referrers.
for referrer, _ in g.subject_predicates(entry_point):
    print(referrer)
    for parent, link in g.subject_predicates(referrer):
        print("\t", parent, link, referrer)
        for pred, obj in g.predicate_objects(parent):
            print("\t\t", parent, pred, obj)
        for grandparent, pred in g.subject_predicates(parent):
            print("\t\t", grandparent, pred, parent, g.label(parent))

In [None]:
# Show the first node whose owl:onProperty is `part_of`, with the label of each of its objects.
first_subject = next(iter(g.subjects(rdflib.OWL.onProperty, part_of)), None)
if first_subject is not None:
    print(first_subject)
    for pred, obj in g.predicate_objects(first_subject):
        print(pred, g.label(obj))

In [None]:
# Show the label of the entry-point term. The bare `g.label()` call was missing its
# subject argument; `entry_point` is assumed to be the intended subject — confirm.
g.label(entry_point)

In [None]:
# Labels of the subjects linked to the entry point directly through the part-of property.
for part in g.subjects(part_of, entry_point):
    print(g.label(part))

In [None]:
# Each node referencing the entry point, followed by that node's outgoing triples.
for referrer, link in g.subject_predicates(entry_point):
    print(referrer, link)
    for pred, obj in g.predicate_objects(referrer):
        print("\t", pred, obj)

In [None]:
# Outgoing (predicate, object) pairs of the entry point.
for pred_obj in g.predicate_objects(entry_point):
    print(*pred_obj)

In [None]:
# Every subject reachable from the entry point through one or more rdfs:subClassOf links,
# followed by the entry point itself.
list(g.subjects(rdflib.RDFS.subClassOf * OneOrMore, entry_point)) + [entry_point]

In [None]:
# Write the sub-ontology to a Turtle file.
# NOTE(review): `sububeron` is not defined in any visible cell — presumably it was built with
# `subontology_from_term` in a cell that is no longer here; confirm and restore that cell.
# NOTE(review): the output path is an absolute location on one machine.
sububeron.serialize("/Users/oshurko/Desktop/nervous_uberon.ttl", format="ttl")

In [None]:
# Print every triple of the sub-ontology.
for triple in sububeron.triples((None, None, None)):
    subj, pred, obj = triple
    print(subj, pred, obj)

In [None]:
# FIXME: unfinished loop header (no iterable, no body) — this cell is a SyntaxError as written.
for (s, p, a)

In [None]:
# Note: UBERON:0001016 (presumably the term behind nervous_uberon.ttl — confirm)