In [None]:
%matplotlib inline

### Parse and store SNOMED taxonomy

In [None]:
import getpass
import os
import pathlib

from rdflib.extras.external_graph_libs import *
from rdflib import Graph
import networkx as nx

In [None]:
def node_to_id(node):
    """Return the part of ``node`` that follows its last '/'.

    For a URI such as 'http://snomed.info/id/138875005' this is '138875005'.
    A value without any '/' is returned unchanged.
    """
    *_, last_segment = node.split('/')
    return last_segment

In [None]:
path = pathlib.Path("./data/snomed_taxonomy.rdf")

# Notes on the taxonomy file:
# - every 'http://snomed.info/id/SNOMED_ID' node is a child of 'http://www.w3.org/2002/07/owl#Class'
# - 'http://snomed.info/id/138875005' is the top node --> SNOMED CT Concept (SNOMED RT+CTV3)
# - edges point upwards, i.e. the neighbors of a node are only its direct parent(s)

graph = Graph()
graph.parse(path, format="application/rdf+xml")
nx_graph = rdflib_to_networkx_multidigraph(graph)

id_node_mapping = {}
# Iterate over a snapshot of the nodes because nodes are removed inside the loop.
for n in tuple(nx_graph.nodes()):
    _id = node_to_id(n)
    if _id.isnumeric() and _id != "138875005":
        id_node_mapping[_id] = n
    else:
        # Drop meta nodes (e.g. 'owl#Class') and the top level node
        # 'SNOMED CT Concept (SNOMED RT+CTV3)'.
        nx_graph.remove_node(n)

In [None]:
def id_to_node(snomed_id: str, mapping_dict: dict = id_node_mapping):
    """Look up the graph node stored for ``snomed_id`` in ``mapping_dict``.

    Returns ``None`` when the ID has no entry in the mapping.
    """
    return mapping_dict.get(snomed_id)

In [None]:
def parents_of_id(snomed_id: str, mapping_dict: dict = id_node_mapping):
    """Yield the numeric IDs of the graph neighbors of ``snomed_id``.

    Edges in ``nx_graph`` point from a concept to its direct parent(s), so the
    neighbors are the parents. Nodes whose ID is not numeric are skipped.
    """
    start_node = id_to_node(snomed_id, mapping_dict)
    for parent_node in nx_graph.neighbors(start_node):
        parent_id = node_to_id(parent_node)
        if parent_id.isnumeric():
            yield parent_id

In [None]:
def children_of_id(snomed_id: str, mapping_dict: dict = id_node_mapping):
    """Yield the numeric IDs of the graph predecessors of ``snomed_id``.

    Edges in ``nx_graph`` point from a concept to its direct parent(s), so the
    predecessors are the children. Nodes whose ID is not numeric are skipped.
    """
    start_node = id_to_node(snomed_id, mapping_dict)
    for child_node in nx_graph.predecessors(start_node):
        child_id = node_to_id(child_node)
        if child_id.isnumeric():
            yield child_id

---
### Read SNOMED Interface Terminology

In [None]:
import collections

In [None]:
interface_terminology_path = pathlib.Path("./data/SCT-GIT_de_drugs_nlp.dat")
# Maps a SNOMED ID to the list of its interface names, in file order.
interface_terminology = collections.defaultdict(list)

# The file is tab-separated with four columns per line:
# SNOMED ID, internal ID, concept name, interface name.
# The `with` block guarantees the file handle is closed again.
with interface_terminology_path.open('r', encoding='utf-8') as terminology_file:
    for line in terminology_file:
        line = line.strip()
        if not line:
            # Blank lines carry no entry; unpacking them would raise a ValueError.
            continue
        # first name in the resulting list for each entry is the preferred name
        snomed_id, internal_id, concept_name, interface_name = line.split("\t")
        interface_terminology[snomed_id].append(interface_name)

# Concepts without an outgoing edge (no parent left in the pruned graph)
# that also occur in the interface terminology.
top_level_concepts = [node_to_id(n) for n in nx_graph.nodes() if
                      nx_graph.out_degree(n) == 0 and node_to_id(n) in interface_terminology]

In [None]:
len(interface_terminology)

In [None]:
len(id_node_mapping)

In [None]:
len(top_level_concepts)

In [78]:
# Breadth-first walk starting at one top-level concept. The graph is reversed
# first because its edges point from a concept to its parent(s); on the
# reversed graph the successors of a node are its children.
# The loop previously had no body (a syntax error); collecting the pairs
# reproduces the recorded output, a list of (node, successors) tuples.
bfs_result = []
for source_node, successors_list in nx.bfs_successors(nx_graph.reverse(copy=True), id_to_node(top_level_concepts[1])):
    bfs_result.append((source_node, successors_list))
bfs_result

[(rdflib.term.URIRef('http://snomed.info/id/362981000'),
  [rdflib.term.URIRef('http://snomed.info/id/297289008'),
   rdflib.term.URIRef('http://snomed.info/id/106227002'),
   rdflib.term.URIRef('http://snomed.info/id/118598001'),
   rdflib.term.URIRef('http://snomed.info/id/272394005'),
   rdflib.term.URIRef('http://snomed.info/id/272099008'),
   rdflib.term.URIRef('http://snomed.info/id/260245000'),
   rdflib.term.URIRef('http://snomed.info/id/422096002'),
   rdflib.term.URIRef('http://snomed.info/id/309832006'),
   rdflib.term.URIRef('http://snomed.info/id/284009009'),
   rdflib.term.URIRef('http://snomed.info/id/766940004'),
   rdflib.term.URIRef('http://snomed.info/id/726711005'),
   rdflib.term.URIRef('http://snomed.info/id/767524001'),
   rdflib.term.URIRef('http://snomed.info/id/255203001'),
   rdflib.term.URIRef('http://snomed.info/id/272063003'),
   rdflib.term.URIRef('http://snomed.info/id/115670007'),
   rdflib.term.URIRef('http://snomed.info/id/419492006'),
   rdflib.term.

---
### Create/upload concepts to TOP FW

In [None]:
import requests
import uuid
import pickle

In [None]:
# Never store credentials in the notebook itself: read them from the
# environment and fall back to an interactive prompt when a variable is unset
# or empty. getpass keeps the password out of the cell output.
username = os.environ.get("TOP_USERNAME") or input("TOP username: ")
password = os.environ.get("TOP_PASSWORD") or getpass.getpass("TOP password: ")

# NOTE: despite its name, `api_token` holds the whole HTTP response of the
# token endpoint; later cells read the token via api_token.json()['access_token'].
api_token = requests.post(
    url="https://top.imise.uni-leipzig.de/auth/realms/top-realm/protocol/openid-connect/token",
    data={
        "client_id": "top-frontend",
        "username": username,
        "password": password,
        "grant_type": "password"
    },
    # Without a timeout, requests may wait indefinitely for the server.
    timeout=30,
)
# Fail here with a clear HTTPError (e.g. on wrong credentials) instead of
# with a KeyError when the token is read later.
api_token.raise_for_status()

In [53]:
pickle_path = "./data/snomed_concept_to_top_id.pickle"
# Number of top-level concepts to prepare for upload. The loop used to stop
# after the first concept via a trailing `break`; this makes that limit explicit.
# Set to None to process all top-level concepts.
MAX_CONCEPTS = 1


def _concept_payload(concept, top_id, names):
    """Build the 'single_concept' entity dict for one SNOMED concept.

    concept: SNOMED ID used for the code entry and its URI.
    top_id:  ID of the entity on the TOP side.
    names:   non-empty list of German names; the first one becomes the title,
             any further ones become synonyms.
    """
    payload = {
        "entityType": "single_concept",
        "id": top_id,
        "titles": [
            {
                "lang": "de",
                "text": names[0]
            }
        ],
        "codes": [
            {
                "uri": f"http://snomed.info/id/{concept}",
                "codeSystem": {
                    "uri": "http://snomed.info/id",
                    "shortName": "SNOMED CT"
                },
                "code": concept,
            }
        ],
        # "superConcepts": []  # ToDo: add superConcept resolution
    }

    # The "synonyms" key is only present when there is more than one name.
    if len(names) > 1:
        payload["synonyms"] = [
            {
                "lang": "de",
                "text": syn
            } for syn in names[1:]
        ]
    return payload


post_array = []

# Reuse IDs assigned in earlier runs so that a concept keeps its TOP ID.
# SECURITY: pickle.load can execute arbitrary code contained in the file;
# only load a cache file that this notebook wrote itself.
# NOTE(review): the cache looks like a plain str -> str mapping; consider JSON.
if pathlib.Path(pickle_path).exists():
    with pathlib.Path(pickle_path).open('rb') as pickle_file:
        snomed_concept_to_top_id = pickle.load(pickle_file)
else:
    snomed_concept_to_top_id = {}

for concept in top_level_concepts[:MAX_CONCEPTS]:
    if concept in interface_terminology:
        if concept in snomed_concept_to_top_id:
            _id = snomed_concept_to_top_id[concept]
        else:
            _id = str(uuid.uuid4())

        post_array.append(_concept_payload(concept, _id, interface_terminology[concept]))
        snomed_concept_to_top_id[concept] = _id

# Persist the (possibly extended) mapping; the `with` block closes and
# flushes the file deterministically.
with pathlib.Path(pickle_path).open('wb') as pickle_file:
    pickle.dump(snomed_concept_to_top_id, pickle_file)

In [54]:
organisation = "imise"
repository = "snomed_interface_terminology"
# Upload all prepared entities in one bulk request.
# NOTE: `request` holds the server's response to the upload.
request = requests.post(
    url=f"https://top.imise.uni-leipzig.de/api/{organisation}/{repository}/entity/bulk",
    headers={"Authorization": f"Bearer {api_token.json()['access_token']}"},
    json=post_array,
    # (connect, read) timeouts in seconds, so a hanging server cannot block the notebook forever.
    timeout=(10, 300),
)
# Raise an HTTPError on a 4xx/5xx answer so a failed upload does not go unnoticed.
request.raise_for_status()