In [None]:
%matplotlib inline

### Parse and store SNOMED taxonomy

In [None]:
import getpass
import pathlib

# The star import stays first among the third-party imports so that it cannot
# shadow the explicitly imported names below.
from rdflib.extras.external_graph_libs import *
from rdflib import Graph
import networkx as nx

In [None]:
def node_to_id(node):
    """Return the part of ``node`` after its last '/' (the whole value if there is none)."""
    _prefix, _slash, last_segment = node.rpartition('/')
    return last_segment

In [None]:
path = pathlib.Path("./data/snomed_taxonomy.rdf")

# Notes on the RDF export (from the original author):
# - every 'http://snomed.info/id/<SNOMED_ID>' node is a child of 'http://www.w3.org/2002/07/owl#Class'
# - 'http://snomed.info/id/138875005' is the root: SNOMED CT Concept (SNOMED RT+CTV3)
# - edges point upwards, i.e. the neighbors of a node are its direct parent(s)

graph = Graph()
graph.parse(path, format="application/rdf+xml")
nx_graph = rdflib_to_networkx_multidigraph(graph)

# Keep only real concepts: nodes whose last URI segment is numeric, except the
# root concept. Everything else (e.g. 'owl#Class') is collected first and
# dropped from the graph in one go after the scan.
id_node_mapping = {}
discarded_nodes = []
for taxonomy_node in nx_graph.nodes():
    concept_id = node_to_id(taxonomy_node)
    if concept_id.isnumeric() and concept_id != "138875005":
        id_node_mapping[concept_id] = taxonomy_node
    else:
        discarded_nodes.append(taxonomy_node)
nx_graph.remove_nodes_from(discarded_nodes)

In [None]:
def id_to_node(snomed_id: str, mapping_dict: dict = id_node_mapping):
    """Look up the graph node stored for ``snomed_id``.

    Returns ``None`` when ``mapping_dict`` has no entry for the id.
    """
    return mapping_dict.get(snomed_id)

In [None]:
def parents_of_id(snomed_id: str, mapping_dict: dict = id_node_mapping):
    """Lazily yield the ids of the graph neighbors of ``snomed_id``.

    Edges point from a concept to its parents, so these are the direct
    parents. Neighbors whose id is not numeric are skipped.
    """
    start_node = id_to_node(snomed_id, mapping_dict)
    neighbor_ids = (node_to_id(parent) for parent in nx_graph.neighbors(start_node))
    yield from (parent_id for parent_id in neighbor_ids if parent_id.isnumeric())

In [None]:
def children_of_id(snomed_id: str, mapping_dict: dict = id_node_mapping):
    """Lazily yield the ids of the graph predecessors of ``snomed_id``.

    Edges point from a concept to its parents, so the predecessors are the
    direct children. Predecessors whose id is not numeric are skipped.
    """
    start_node = id_to_node(snomed_id, mapping_dict)
    predecessor_ids = (node_to_id(child) for child in nx_graph.predecessors(start_node))
    yield from (child_id for child_id in predecessor_ids if child_id.isnumeric())

---
### Read SNOMED Interface Terminology

In [None]:
import collections

In [None]:
interface_terminology_path = pathlib.Path("./data/SCT-GIT_de_drugs_nlp.dat")

# SNOMED id -> list of interface names; the first name in each list is the
# preferred name.
interface_terminology = collections.defaultdict(list)

# Read inside a `with` block so the file handle is closed deterministically.
with interface_terminology_path.open('r', encoding='utf-8') as terminology_file:
    for line in terminology_file:
        line = line.strip()
        if not line:
            # A blank line (e.g. at the end of the file) carries no record and
            # would otherwise break the 4-column unpacking below.
            continue
        snomed_id, internal_id, concept_name, interface_name = line.split("\t")
        interface_terminology[snomed_id].append(interface_name)

# Graph edges point to parents, and the root concept was removed from the
# graph, so a node without outgoing edges has no remaining parent. Keep those
# that also have an interface terminology entry.
first_level_concepts = [
    concept_id
    for concept_id in (node_to_id(n) for n in nx_graph.nodes() if nx_graph.out_degree(n) == 0)
    if concept_id in interface_terminology
]

In [None]:
# Number of distinct SNOMED ids that have at least one interface name.
len(interface_terminology)

In [None]:
# Number of concept ids kept in the id -> graph node mapping.
len(id_node_mapping)

In [None]:
# Number of concepts without outgoing edges that also have an interface terminology entry.
len(first_level_concepts)

---
### Create/upload concepts to TOP FW

In [None]:
import requests
import uuid
import pickle
from tqdm.autonotebook import tqdm

In [None]:
def create_top_fw_concept(concept_id: str, concept_id_store: dict, top_id_store: dict, parent_id: str = None):
    """Assemble the TOP FW entity payload for one SNOMED concept.

    Args:
        concept_id: SNOMED id of the concept.
        concept_id_store: maps a SNOMED id to its names; the first name
            becomes the German title, any further names become synonyms.
        top_id_store: maps a SNOMED id to its TOP FW entity id. It is mutated
            in place: a fresh UUID4 string is stored for an unseen concept.
        parent_id: optional SNOMED id of the parent concept. The parent is
            linked only if it is present in both stores.

    Returns:
        The payload as a plain dict.
    """
    # Reuse the id of an already registered concept, otherwise register a new one.
    if concept_id in top_id_store:
        top_id = top_id_store[concept_id]
    else:
        top_id = top_id_store[concept_id] = str(uuid.uuid4())

    names = concept_id_store[concept_id]
    title = {"lang": "de", "text": names[0]}
    snomed_code = {
        "uri": f"http://snomed.info/id/{concept_id}",
        "codeSystem": {
            "uri": "http://snomed.info/id",
            "shortName": "SNOMED CT",
        },
        "code": concept_id,
    }
    payload = {
        "entityType": "single_concept",
        "id": top_id,
        "titles": [title],
        "codes": [snomed_code],
    }

    # Every name after the preferred one is a synonym.
    extra_names = names[1:]
    if extra_names:
        payload["synonyms"] = [{"lang": "de", "text": name} for name in extra_names]

    parent_known = (
        parent_id is not None
        and parent_id in concept_id_store
        and parent_id in top_id_store
    )
    if parent_known:
        payload["superConcepts"] = [
            {"entityType": "single_concept", "id": top_id_store[parent_id]}
        ]

    return payload

In [None]:
def post_to_top_fw(post_data: list, post_api_token: str, organisation: str = "imise", repository: str = "snomed_interface_terminology"):
    """POST ``post_data`` as JSON to the bulk entity endpoint of a TOP FW repository.

    Args:
        post_data: list of entity payloads, sent as the JSON request body.
        post_api_token: bearer token placed in the Authorization header.
        organisation: organisation segment of the endpoint URL.
        repository: repository segment of the endpoint URL.

    Returns:
        The decoded JSON body of the response. The HTTP status code is not
        inspected here.
    """
    bulk_url = f"https://top.imise.uni-leipzig.de/api/{organisation}/{repository}/entity/bulk"
    auth_headers = {"Authorization": "Bearer " + post_api_token}
    response = requests.post(url=bulk_url, headers=auth_headers, json=post_data)
    return response.json()

In [None]:
# Ask for the TOP FW credentials interactively. The password is read with
# getpass so that it is not echoed into (and saved with) the notebook output.
username = input("Please give your TOP FW username: ")
password = getpass.getpass("Please give your TOP FW password: ")

# NOTE: `api_token` holds the whole HTTP response of the token endpoint, not
# the token string; the token is read later via api_token.json()['access_token'].
api_token = requests.post(
    url="https://top.imise.uni-leipzig.de/auth/realms/top-realm/protocol/openid-connect/token",
    data={
        "client_id": "top-frontend",
        "username": username,
        "password": password,
        "grant_type": "password"
    }
)
# Fail right here on a rejected login instead of with a KeyError on
# 'access_token' when the first batch is uploaded.
api_token.raise_for_status()

In [40]:
pickle_path = "./data/snomed_concept_to_top_id.pickle"
BATCH_SIZE = 100  # number of entities sent per bulk request
post_array = []

# SNOMED concept id -> TOP FW entity id, reloaded from a previous run if the
# pickle file exists.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# a pickle that was written by this notebook.
_pickle_file = pathlib.Path(pickle_path)
if _pickle_file.exists():
    with _pickle_file.open('rb') as pickle_handle:
        snomed_concept_to_top_id = pickle.load(pickle_handle)
else:
    snomed_concept_to_top_id = {}


def _flush_post_array(force: bool = False):
    """Post the pending entities once a batch is full, or unconditionally if ``force``."""
    if post_array and (force or len(post_array) >= BATCH_SIZE):
        post_to_top_fw(post_array, api_token.json()['access_token'])
        post_array.clear()


# Edges point from child to parent, so the traversal from a first-level concept
# down to its descendants runs on the reversed graph. The reversed copy does
# not change inside the loop and is therefore built only once.
reversed_graph = nx_graph.reverse(copy=True)

for first_level_concept in first_level_concepts:
    for source_node, successors_list in nx.bfs_successors(reversed_graph, id_to_node(first_level_concept)):
        _source_node_concept = node_to_id(source_node)
        # Without an interface terminology entry there is nothing to upload for
        # this node, and its children are not attached to it either.
        if _source_node_concept not in interface_terminology:
            continue

        if _source_node_concept not in snomed_concept_to_top_id:
            post_array.append(
                create_top_fw_concept(_source_node_concept, interface_terminology, snomed_concept_to_top_id)
            )
            # Check after every append: the batch size can otherwise be
            # overshot here and an equality test would never match again.
            _flush_post_array()

        for node in successors_list:
            _child_node_concept = node_to_id(node)
            if _child_node_concept not in interface_terminology:
                continue

            post_array.append(
                create_top_fw_concept(_child_node_concept, interface_terminology, snomed_concept_to_top_id, _source_node_concept)
            )
            _flush_post_array()

# Send the remaining entities of the last, partially filled batch.
_flush_post_array(force=True)

# NOTE(review): persisting the id mapping was disabled in the original notebook
# and is left disabled; without it new UUIDs are generated on every fresh run.
#pickle.dump(snomed_concept_to_top_id, pathlib.Path(pickle_path).open('wb'))