In [None]:
%matplotlib inline

### Parse and store SNOMED taxonomy

In [35]:
from rdflib.extras.external_graph_libs import *
from rdflib import Graph, URIRef, Literal
import networkx as nx
import pathlib

In [68]:
def node_to_id(node):
    """Return the text after the last '/' of a node's string form.

    For 'http://snomed.info/id/138875005' this is '138875005'. If the string
    contains no '/', its whole text is returned.
    """
    # Only the final segment is needed, so split once from the right.
    return node.rsplit('/', 1)[-1]

In [52]:
# Load the SNOMED taxonomy from RDF/XML and convert it to a networkx graph.
#
# Notes on the data:
# - every node of the form 'http://snomed.info/id/SNOMED_ID' is a child of
#   'http://www.w3.org/2002/07/owl#Class'
# - 'http://snomed.info/id/138875005' is the top node
#   --> SNOMED CT Concept (SNOMED RT+CTV3)
# - edges point upwards, i.e. the neighbors of a node are only its direct parent(s)
path = pathlib.Path("./data/snomed_taxonomy.rdf")

graph = Graph()
graph.parse(path, format="application/rdf+xml")
nx_graph = rdflib_to_networkx_multidigraph(graph)

# Map each numeric ID to its graph node; nodes whose last URI segment is not
# numeric are left out of the mapping.
id_node_mapping = {}
for node in nx_graph.nodes():
    node_id = node_to_id(node)
    if node_id.isnumeric():
        id_node_mapping[node_id] = node

In [67]:
def id_to_node(snomed_id: str, mapping_dict: dict = None):
    """Look up the graph node registered for a SNOMED ID.

    Args:
        snomed_id: SNOMED ID as a string (the mapping keys are strings).
        mapping_dict: ID -> node mapping to search. When omitted (None), the
            module-level ``id_node_mapping`` is looked up at call time, so the
            function always sees the current mapping even after the cell that
            builds it has been re-run.

    Returns:
        The node stored under ``snomed_id``, or None if the ID is unknown.
    """
    # Resolve the default lazily: a default of ``id_node_mapping`` in the
    # signature would be frozen to whatever dict existed when this cell ran.
    if mapping_dict is None:
        mapping_dict = id_node_mapping
    return mapping_dict.get(snomed_id)

In [71]:
def parents_of_id(snomed_id: str, mapping_dict: dict = None):
    """Yield the numeric IDs of the graph neighbors of a SNOMED concept.

    The concept's node is looked up via ``id_to_node`` and its neighbors are
    taken from ``nx_graph.neighbors``. According to the notes in the
    graph-loading cell, edges point towards the direct parent(s). Neighbors
    whose last URI segment is not numeric are skipped.

    Args:
        snomed_id: SNOMED ID as a string.
        mapping_dict: ID -> node mapping used for the lookup. When omitted
            (None), the module-level ``id_node_mapping`` is looked up at call
            time, so a rebuilt mapping is picked up without redefining this
            function.

    Yields:
        The ID string of each neighbor whose ID is numeric.

    Note:
        This is a generator, so nothing runs until it is iterated. If
        ``snomed_id`` is not in the mapping, ``id_to_node`` returns None and
        that value is passed on to ``nx_graph.neighbors`` unchanged.
    """
    # Resolve the default lazily instead of freezing the dict that existed
    # when this cell was executed.
    if mapping_dict is None:
        mapping_dict = id_node_mapping
    for neighbor in nx_graph.neighbors(id_to_node(snomed_id, mapping_dict)):
        _id = node_to_id(neighbor)
        if _id.isnumeric():
            yield _id

---
### Read SNOMED Interface Terminology

In [73]:
import collections

In [78]:
interface_terminology_path = pathlib.Path("./data/SCT-GIT_de_drugs_nlp.dat")
# SNOMED ID -> list of interface names, in file order.
interface_terminology = collections.defaultdict(list)

# Each non-empty line must hold exactly four tab-separated fields:
# snomed_id, internal_id, concept_name, interface_name.
# The context manager closes the file handle, and iterating the handle
# streams the file instead of loading every line into memory first.
with interface_terminology_path.open('r', encoding='utf-8') as terminology_file:
    for line in terminology_file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at end of file) carry no record.
            continue
        # first name in the resulting list for each entry is the preferred name
        snomed_id, internal_id, concept_name, interface_name = line.split("\t")
        interface_terminology[snomed_id].append(interface_name)

In [81]:
# Number of distinct SNOMED IDs (keys) in the interface terminology
len(interface_terminology)

313548

In [82]:
# Number of distinct numeric IDs found among the taxonomy graph nodes
len(id_node_mapping)

366790

In [85]:
# Interface names of one example concept.
# Note: indexing a defaultdict with an unknown key inserts an empty list for it.
interface_terminology["764084004"]

['genetically modifizierte T-Zelle',
 'genetically modifizierter T-Lymphozyt',
 'genetically geänderte T-Zelle',
 'genetically geänderter T-Lymphozyt']

In [91]:
# Print the interface names of one concept, followed by the names of each
# neighbor ID yielded by parents_of_id.
concept = "764084004"
# .get() is used instead of [...] so that an unknown ID prints None and is not
# inserted into the defaultdict.
print(interface_terminology.get(concept))
for parent_names in map(interface_terminology.get, parents_of_id(concept)):
    print(parent_names)

['genetically modifizierte T-Zelle', 'genetically modifizierter T-Lymphozyt', 'genetically geänderte T-Zelle', 'genetically geänderter T-Lymphozyt']
['immunologische Substanz']
['Medikament', 'Arzneimittel', 'Arznei', 'Wirkstoff oder Arzneimittel', 'pharmakologische Substanz']


---
### Create/upload concepts to TOP FW

In [None]:
# Template of the entity model to be uploaded to TOP FW.
# Upper-case strings (SNOMED_ID, INTERFACE_NAME_PREFERRED, INTERFACE_NAME_next)
# are placeholders to be filled in per concept; the "..." in "synonyms" stands
# for further synonym entries of the same shape.
# NOTE(review): the "?" in the "id" placeholder suggests the choice of ID is
# still undecided — confirm before uploading.
{
    "entityType": "single_concept",
    "id": "SNOMED_ID?",
    "titles": [
        {
            "language": "de",
            "text": "INTERFACE_NAME_PREFERRED"
        }
    ],
    "synonyms": [
        {
            "language": "de",
            "text": "INTERFACE_NAME_next"
        },
        ...
    ],
    "codes": [
        {
            "codeSystem": {
                "uri": "http://snomed.info/id"
            },
            "code": "SNOMED_ID",
        }
    ],
    "superConcepts": [
        
    ]
}