In [5]:
from glob import glob
import pandas as pd
from collections import defaultdict
import json

# Generate a CUI List for a Given 'Parent' Concept
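- All children (direct and indirect descendants) of a given parent concept, e.g. 'cardiac finding', SCTID: 106063007. Note: the cells below collect the descendants of SCTID 49601007 ('Disorder of cardiovascular system (disorder)') and SCTID 301095005.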
- For medcat v0.x and medcat v1.x

The relation file is produced in the "SNOMED Relationships" section of the SNOMED-CT preprocessing tutorial: https://github.com/CogStack/MedCATtutorials/blob/main/notebooks/specialised/Preprocessing_SNOMED_CT.ipynb


In [10]:
# Load the relationship mapping (later cells treat it as child id -> list of parent ids).
# The context manager closes the file handle as soon as the JSON has been parsed.
with open('isa_rela_ch2pt_20200228.txt') as rels_file:
    rels = json.load(rels_file)

In [44]:
# Remove the "S-" prefix from child and parent ids, but only where it is actually
# present: ids that are already bare (or a second run of this cell) stay intact
# instead of silently losing their first two characters.
rels = {
    (child_id[2:] if child_id.startswith('S-') else child_id): [
        parent_id[2:] if parent_id.startswith('S-') else parent_id
        for parent_id in parent_ids
    ]
    for child_id, parent_ids in rels.items()
}

In [48]:
class Node:
    """One concept in the hierarchy, holding links to related concepts.

    Attributes are plain and public:
      * ``sctid``    - identifier passed to the constructor
      * ``name``     - display name passed to the constructor
      * ``parents``  - dict of related nodes, empty on construction
      * ``children`` - dict of related nodes, empty on construction

    ``__repr__`` expects the values of both dicts to expose ``sctid`` and ``name``.
    """

    def __init__(self, sctid, name):
        self.sctid, self.name = sctid, name
        self.parents = {}
        self.children = {}

    def __repr__(self):
        def listing(related):
            # One "sctid:name" entry per related node, tab-indented after the first.
            return "\n\t".join(f"{r.sctid}:{r.name}" for r in related.values())

        return (
            f'{self.sctid}: {self.name}\n'
            f'Parents:\n\t{listing(self.parents)}\n'
            f'Children:\n\t{listing(self.children)}\n'
        )

In [49]:
def all_children(node: "Node"):
    """Return every descendant of ``node`` (children, grandchildren, ...) once.

    The hierarchy is walked depth-first in pre-order through each node's
    ``children`` dict. A node that is reachable through several parents is
    returned only the first time it is met and its subtree is not walked
    again, so the result has no duplicates and the walk also terminates if
    the links contain a cycle.

    Args:
        node: the starting node; it is not included in the result.

    Returns:
        list of descendant nodes in order of first discovery.
    """
    descendants = []
    # Identity-based bookkeeping; the start node is pre-marked so a cycle
    # leading back to it does not report it as its own descendant.
    seen = {id(node)}
    # Explicit stack of child iterators instead of recursion.
    pending = [iter(node.children.values())]
    while pending:
        for child in pending[-1]:
            if id(child) in seen:
                continue
            seen.add(id(child))
            descendants.append(child)
            # Descend into this child before continuing with its siblings.
            pending.append(iter(child.children.values()))
            break
        else:
            # Current level exhausted; resume the level above.
            pending.pop()
    return descendants

In [52]:
# Keep only the relationship entries whose key is a concept known to the CDB.
# NOTE(review): `cdb` is not created in any visible cell; it must already be loaded.
valid_rels = {
    child_id: rels[child_id]
    for child_id in rels
    if child_id in cdb.cui2preferred_name
}

In [55]:
# Registry of every Node created so far, keyed by SCTID.
all_nodes = {}


def set_rels(node, parent_ids, _expanded=None):
    """Link ``node`` to its parents and, transitively, wire up its ancestors.

    For each id in ``parent_ids`` the parent node is taken from ``all_nodes``
    or, if missing, created from ``cdb.cui2preferred_name`` and registered.
    Ids that are in neither place are skipped. The parent/child link is
    recorded on both nodes, and the parent's own parents (from ``valid_rels``)
    are then processed the same way.

    Each ancestor is expanded at most once per top-level call. Without that
    bookkeeping an ancestor reachable over several paths is re-walked once
    per path, and a cyclic relationship would recurse without end.

    Args:
        node: the node whose parents are being attached.
        parent_ids: iterable of parent SCTIDs for ``node``.
        _expanded: internal; set of SCTIDs already expanded during this call.
            Callers should leave it at its default.
    """
    if _expanded is None:
        _expanded = {node.sctid}
    for p_id in parent_ids:
        parent_node = all_nodes.get(p_id)
        if parent_node is None:
            if p_id not in cdb.cui2preferred_name:
                # Unknown to the CDB: no node can be built for it.
                continue
            parent_node = Node(p_id, cdb.cui2preferred_name[p_id])
            all_nodes[parent_node.sctid] = parent_node
        # Record the relationship in both directions.
        node.parents[parent_node.sctid] = parent_node
        parent_node.children[node.sctid] = node
        if parent_node.sctid in valid_rels and parent_node.sctid not in _expanded:
            _expanded.add(parent_node.sctid)
            set_rels(parent_node, valid_rels[parent_node.sctid], _expanded)
    
# Ensure every concept known to the CDB has a node (reusing one that set_rels
# may already have created as somebody's parent), then attach its ancestors.
for sctid, name in cdb.cui2preferred_name.items():
    if sctid not in all_nodes:
        all_nodes[sctid] = Node(sctid, name)
    n = all_nodes[sctid]
    if sctid not in valid_rels:
        # No recorded parents for this concept.
        continue
    set_rels(n, valid_rels[sctid])

In [58]:
# Descendants of "Disorder of cardiovascular system (disorder)", SCTID: 49601007.
# Going through set() drops nodes that all_children may return more than once.
chlds = all_children(all_nodes['49601007'])
unique_disorder_nodes = set(chlds)
disorder_sctids = [descendant.sctid for descendant in unique_disorder_nodes]

In [60]:
# Descendants of the finding root, SCTID: 301095005.
# Going through set() drops nodes that all_children may return more than once.
chlds = all_children(all_nodes['301095005'])
unique_finding_nodes = set(chlds)
finding_sctids = [descendant.sctid for descendant in unique_finding_nodes]

In [61]:
# Union of both id lists, without duplicates.
all_ids = set(disorder_sctids).union(finding_sctids)

In [154]:
# mcv1_ids = [v[2:] for v in all_ids]

In [65]:
# Number of entries in the CDB's cui2names mapping (quick size check).
len(cdb.cui2names)

739788

In [157]:
# Build the id list here so this cell does not depend on an `mcv1_ids` variable
# left over from an earlier session (its only definition is commented out).
# Presumably the MedCAT v1.x form of the ids, i.e. without an "S-" prefix;
# the prefix is removed only where present. Verify against the intended format.
mcv1_ids = sorted(v[2:] if v.startswith('S-') else v for v in all_ids)
# Sorted so the written file is identical from run to run; the context manager
# makes sure the file is flushed and closed.
with open('cardio_disorder_finding_sctids_mcv1.json', 'w') as mcv1_file:
    json.dump(list(mcv1_ids), mcv1_file)

In [149]:
# Write the collected ids as a JSON list. Sorting makes the file reproducible
# (plain set iteration order varies between runs); the context manager makes
# sure the file is flushed and closed.
with open('cardio_disorder_finding_sctids.json', 'w') as ids_file:
    json.dump(sorted(all_ids), ids_file)

In [166]:
# Read the saved id list back; the context manager closes the file handle.
with open('cardio_disorder_finding_sctids.json') as ids_file:
    sctids = json.load(ids_file)
# The other cells read names from `cdb.cui2preferred_name`; use that mapping when
# the loaded CDB has it and fall back to `cdb.cui2pretty_name` otherwise, so this
# cell works with either attribute name.
# NOTE(review): presumably the two names belong to different MedCAT versions - confirm.
name_by_cui = cdb.cui2preferred_name if hasattr(cdb, 'cui2preferred_name') else cdb.cui2pretty_name
pretty_names = [name_by_cui[s_id] for s_id in sctids]

In [28]:
# One NHS term-browser URL per id; any "S-" in the id is removed before it is
# inserted as the conceptId1 query parameter.
snomed_browser_links = [
    'https://termbrowser.nhs.uk/?perspective=full&conceptId1={}'
    '&edition=uk-edition&release=v20210317&server='
    'https://termbrowser.nhs.uk/sct-browser-api/snomed'
    '&langRefset=999001261000000100,999000691000001104'.format(sct_id.replace("S-", ""))
    for sct_id in sctids
]

In [30]:
# Assemble the three parallel lists into a table and write it out as CSV.
concepts_table = pd.DataFrame(
    {
        'SCTIDs': sctids,
        'Concept Name': pretty_names,
        'Browser Link': snomed_browser_links,
    }
)
concepts_table.to_csv('cardio_disorder_concepts.csv')