In [23]:
from collections import defaultdict
import sqlalchemy as sa
import sqlalchemy.orm as so
import sqlalchemy.sql.sqltypes as sss
import sqlalchemy.sql.expression as exp
import re

from omop_alchemy.db import Base, engine
from omop_alchemy.tables.vocabulary import Concept, Concept_Relationship, Concept_Ancestor

In [2]:
import enum

class ConceptEnum(enum.Enum):
    """Enum base class with helpers for inspecting member values and names."""

    @classmethod
    def member_values(cls):
        """Lazily produce the value of each member, in definition order."""
        for member in cls:
            yield member.value

    @classmethod
    def is_member(cls, val):
        """Return True when ``val`` is falsy or equals the value of some member."""
        # falsy inputs (None, 0, '') are accepted without consulting the members
        if not val:
            return True
        known_values = [member.value for member in cls]
        return val in known_values

    @classmethod
    def labels(cls):
        """Return the member names as a list, in definition order."""
        names = []
        for member in cls:
            names.append(member.name)
        return names
    

class TNM(ConceptEnum):
    """Concept ids used as the ``parent`` when building a TNM ``VocabLookup``."""
    # presumably the OMOP concept_id of the parent concept that all TNM
    # concepts fall under -- TODO confirm against the vocabulary tables
    parent_mod = 734320

In [50]:

class VocabLookup:
    """Base class for custom vocabulary lookups.

    Builds an in-memory table that maps lower-cased, whitespace-stripped text
    to a ``concept_id`` and resolves terms against it. Terms without an entry
    resolve to the configured ``unknown`` value.

    ``self._correction`` holds an optional ordered list of callable corrections:
    - try to match the raw input string first
    - then apply corrections in order and return the first match
    - examples of correction functions would be stripping punctuation,
      spelling correction functions
    """

    def __init__(self, 
                 unknown=0,           # TODO: tbd do we want to define behaviours when mapping is not found?
                 parent=None,         # used when you want to pull all child concepts under a given parent into the lookup
                 domain=None,         # otherwise we are grabbing by specification of domain
                 standard_only=True): # for when you want to toggle between grabbing children from standard concepts strictly or not
        """Populate the lookup table from the database.

        Args:
            unknown: value returned for unmatched terms. May be a plain value
                (e.g. ``0``) or an object exposing ``.value`` such as an enum
                member; in the latter case the underlying value is stored.
            parent: concept id (or ``ConceptEnum`` member) under which all
                child concepts are pulled into the lookup.
            domain: ``domain_id`` whose concepts are pulled into the lookup.
            standard_only: when True the children of ``parent`` come from
                ``Concept_Ancestor``; otherwise from a walk over 'Subsumes'
                rows in ``Concept_Relationship``.
        """
        # Store the plain value so that both return_unknown() and the
        # "was anything found?" comparison in lookup() work for ints and
        # enum members alike.
        self._unknown = getattr(unknown, 'value', unknown)
        self._lookup = defaultdict(self.return_unknown)
        self._domain = domain
        self._standard_only = standard_only
        # parent parameter is the high-level concept under which you want to pull
        # in all available matches - e.g. TNM stages, which can grab all concepts 
        # that fall under the parent concept from the concept_relationship table
        self._parent = parent.value if isinstance(parent, ConceptEnum) else parent
        self._correction = None
        with so.Session(engine) as session:
            # TBD: question - do we need to provide support for combining parent 
            # definition with domain def? is this a likely use-case? it won't fail 
            # for now, but perhaps check?
            if parent is not None:
                self.get_lookup(session)
            if domain is not None:
                # runs second, so domain entries overwrite parent entries on key clashes
                self.get_domain_lookup(session)
        
        # TODO: consider generalisable creation of custom maps to host 
        # manual mappings of local concepts to OMOP concepts as well?

    @staticmethod
    def _normalise(text):
        # Single definition of the key format shared by loading and lookup.
        return text.lower().strip()

    def _resolve(self, key):
        # Membership test instead of ``self._lookup[key]``: indexing a
        # defaultdict on a miss would insert the missed key permanently.
        if key in self._lookup:
            return self._lookup[key]
        return self.return_unknown()

    def get_domain_lookup(self, session):
        """Add every concept of ``self._domain`` to the lookup, keyed by name."""
        rows = session.query(Concept.concept_name,
                             Concept.concept_id
                            ).filter(Concept.domain_id==self._domain).all()
        for row in rows:
            self._lookup[self._normalise(row.concept_name)] = row.concept_id
    
    def get_standard_hierarchy(self, session):
        """Return the descendant concept of each ``Concept_Ancestor`` row whose
        ancestor is ``self._parent``."""
        children = session.query(Concept_Ancestor
                                ).options(so.joinedload(Concept_Ancestor.descendant)
                                ).filter(Concept_Ancestor.ancestor_concept_id == self._parent).distinct().all()
        return [c.descendant for c in children]
        
    def get_all_hierarchy(self, this_level, concepts, session):
        """Collect concepts reachable from ``this_level`` via 'Subsumes' rows.

        Args:
            this_level: concept ids whose direct children are fetched first.
            concepts: list that is extended in place with the concepts found.
            session: open SQLAlchemy session.

        Returns:
            The same ``concepts`` list, each concept appearing at most once.
        """
        # TODO: check if we want to do this thru Concept_Ancestor strictly
        # if confirmed we only want to be doing for standard concepts?
        # this is iterative and slow way of doing it to arbitrary depths
        # otherwise...but good if you want to include non-standard 
        # children - maybe useful in condition_concept_id?

        # Track ids already collected so a concept reachable through several
        # parents is neither stored twice nor expanded twice.
        seen = {c.concept_id for c in concepts}
        frontier = tuple(this_level)
        while len(frontier) > 0:
            children = session.query(Concept
                                    ).join(Concept_Relationship, Concept_Relationship.concept_id_2==Concept.concept_id
                                    ).filter(Concept_Relationship.concept_id_1.in_(frontier)
                                    ).filter(Concept_Relationship.relationship_id=='Subsumes').distinct().all()
            fresh = []
            for child in children:
                if child.concept_id not in seen:
                    seen.add(child.concept_id)
                    fresh.append(child)
            concepts.extend(fresh)
            # only newly discovered concepts need expanding at the next level
            frontier = tuple(child.concept_id for child in fresh)
        return concepts

    def get_lookup(self, session):
        """Add every concept under ``self._parent`` to the lookup, keyed by both
        concept name and concept code."""
        if not self._standard_only:
            concepts = self.get_all_hierarchy(tuple([self._parent]), [], session)
        else:
            concepts = self.get_standard_hierarchy(session)
        for c in concepts:
            # keys are stripped as well as lower-cased so they agree with the
            # normalisation applied to the search term in lookup()
            self._lookup[self._normalise(c.concept_name)] = c.concept_id
            self._lookup[self._normalise(c.concept_code)] = c.concept_id

    def return_unknown(self):
        """Return the value used for terms that have no mapping."""
        return self._unknown

    def lookup_exact(self, term):
        """Resolve ``term`` (``None`` is treated as '') without corrections."""
        raw = '' if term is None else term
        return self._resolve(self._normalise(raw))

    def lookup(self, term):
        """Resolve ``term``, falling back to the configured corrections.

        The raw term (``None`` is treated as '') is tried first. While the
        result is still the unknown value, each correction is applied in
        order to the raw term (corrections are not chained) and looked up.
        """
        raw = '' if term is None else term
        value = self._resolve(self._normalise(raw))
        for correct in self._correction or ():
            if value != self._unknown:
                break
            value = self._resolve(self._normalise(correct(raw)))
        return value

In [51]:
# Build a lookup of every concept under the TNM parent concept; with the default
# standard_only=True the children are taken from Concept_Ancestor.
# NOTE(review): the AttributeError recorded for this cell mentions `descendent`,
# a spelling that does not appear in the class as shown -- the output is
# probably stale; restart the kernel and re-run to confirm.
tnm = VocabLookup(parent=TNM.parent_mod)

AttributeError: 'Concept_Ancestor' object has no attribute 'descendent'

In [49]:
# concept_id of the first entry in `l` (the result of the get_all_hierarchy cell)
l[0].concept_id

1633306

In [47]:
with so.Session(engine) as session:
    # get_all_hierarchy is a VocabLookup method and needs the ids to start from;
    # the root tuple mirrors the call made inside VocabLookup.get_lookup.
    # NOTE(review): TNM.parent_mod is assumed to be the intended root -- confirm.
    l = VocabLookup().get_all_hierarchy((TNM.parent_mod.value,), [], session)

In [36]:
# distinct first elements of the entries in `l`
from_rel = {entry[0] for entry in l}

In [34]:
# distinct descendant concept ids of the rows in `from_a`
from_a_codes = {row.descendant.concept_id for row in from_a}

In [37]:
# ids present in from_a_codes but absent from from_rel
[code for code in from_a_codes if code not in from_rel]

[734320]

734320