## Searching
Searching should be extensible, so that SQL search can be used for data larger than KGs (knowledge graphs).
For this we use a multi-step search:
- search for ancestors and parents first
  - get the names of these, include-exclude
  - merge all of the nodes, merging with include-exclude
- for docs in the resulting KnowledgeGraph
  - filter by type
  - filter by body length
  - **separate method:** search in the body

In [1]:
import json
import pprint
from anytree.search import findall
from comorbid_graphs import ComorbidGraph, ComorbidGraphNode

In [2]:
# Prefixes that begin a query term; clean_query() splits the raw query on them.
# Both the short (inc_/exc_) and long (include_/exclude_) spellings are listed.
DIRECTION = ['inc_', 'exc_', 'include_', 'exclude_']
# Presumably the filter names allowed after a direction prefix -- not referenced
# by any code visible in this notebook, TODO confirm.
# NOTE(review): the sample query further down uses `inc_title`, but 'title' is
# not listed here; confirm whether it should be added or the query should use
# 'name' instead.
FILTERS = ['name', 'phrase', 'type', 'text_longer', 'ancestor', 'parent']

In [3]:
# Load the symp_tree fixture and build the graph used by the cells below.
# JSON text is UTF-8, so the encoding is pinned explicitly instead of relying
# on the platform's locale default (which is not UTF-8 on every system).
with open('../tests/fixtures/symp_tree.json', encoding='utf-8') as f:
    data = json.load(f)
cg = ComorbidGraph(data, ComorbidGraphNode)

In [10]:
def get_in_name(f, list_words, negate=False):
    """Find the nodes under ``f`` whose name contains any of ``list_words``.

    Matching is a case-sensitive substring test against ``node.name``.

    Args:
        f: Node the search starts from; handed to anytree's ``findall``.
        list_words: Substrings to look for in each node's name.
        negate: When true, invert the match and return the nodes whose name
            contains none of the words. (This flag used to be accepted but
            silently ignored.)

    Returns:
        The result of ``findall`` for the selected nodes.
    """
    # Materialise once: the filter runs per node, and a one-shot iterable
    # would otherwise be exhausted after the first node.
    words = tuple(list_words)

    def _selected(node):
        contains_word = any(word in node.name for word in words)
        # `!=` on two booleans is XOR: flips the result only when negating.
        return contains_word != bool(negate)

    return findall(f, filter_=_selected)

# Demo: pretty-print the nodes under cg.tree whose name contains 'symptoms' or 'abdominal'.
pprint.pprint(get_in_name(cg.tree, ['symptoms', 'abdominal']))

(ComorbidGraphNode(annotations={'hasDbXref': [{'identifier': 'hasDbXref', 'language': 'undefined', 'value': 'UMLS_CUI:C0000737', 'type': 'label'}, {'identifier': 'hasDbXref', 'language': 'undefined', 'value': 'UMLS_ICD9CM_2005_AUI:A0387333', 'type': 'label'}, {'identifier': 'hasDbXref', 'language': 'undefined', 'value': 'UMLS_ICD9CM_2005_AUI:A0637138', 'type': 'label'}, {'identifier': 'hasDbXref', 'language': 'undefined', 'value': 'ICD9CM_2005:789.00', 'type': 'label'}, {'identifier': 'hasDbXref', 'language': 'undefined', 'value': 'ICD9CM_2005:789.0', 'type': 'label'}], 'hasOBONamespace': [{'identifier': 'hasOBONamespace', 'language': 'undefined', 'value': 'symptoms', 'type': 'label'}], 'id': [{'identifier': 'id', 'language': 'undefined', 'value': 'SYMP:0000457', 'type': 'label'}]}, attributes='93', baseIri='93', id='93', instances='93', iri='93', label_IRI-based='SYMP_0000457', label_undefined='abdominal pain', name='abdominal pain', subClasses=['126', '105', '107', '109', '111', '92'

In [5]:
# Print the text rendering that cg.pretty_print_tree() returns for the loaded graph.
print(cg.pretty_print_tree())

Source
├── symptom
│   ├── urinary system symptom
│   ├── general symptom
│   ├── respiratory system and chest symptom
│   ├── neurological and physiological symptom
│   ├── musculoskeletal system symptom
│   ├── nervous system symptom
│   ├── abdominal symptom
│   ├── head and neck symptom
│   ├── skin and integumentary tissue symptom
│   ├── hemic and immune system symptom
│   ├── digestive system symptom
│   ├── cardiovascular system symptom
│   ├── nutrition, metabolism, and development symptom
│   └── reproductive system symptom
├── obsolete appendicitis
├── ankle swelling
├── respiratory distress
├── ulcerations
├── osteomyelitis
├── portal hepatitis
├── photosensitivity
├── persistent lymphocytosis
├── petechial hemorrhages
├── partial paralysis
├── reddening
├── pustular skin lesions
├── prolonged thrombocytopenia
├── proliferative tracheitis
├── proliferative mouth papules
├── proliferative skin lesions
├── self-limiting liver inflammation
├── sclerodermoid plaques
├── salivar

In [6]:
# Sample query used to exercise clean_query(): one `<direction><filter>:<value>`
# term per line, where the direction is one of the DIRECTION prefixes.
query_str = """
inc_phrase:anxiety
inc_parent:DSM-V
inc_title:Disorder
inc_type:document
inc_text_longer:300
exc_ancestor:Neurodevelopmental Disorders
"""

In [7]:
def clean_query(query_str, directions=None):
    """Split a raw query string into its individual terms.

    A term starts at every whitespace-separated token that begins with one of
    the direction prefixes and runs until the next such token, so values may
    contain spaces (``exc_ancestor:Neurodevelopmental Disorders``). All runs
    of whitespace, including newlines, are collapsed to a single space.

    Fixes over the previous version:
      * the whitespace normalisation result was discarded (``str.replace``
        returns a new string), so newlines inside a value were kept;
      * prefixes were matched anywhere in the text, so a value such as
        ``zinc_level`` was split at ``inc_``;
      * a literal ``_BREAK_`` in the input collided with the split sentinel.

    Args:
        query_str: Raw query text; may be empty or ``None``.
        directions: Iterable of direction prefixes. Defaults to the
            module-level ``DIRECTION`` list.

    Returns:
        list[str]: The terms in input order, without empty entries. Text
        preceding the first direction prefix is returned as its own entry.
    """
    if not query_str:
        return []
    if directions is None:
        directions = DIRECTION
    # str.startswith accepts a tuple of candidates; an empty tuple never matches.
    prefixes = tuple(directions)

    terms = []
    for token in query_str.split():
        if not terms or token.startswith(prefixes):
            terms.append(token)
        else:
            # Continuation of the current term's value.
            terms[-1] = terms[-1] + ' ' + token
    return terms
# Cell expression: display the terms parsed from the sample query.
clean_query(query_str)

['inc_phrase:anxiety',
 'inc_parent:DSM-V',
 'inc_title:Disorder',
 'inc_type:document',
 'inc_text_longer:300',
 'exc_ancestor:Neurodevelopmental Disorders']

In [8]:
class SearchableMixin(object):
    """Mixin that adds search helpers to a node class.

    ``search_phrase_in_text`` reads ``self.body``, so the host class must
    provide that attribute.
    """

    def search(self, query_dict):
        """Placeholder for query-driven search; not implemented yet.

        Args:
            query_dict: Presumably the parsed query -- currently unused,
                TODO confirm the expected shape once implemented.

        Returns:
            None.
        """
        return None

    def search_phrase_in_text(self, phrase):
        """Count the occurrences of ``phrase`` in ``self.body``.

        The match is a case-sensitive, non-overlapping substring count.

        Args:
            phrase: Text to look for.

        Returns:
            int: Number of occurrences; ``0`` when either the body or the
            phrase is empty or ``None``.
        """
        if not self.body or not phrase:
            return 0
        # Previously the method fell off the end here and returned None,
        # which disagreed with the integer returned by the guard above.
        return self.body.count(phrase)
        
class ComorbidNode(ComorbidGraphNode):
    """Subclass of ``ComorbidGraphNode`` that adds no behaviour of its own yet."""
