In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd drive/My\ Drive/Colab\ Notebooks/apex-codes/entity_sum

[Errno 2] No such file or directory: 'drive/My Drive/Colab Notebooks/apex-codes/entity_sum'
/content/drive/My Drive/Colab Notebooks/apex-codes/entity_sum


## Working with PyMedTermino

### https://pythonhosted.org/PyMedTermino/

In [None]:
import json
import os

from pymedtermino import *
from pymedtermino.snomedct import *
from pymedtermino.icd10 import *

In [None]:
# Search ICD-10 for concepts matching this sub-category name.
ICD10.search("Disorders involving the immune mechanism")

[ICD10[u"D80-D89"]  # Certain disorders involving the immune mechanism,
 ICD10[u"D89.8"]  # Other specified disorders involving the immune mechanism, not elsewhere classified,
 ICD10[u"O99.1"]  # Other diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism complicating pregnancy, childbirth and the puerperium,
 ICD10[u"Z83.2"]  # Family history of diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism,
 ICD10[u"Z86.2"]  # Personal history of diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism,
 ICD10[u"N16.2"]  # Renal tubulo-interstitial disorders in blood diseases and disorders involving the immune mechanism,
 ICD10[u"Z13.0"]  # Special screening examination for diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism,
 ICD10[u"N08.2"]  # Glomerular disorders in blood diseases and disorders involving the i

## Function that accepts the name of an ICD-10 chapter sub-category and returns a list of matching disorders from SNOMED-CT

In [None]:
def _get_disorders(icd_sub_category):
  """Return the names of SNOMED-CT disorders matching ``icd_sub_category``.

  Runs ``SNOMEDCT.search`` with the given text and keeps only the results
  whose string form contains the ``(disorder)`` tag. The name is the text
  after the first `` #`` of that string form, with the tag removed and the
  surrounding whitespace stripped.

  Args:
    icd_sub_category: Search text, e.g. the name of an ICD-10 sub-category.

  Returns:
    A list of disorder names (str) in search-result order; duplicates are kept.
  """
  disorders_lst = []
  for item in SNOMEDCT.search(icd_sub_category):
    label = str(item)
    if '(disorder)' not in label:
      continue
    # Split on the FIRST ' #' only, so a term that itself contains ' #' is not
    # truncated; if the separator is missing, fall back to the whole label
    # instead of raising IndexError.
    _, separator, term = label.partition(' #')
    if not separator:
      term = label
    disorders_lst.append(term.replace('(disorder)', '').strip())

  return disorders_lst


In [None]:
# ICD-10 sub-categories searched for the chapter being processed.
# Alternative lists kept for reference (presumably for other ICD-10 chapters):
#   ['malignant neoplasm', 'In situ neoplasm', 'benign neoplasm']
#   ['disorders involving the immune mechanism', 'Haemolytic anaemia',
#    'Nutritional anaemia', 'Aplastic and other anaemias',
#    'Coagulation defects, purpura and other haemorrhagic conditions',
#    'diseases of blood and blood-forming organs']
lst_icd_sub_categories = [
  'maternal disorder',
  'puerperium complication',
  'labour complication',
  'pregnancy complication',
]

# Gather the disorders of every sub-category into one flat list (duplicates kept).
disorders_lst_per_ICD_chapter = []
for icd_sub_cat in lst_icd_sub_categories:
  disorders_lst_per_ICD_chapter.extend(_get_disorders(icd_sub_cat))

In [None]:
# Number of disorders collected across all sub-categories.
len(disorders_lst_per_ICD_chapter)

231

In [None]:
# Preview the first five collected disorders.
disorders_lst_per_ICD_chapter[:5]

['Fetal or neonatal effect of maternal nutritional disorder',
 'Maternal AND/OR fetal condition affecting labor AND/OR delivery',
 'Delivery AND/OR maternal condition affecting management',
 'Maternal syphilis during pregnancy, childbirth and the puerperium',
 'Maternal syphilis during pregnancy - baby delivered']

## Write the list of disorders for the ICD-10 chapter to a file

In [None]:
# Save the SNOMED-CT disorders, one name per line. Despite the .csv extension
# the names are written verbatim (no quoting), so commas inside a name are kept as-is.
snomed_out_path = 'SNOMED-CT-diseases/pregnancy_childbirth_and_the_puerperium.csv'

# Create the output folder if needed so open() does not fail when it is missing.
os.makedirs(os.path.dirname(snomed_out_path), exist_ok=True)

# Explicit UTF-8 keeps the file independent of the platform's default encoding.
with open(snomed_out_path, 'w', encoding='utf-8') as filehandle:
  for listitem in disorders_lst_per_ICD_chapter:
    filehandle.write('%s\n' % listitem)

## Search 

## DataMed

### https://datamed.org/APIDoc.php

In [None]:
!curl -X GET "https://datamed.org/webapi/esearch?searchtype=data&query=maternal%20complication" -H  "accept: application/json" > "DataMed_Results/pregnancy_childbirth_and_the_puerperium/maternal_disorder.json"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  140k    0  140k    0     0   110k      0 --:--:--  0:00:01 --:--:--  110k


In [None]:
# Load the saved DataMed response. The `with` block closes the file on exit,
# so no explicit close() is needed afterwards.
with open("DataMed_Results/pregnancy_childbirth_and_the_puerperium/maternal_disorder.json", "r", encoding="utf-8") as fp:
  data = json.load(fp)

In [None]:
from pprint import pprint

# Preview only the first hit: printing the whole response floods the cell
# output (Colab truncates it to the last 5000 lines).
pprint(data['hits'][:1])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                                                  'organization',
                                                  'publication',
                                                  'dataset',
                                                  'taxonomicinformation']},
                       'dataRepository': {'ID': 'SCR:004801',
                                          'homePage': 'http://www.ncbi.nlm.nih.gov/bioproject',
                                          'name': 'NCBI BioProject'},
                       'dataset': {'ID': 'PRJNA326441',
                                   'description': 'The Multi-Omic Microbiome '
                                                  'Study-Pregnancy Initiative '
                                                  '(MOMS-PI) is a '
                                                  'collaborative project with '
                                                  'the Global Alliance to '


In [None]:
import json

# Reload the saved DataMed response so this cell does not depend on earlier cells.
with open("DataMed_Results/pregnancy_childbirth_and_the_puerperium/maternal_disorder.json", "r", encoding="utf-8") as fp:
  data = json.load(fp)

# Collect the disease mentions attached to each hit.
# assumes item['_source']['NLP_Fields']['Disease'] is a list of strings — TODO confirm
diseases_list = []

for item in data['hits']:
  try:
    diseases_list.extend(item['_source']['NLP_Fields']['Disease'])
  except KeyError:
    # Hits lacking any of these keys carry no disease annotation; skip them.
    continue

# De-duplicate, then sort so the order is reproducible across kernel restarts
# (the iteration order of a set of strings can differ between runs).
diseases_list = sorted(set(diseases_list))

In [None]:
# Preview the first three de-duplicated DataMed diseases.
diseases_list[:3]

['metabolic syndrome', 'gestational diabetes mellitus', 'hypoxia']

## Combine the SNOMED-CT and DataMed disease lists into one

In [None]:
# SNOMED-CT disorders first, then the DataMed diseases; names present in both lists are kept twice.
final_diseases_lst = [*disorders_lst_per_ICD_chapter, *diseases_list]

In [None]:
# Size of the combined SNOMED-CT + DataMed list.
len(final_diseases_lst)

279

## Write the final list of diseases (disorders) to a file

In [None]:
# Save the combined disease list, one name per line. Despite the .csv extension
# the names are written verbatim (no quoting), so commas inside a name are kept as-is.
final_out_path = 'SNOMED_CT_and_DataMed_diseases/pregnancy_childbirth_and_the_puerperium.csv'

# Create the output folder if needed so open() does not fail when it is missing.
os.makedirs(os.path.dirname(final_out_path), exist_ok=True)

# Explicit UTF-8 keeps the file independent of the platform's default encoding.
with open(final_out_path, 'w', encoding='utf-8') as filehandle:
  for listitem in final_diseases_lst:
    filehandle.write('%s\n' % listitem)