# Step 0: UMLS installation

### Reference: The Owlready2 package. See https://owlready2.readthedocs.io/en/latest/pymedtermino2.html#

In [2]:
# Install the package
!pip install owlready2

Collecting owlready2
  Downloading owlready2-0.47.tar.gz (27.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.3/27.3 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: owlready2
  Building wheel for owlready2 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for owlready2: filename=owlready2-0.47-cp310-cp310-linux_x86_64.whl size=24075200 sha256=deac98253482beb75430377966e11f04349f940d842743666172580eb217e14d
  Stored in directory: /root/.cache/pip/wheels/27/3e/ba/4171c4b10bba9fe1774fbf8fcf794de889e636ce64ad83a533
Successfully built owlready2
Installing collected packages: owlready2
Successfully installed owlready2-0.47


In [3]:
# Get access to my Google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# One-off import: read the UMLS Metathesaurus zip from the mounted Google Drive
# and store the ICD10 and SNOMEDCT_US terminologies in the default world.
from owlready2 import *
from owlready2.pymedtermino2 import *
from owlready2.pymedtermino2.umls import *
# Back the default world with the on-disk file "pym.sqlite3" so the import can be saved.
default_world.set_backend(filename = "pym.sqlite3")
# Only the two terminologies needed for the SNOMED CT -> ICD-10 mapping are imported.
import_umls("/content/drive/MyDrive/UMLS/umls-2024AB-metathesaurus-full.zip", terminologies = ["ICD10", "SNOMEDCT_US"])
default_world.save() # 14 min taken using CPU

Importing UMLS from /content/drive/MyDrive/UMLS/umls-2024AB-metathesaurus-full.zip with Python version 3.10.12 and Owlready version 2-0.47...
  Parsing 2024AB/META/MRRANK.RRF as MRRANK
  Parsing 2024AB/META/MRCONSO.RRF as MRCONSO
  Parsing 2024AB/META/MRDEF.RRF as MRDEF
  Parsing 2024AB/META/MRREL.RRF as MRREL
  Parsing 2024AB/META/MRSAT.RRF as MRSAT
Breaking ORIG cycles...
    SNOMEDCT_US : 0 cycles found: 
    ICD10 : 0 cycles found: 
    SRC : 0 cycles found: 
Finalizing only properties and restrictions...
Finalizing CUI - ORIG mapping...
FTS Indexing...


# Step 1: Loading SNOMED CT

In [5]:
from owlready2 import *
# The backend was already set to "pym.sqlite3" in Step 0 of this session, so the
# line below is left commented out here.
# NOTE(review): when starting from a fresh kernel with Step 0 skipped, this line
# presumably has to be uncommented so the saved import is reopened -- confirm.
# default_world.set_backend(filename = "pym.sqlite3")
# Load the PYM ontology and pick the SNOMED CT (US edition) terminology from it.
PYM = get_ontology("http://PYM/").load()
SNOMEDCT_US = PYM["SNOMEDCT_US"]

Let's look up a concept in the model by its SNOMED CT concept ID.

In [6]:
concept = SNOMEDCT_US[14969004]
print(concept)

SNOMEDCT_US["14969004"] # Catarrhal laryngitis



In [9]:
SNOMEDCT_US.has_concept(142931000119100)

True

Let's try searching by SNOMED CT term instead.

In [8]:
concept = SNOMEDCT_US.search('Acute ulcerative laryngitis')
concept

[SNOMEDCT_US["195681001"] # Acute ulcerative laryngitis
]

# Step 2: Loading ICD-10

In [10]:
PYM = get_ontology("http://PYM/").load()
ICD10 = PYM["ICD10"]

In [11]:
print(ICD10['E10'])

ICD10["E10"] # Insulin-dependent diabetes mellitus



# Step 3: Map between SNOMED CT & ICD-10

UMLS lets us map a concept directly from one of its terminologies to another, for example from SNOMED CT to ICD-10. The examples below convert SNOMED CT concepts (looked up by concept ID or by term) to their ICD-10 code(s) and term(s).

In [None]:
SNOMEDCT_US[13617004] >> ICD10

Concepts([
  ICD10["J98.8"] # Other specified respiratory disorders
, ICD10["J98.0"] # Diseases of bronchus, not elsewhere classified
, ICD10["J04.1"] # Acute tracheitis
])

In [None]:
result = SNOMEDCT_US[14969004] >> ICD10
list(result)

[ICD10["J37.0"] # Chronic laryngitis]

In [None]:
# Search SNOMED CT by term, then map the first hit to ICD-10.
# (Only the last expression of a cell is displayed, so the bare `concept`
# line that used to sit between these two statements showed nothing.)
concept = SNOMEDCT_US.search('Acute ulcerative laryngitis')
concept[0] >> ICD10

Concepts([
  ICD10["J04.0"] # Acute laryngitis
])

In [65]:
def snomed_to_icd10(snomed_id, snomed_term):
    """Map a SNOMED CT concept to its ICD-10 concept(s).

    The concept is looked up by ``snomed_id`` first. If that ID is not present
    in the loaded ``SNOMEDCT_US`` terminology, the first hit of a search on
    ``snomed_term`` is used instead.

    Args:
        snomed_id: SNOMED CT concept ID, as accepted by ``SNOMEDCT_US[...]``.
        snomed_term: Term text used for the fallback search.

    Returns:
        list: The concepts produced by ``concept >> ICD10``. An empty list is
        returned when neither lookup finds a concept, or when the lookup or
        mapping raises ``TypeError``.
    """
    found_by_id = SNOMEDCT_US.has_concept(snomed_id)

    if not found_by_id:
        # Fall back to a search on the term when the ID is unknown.
        matches = SNOMEDCT_US.search(snomed_term)
        if len(matches) == 0:
            return []

    # Both lookup paths share one TypeError guard; previously only the ID path
    # was protected, so a failing mapping on the term path aborted the whole
    # DataFrame.apply run.
    try:
        concept = SNOMEDCT_US[snomed_id] if found_by_id else matches[0]
        return list(concept >> ICD10)
    except TypeError:
        return []

# Step 4: Codelist converting

Go through the SNOMED-coded list of ARIs from William's work and convert each code to its ICD-10 equivalent.

In [14]:
import pandas as pd

In [15]:
aris = pd.read_excel('SNOMED.xlsx', sheet_name='Sheet1')
aris.head()

Unnamed: 0,Level 3 indicator,SNOMEDConceptlD,SNOMED Preferred term,ICD10_Map,ICD10_additional code
0,ARI-Laryngitis,#10809006,Parainfluenza virus laryngotracheitis,,B97.8 Other viral agents as the cause of disea...
1,ARI-Laryngitis,#13617004,Tracheobronchitis,,
2,ARI-Laryngitis,#14969004,Catarrhal laryngitis,,J37.0
3,ARI-Laryngitis,#195680000,Acute oedematous laryngitis,,
4,ARI-Laryngitis,#195681001,Acute ulcerative laryngitis,,


In [25]:
 # NOTE: the column header really is 'SNOMEDConceptlD' (lowercase 'l' before the 'D'),
 # exactly as it appears in the aris.head() output; it is not 'SNOMEDConceptID'.
 aris[aris['SNOMEDConceptlD'].isna()] # No missingness

Unnamed: 0,Level 3 indicator,SNOMEDConceptlD,SNOMED Preferred term,ICD10_Map,ICD10_additional code,SID


In [26]:
# Function to extract integer from SNOMEDConceptID
def extract_integer(snomed_id):
    """Convert a '#'-prefixed SNOMED concept ID (e.g. ``'#10809006'``) to an int.

    Args:
        snomed_id: The ID as a string with an optional leading ``'#'``, or any
            value whose ``str()`` form is such a string (e.g. an ``int``).

    Returns:
        int: The numeric concept ID.

    Raises:
        ValueError: If the text left after removing whitespace and the leading
            ``'#'`` is not a valid integer.
    """
    # Strip surrounding whitespace first: otherwise a value such as ' #123'
    # keeps its '#' (lstrip('#') stops at the leading space) and int() fails.
    return int(str(snomed_id).strip().lstrip('#'))

In [27]:
aris['SID'] = aris['SNOMEDConceptlD'].apply(extract_integer)

In [68]:
aris['ICD10_Map'] = aris.apply(lambda row: snomed_to_icd10(row['SID'], row['SNOMED Preferred term']), axis=1)

In [69]:
# Save the updated DataFrame to a new Excel file (without the index column)
aris.to_excel('aris.xlsx', index=False)
