#### Demo Scripts for the crosswalk package


In [1]:
import pandas as pd

import cwmed as cw

Step 1: Download the source and target concepts from OHDSI Athena: https://athena.ohdsi.org/auth/login

1a) Log in (if you don't have an account, you can register with your email), click <span style="background-color: #85af36; color: white; padding: 4px; border: 2px solid black;">DOWNLOAD</span> in the header, and select the source and target vocabularies. 
In this example, ICD10CM is the source and SNOMED is the target (different source and targets could be selected.)

| ID | CODE    | NAME                                                                    |
|----|---------|-------------------------------------------------------------------------|
| 1  | SNOMED  | Systematic Nomenclature of Medicine - Clinical Terms (IHTSDO)            |
| 70 | ICD10CM | International Classification of Diseases, Tenth Revision, Clinical Modification (NCHS) |


1b) Click on <span style="background-color: #ad007c; color: white; padding: 4px; border: 2px solid black;">DOWNLOAD VOCABULARIES</span>. Enter a name for the bundle (here, ICD10CM-to-SNOMED) and click <span style="background-color: #2badce; color: white; padding: 4px; border: 2px solid black;">DOWNLOAD</span>.


1c) An email will be sent to your registered email (see example below).

**Link for downloading the Standardized Vocabularies**
Vocabularies release version: **v5.0 31-MAY-23**

**ICD10CM-to-SNOMED Vocabularies**:\
ICD10CM	-	International Classification of Diseases, Tenth Revision, Clinical Modification (NCHS)\
SNOMED	-	Systematic Nomenclature of Medicine - Clinical Terms (IHTSDO)

Please download and load the Standardized Vocabularies as following:
1) Click on this [link](https://athena.ohdsi.org/api/v1/vocabularies/zip/084c2629-f1e0-4e00-bd76-1b5ebea2d266) to download the zip file. Typical file sizes, depending on the number of vocabularies selected, are between 30 and 250 MB.


In [None]:
# i) Programmatic download: copy the URL behind the e-mailed link (right click -> inspect element)
# and pass it to the download function together with the input data path.
# Note: the link is time limited and, due to third-party permissions, the data API may refuse access
# (a "forbidden" message is then observed in response.content). In that case you can still click the
# link, or copy and paste the URL in your browser, and unzip the files as in ii) (cell below).
url = 'https://athena.ohdsi.org/api/v1/vocabularies/zip/084c2629-f1e0-4e00-bd76-1b5ebea2d266'
path = 'sample-data/input'
cw.download_data(url, path)

In [3]:
# ii) Alternatively, click on the link (or copy and paste the URL in your browser) and unzip the files.
# Add the CONCEPT.csv and CONCEPT_RELATIONSHIP.csv to your input data path, "sample-data/input".
# Verify that both files are present; any file that is missing is reported by name
# instead of the cell finishing silently.
import os

required_files = ['CONCEPT.csv', 'CONCEPT_RELATIONSHIP.csv']
missing_files = [name for name in required_files
                 if not os.path.isfile(os.path.join('sample-data/input', name))]

if not missing_files:
    print("Both the CONCEPT.csv and CONCEPT_RELATIONSHIP.csv files are present.")
else:
    print(f"Missing from 'sample-data/input': {', '.join(missing_files)}")

Both the CONCEPT.csv and CONCEPT_RELATIONSHIP.csv files are present.


In [4]:
import numpy as np
# 2. Get the unique vocabs from the CONCEPT.csv; they are useful in step 3 to denote the source and target values.
unique_vocabs = cw.get_unique_vocab('sample-data/input/CONCEPT.csv')
source_value = 'ICD10CM'
target_value = 'SNOMED'

# Check that the unique vocab list contains BOTH the source and the target value.
# np.isin is evaluated once per required value, so a single missing vocabulary is detected
# (asking whether *any* listed vocabulary matches would pass with only one of the two present).
required_vocabs = [source_value, target_value]
missing_vocabs = [vocab_id for vocab_id, found
                  in zip(required_vocabs, np.isin(required_vocabs, unique_vocabs))
                  if not found]

if not missing_vocabs:
    print(f"The source value '{source_value}' and target value '{target_value}' are present.")
else:
    print(f"Not present in the unique vocab list: {', '.join(missing_vocabs)}")

The source value 'ICD10CM' and target value 'SNOMED' are present.


  concepts = pd.read_csv(file_path, sep='\t')


In [5]:
# 3. Read the source-to-target vocab crosswalk as a VocabTranslator object, built from:
#    - the path to the source data, icd10.csv, and its source code column, 'icd10';
#    - the path to the CONCEPT.csv, along with the source and target vocab values observed in step 2;
#    - the path to the CONCEPT_RELATIONSHIP.csv.
vocab = cw.VocabTranslator(
    source_filepath='sample-data/input/icd10.csv',
    source_code_col='icd10',
    concept_filepath='sample-data/input/CONCEPT.csv',
    source_vocab_value='ICD10CM',
    target_vocab_value='SNOMED',
    concept_relationship_filepath='sample-data/input/CONCEPT_RELATIONSHIP.csv',
)
vocab

  df = pd.read_csv(self.concept_filepath, sep='\t',


<crosswalk.VocabTranslator at 0x130673a00>

In [6]:
# 4. Display the source-to-target table.
# The table returned by the translator is kept in `df`; only its first three rows are previewed.
df = vocab.show_source_to_target_table()
df.head(3)

Unnamed: 0,ICD10CM,ICD10CM_label,ICD10CM_omop_id,SNOMED,SNOMED_label,SNOMED_omop_id
0,A04.4,Other intestinal Escherichia coli infections,35205417,111839008,Intestinal infection due to E. coli,192815
1,A04.7,Enterocolitis due to Clostridium difficile,35205420,186431008,Clostridioides difficile infection,193688
2,A04.72,"Enterocolitis due to Clostridium difficile, no...",1326483,423590009,Clostridium difficile colitis,4307981


In [7]:
# 5. Save the source-to-target mapping table to a CSV file.
# The target path points into the output folder, sample-data/output.
vocab.save_source_to_target('sample-data/output/icd10_to_snomed.csv')

In [8]:
# 6. Save the failed source-to-target mappings to a CSV file,
# then read that same file back and preview its first three rows.
failed_mappings_path = 'sample-data/output/icd10_to_snomed_failed_mappings.csv'
vocab.save_source_to_target_failed_mappings(failed_mappings_path)
failed_mappings = pd.read_csv(failed_mappings_path)
failed_mappings.head(3)

Unnamed: 0,ICD10CM,ICD10CM_label,ICD10CM_omop_id,SNOMED,SNOMED_label,SNOMED_omop_id
0,C78.7,Secondary malignant neoplasm of liver and intr...,,,,
1,Z77.22,Contact with and (suspected) exposure to envir...,,,,
2,Z85.038,Personal history of other malignant neoplasm o...,,,,
