# Get Snomed component for Geboortezorg LOINC concepts
`max_rows` is set to 1000 and 'Enable scrolling' is used on the outputs; remove the `display.max_rows` setting below if this is not desired.

In [122]:
import pandas as pd
# Allow up to 1000 rows in displayed DataFrames (notebook-wide display option).
pd.set_option('display.max_rows', 1000)

## Get the local copies
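Local copies of the LOINC part files (Part.csv and LoincPartLink_Primary.csv), the LOINC-Snomed map (DER2 format, snapshot release) and the Geboortezorg code list (lcsanalyse.xlsx).
The cells below load these files and then take a look at the map.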

In [123]:
# Locations of the local input files.
# The Windows paths are raw strings (r'...') so the backslashes are taken literally:
# sequences such as '\S', '\L' or '\d' are invalid escapes in a normal string literal,
# and a folder starting with e.g. 'n' or 't' would silently turn into a newline/tab.
parts = r'C:\Source\LOINC269\Loinc_2.69_PartFile_5.1-Beta\Part.csv'
loinc2parts = r'C:\Source\LOINC269\Loinc_2.69_PartFile_5.1-Beta\LoincPartLink_Primary.csv'
# NOTE: `map` shadows the built-in map(); the name is kept because a later cell reads it.
map = r'C:\Source\Loinc_2.65_LoincSnomedCtCooperation_20170831T120000Z\SnomedCT_LOINCRF2_PRODUCTION_20170831T120000Z\Snapshot\Refset\Content\der2_scccRefset_LOINCMapCorrelationOriginSnapshot_INT_20170731.txt'
gzcodes = 'C:/Users/marcd/Dropbox/Documents/MdGIT/Nictiz/Geboortezorg/lcsanalyse.xlsx'

In [124]:
# Read the Geboortezorg code list from the Excel workbook and preview its first rows.
gz = pd.read_excel(io=gzcodes)
gz.head(n=5)

Unnamed: 0,Attribute:code,Attribute:longname,Attribute:system,Attribute:lcs,Alternatief LOINC,Commentaar
0,LA6718-6,,,nee,,
1,LA6717-8,,,nee,,
2,LA6716-0,,,nee,,
3,LA6719-4,,,nee,,
4,LA6720-2,,,nee,,


In [125]:
# Read the primary LOINC-number-to-part links; low_memory=False parses the file in one
# pass instead of in chunks.
l2p = pd.read_csv(filepath_or_buffer=loinc2parts, low_memory=False)
l2p

Unnamed: 0,LoincNumber,LongCommonName,PartNumber,PartName,PartCodeSystem,PartTypeName,LinkTypeName,Property
0,10000-8,R wave duration in lead AVR,LP31088-5,R wave duration.lead AVR,http://loinc.org,COMPONENT,Primary,http://loinc.org/property/COMPONENT
1,10000-8,R wave duration in lead AVR,LP6879-3,Time,http://loinc.org,PROPERTY,Primary,http://loinc.org/property/PROPERTY
2,10000-8,R wave duration in lead AVR,LP6960-1,Pt,http://loinc.org,TIME,Primary,http://loinc.org/property/TIME_ASPCT
3,10000-8,R wave duration in lead AVR,LP7289-4,Heart,http://loinc.org,SYSTEM,Primary,http://loinc.org/property/SYSTEM
4,10000-8,R wave duration in lead AVR,LP7753-9,Qn,http://loinc.org,SCALE,Primary,http://loinc.org/property/SCALE_TYP
...,...,...,...,...,...,...,...,...
579444,9999-4,R wave duration in lead AVL,LP6879-3,Time,http://loinc.org,PROPERTY,Primary,http://loinc.org/property/PROPERTY
579445,9999-4,R wave duration in lead AVL,LP6960-1,Pt,http://loinc.org,TIME,Primary,http://loinc.org/property/TIME_ASPCT
579446,9999-4,R wave duration in lead AVL,LP7289-4,Heart,http://loinc.org,SYSTEM,Primary,http://loinc.org/property/SYSTEM
579447,9999-4,R wave duration in lead AVL,LP7753-9,Qn,http://loinc.org,SCALE,Primary,http://loinc.org/property/SCALE_TYP


In [126]:
# Attach the COMPONENT part of every LOINC code in the Geboortezorg list:
# join on the LOINC number, keep the descriptive columns, drop repeated rows.
gzcomp = (
    gz.merge(
        l2p.loc[l2p['PartTypeName'].eq('COMPONENT')],
        left_on='Attribute:code',
        right_on='LoincNumber',
    )
    .loc[:, ['LoincNumber', 'LongCommonName', 'PartNumber', 'PartName', 'PartTypeName']]
    .drop_duplicates()
)
gzcomp.head(20)

Unnamed: 0,LoincNumber,LongCommonName,PartNumber,PartName,PartTypeName
0,95939-5,Fetal Crown Rump length percentile per estimat...,LP34431-4,Length.crown rump,COMPONENT
1,11727-5,Fetal Body weight estimated by US,LP65139-5,Body weight,COMPONENT
2,11957-8,Fetal Crown Rump length US,LP34431-4,Length.crown rump,COMPONENT
3,11951-1,Fetal [Identifier] Identifier,LP72778-1,Identifier,COMPONENT
4,55283-6,Fetal Heart rate,LP72677-5,Heart rate,COMPONENT
5,12130-1,Fetal Narrative [Interpretation] Study observa...,LP36549-1,Study observation.general,COMPONENT
6,79192-1,Maternal pregnancy information Narrative,LP310072-6,Pregnancy information,COMPONENT
7,32408-7,1 minute Apgar Muscle tone,LP308674-3,Muscle tone^1M post birth,COMPONENT
8,32413-7,5 minute Apgar Muscle tone,LP308675-0,Muscle tone^5M post birth,COMPONENT
9,32403-8,10 minute Apgar Muscle tone,LP308673-5,Muscle tone^10M post birth,COMPONENT


In [127]:
# Read the tab-separated LOINC-Snomed map and show the rows for one example part number.
dfmap = pd.read_csv(map, sep='\t', low_memory=False)
dfmap.loc[dfmap['mapTarget'].eq('LP7727-3')]

Unnamed: 0,id,effectiveTime,active,moduleId,refsetId,referencedComponentId,mapTarget,attributeId,correlationId,contentOriginId
3943,fd10d959-33be-47c6-b498-ff8d09789192,20170731,1,715515008,705112009,57495003,LP7727-3,718497002,447558009,705119000
7127,62ee216b-ce94-4831-86df-8d5f7b5e49eb,20170731,1,715515008,705112009,16211051000119109,LP7727-3,704327008,447558009,705119000


## Join the map and parts
* First join LOINC parts with Snomed mapping on LOINC Part Number
* Keep the rows whose attributeId is the one for 'component' (246093002), or NaN so that missing values are kept
* Select the relevant columns and sort the list by part name

In [136]:
# Join the Geboortezorg components with the Snomed map on the LOINC part number.
# NOTE(review): merge defaults to an inner join, so parts without any row in dfmap are
# dropped here; the isna() test below can then only match NaN values that are already
# present in dfmap. Use how='left' if unmapped parts should be kept - TODO confirm intent.
dfjoin = gzcomp.merge(dfmap, left_on='PartNumber', right_on='mapTarget')
dfjoin = dfjoin.loc[dfjoin['attributeId'].eq(246093002) | dfjoin['attributeId'].isna()]
# Keep the columns of interest, ordered by part name.
snomedpartmap = dfjoin.loc[
    :,
    ['LoincNumber', 'LongCommonName', 'PartNumber', 'PartTypeName', 'PartName', 'referencedComponentId', 'attributeId'],
].sort_values(by='PartName')
snomedpartmap.head(20)

Unnamed: 0,LoincNumber,LongCommonName,PartNumber,PartTypeName,PartName,referencedComponentId,attributeId
20,883-9,ABO group [Type] in Blood,LP17806-8,COMPONENT,ABO group,106202009,246093002
4,32300-6,Base excess in Cord blood by calculation,LP15429-1,COMPONENT,Base excess,68615006,246093002
36,14631-6,Bilirubin.total [Moles/volume] in Serum or Plasma,LP15448-1,COMPONENT,Bilirubin,79706000,246093002
23,948-0,C Ag [Presence] on Red Blood Cells,LP37459-2,COMPONENT,C Ag,84538003,246093002
39,14685-2,Cobalamin (Vitamin B12) [Moles/volume] in Seru...,LP15505-8,COMPONENT,Cobalamins,304111003,246093002
43,7852-7,Cytomegalovirus IgG Ab [Units/volume] in Serum...,LP37880-9,COMPONENT,Cytomegalovirus Ab.IgG,710426001,246093002
42,7853-5,Cytomegalovirus IgM Ab [Units/volume] in Serum...,LP37881-7,COMPONENT,Cytomegalovirus Ab.IgM,444067005,246093002
2,1305-2,D Ag [Presence] in Blood,LP37889-0,COMPONENT,D Ag,876000,246093002
3,978-7,D Ag [Presence] on Red Blood Cells,LP37889-0,COMPONENT,D Ag,876000,246093002
41,2276-4,Ferritin [Mass/volume] in Serum or Plasma,LP15568-6,COMPONENT,Ferritin,32789000,246093002


## Define snomed function

In [137]:
from functools import lru_cache

import requests
import numpy as np

# Snowstorm terminology server and the branch (edition/version) to query.
baseUrl = 'https://browser.ihtsdotools.org/snowstorm/snomed-ct/'
edition = 'MAIN'
version = '2021-01-31'

@lru_cache(maxsize=None)
def getSnomedTerm(sctid, timeout=30):
    """Return the fully specified name (FSN) term of a Snomed concept.

    Parameters
    ----------
    sctid : str or int
        Snomed concept identifier; it is converted to a string for the URL.
    timeout : float, optional
        Seconds to wait for the server before `requests` raises a timeout
        error (default 30). Without it a stalled connection blocks forever.

    Returns
    -------
    str
        The value of ``fsn.term`` in the server's JSON answer, or
        ``"Term not found"`` when the answer has no such field or is not JSON.

    Notes
    -----
    Results (including ``"Term not found"``) are cached per argument for the
    lifetime of the kernel, so repeated ids cost one request only; call
    ``getSnomedTerm.cache_clear()`` to force fresh lookups. Connection errors
    and timeouts are not swallowed and are never cached.
    """
    uri = baseUrl + 'browser/' + edition + '/' + version + '/concepts/' + str(sctid)
    resp = requests.get(uri, timeout=timeout)
    try:
        term = resp.json()['fsn']['term']
    except (KeyError, TypeError, ValueError):
        # KeyError: no 'fsn'/'term' key (e.g. unknown concept);
        # TypeError: 'fsn' is null or the body is not a JSON object;
        # ValueError: the body is not JSON at all (e.g. an HTML error page).
        term = "Term not found"
    return term

In [138]:
# Quick check of the lookup with a single concept id.
getSnomedTerm(sctid='89177007')

'Proton (substance)'

## Get Snomed terms
We get the Snomed terms from the Snomed API for comparison with the LCS names.

This takes some time.

In [139]:
# Set fetch_terms to False to skip the (slow) lookups on the Snomed server;
# the 'Term' column is then filled with empty strings.
fetch_terms = True

if fetch_terms:
    # Several LOINC codes share the same Snomed concept, so look up every distinct
    # concept id once and map the results back onto the rows.
    sctids = snomedpartmap['referencedComponentId'].astype(str)
    term_by_sctid = {sctid: getSnomedTerm(sctid) for sctid in sctids.unique()}
    snomedpartmap['Term'] = sctids.map(term_by_sctid)
else:
    snomedpartmap['Term'] = ''
snomedpartmap

Unnamed: 0,LoincNumber,LongCommonName,PartNumber,PartTypeName,PartName,referencedComponentId,attributeId,Term
20,883-9,ABO group [Type] in Blood,LP17806-8,COMPONENT,ABO group,106202009,246093002,Antigen in ABO blood group system (substance)
4,32300-6,Base excess in Cord blood by calculation,LP15429-1,COMPONENT,Base excess,68615006,246093002,Bicarbonate (substance)
36,14631-6,Bilirubin.total [Moles/volume] in Serum or Plasma,LP15448-1,COMPONENT,Bilirubin,79706000,246093002,Bilirubin (substance)
23,948-0,C Ag [Presence] on Red Blood Cells,LP37459-2,COMPONENT,C Ag,84538003,246093002,Blood group antigen C (substance)
39,14685-2,Cobalamin (Vitamin B12) [Moles/volume] in Seru...,LP15505-8,COMPONENT,Cobalamins,304111003,246093002,Cobalamin (substance)
43,7852-7,Cytomegalovirus IgG Ab [Units/volume] in Serum...,LP37880-9,COMPONENT,Cytomegalovirus Ab.IgG,710426001,246093002,Immunoglobulin G antibody to Cytomegalovirus (...
42,7853-5,Cytomegalovirus IgM Ab [Units/volume] in Serum...,LP37881-7,COMPONENT,Cytomegalovirus Ab.IgM,444067005,246093002,Cytomegalovirus immunoglobulin M antibody (sub...
2,1305-2,D Ag [Presence] in Blood,LP37889-0,COMPONENT,D Ag,876000,246093002,Blood group antigen D (substance)
3,978-7,D Ag [Presence] on Red Blood Cells,LP37889-0,COMPONENT,D Ag,876000,246093002,Blood group antigen D (substance)
41,2276-4,Ferritin [Mass/volume] in Serum or Plasma,LP15568-6,COMPONENT,Ferritin,32789000,246093002,Ferritin (substance)


## Write to Excel

In [140]:
# Save the component mapping as an Excel workbook in the working directory.
snomedpartmap.to_excel(excel_writer="GZ-LOINC-Snomed-components.xlsx")

## Write XML file
Only the *new mappings* are written to XML. Copy after existing mappings.

In [141]:
# to_xml is imported from a separate xml_to_pandas module whose code is not shown in this notebook.
from xml_to_pandas import to_xml
# presumably writes one 'concept' element per row under a 'map' root element into the
# named file - TODO confirm against xml_to_pandas
to_xml(snomedpartmap, file_name='GZ-LOINC-Snomed-components.xml', root_name='map', row_name='concept')
# Bare string as last expression: the cell shows 'ok' once the call above has returned.
'ok'

'ok'