# Add Concept IDs and other properties to an entity: use case Anxiety

Use this notebook as example to add icd codes or concept IDs to existing ontology concepts

Author: Jingna Feng
Date: Jan 8, 2025

In [1]:
import pandas as pd
import numpy as np
import util as ut

In [2]:
path_voc = './vocabulary_v5/'
path_out = './output/ontology/'
path_in = './input/'

In [3]:
import os
#os.listdir(path_in)

In [4]:
from owlready2 import *
onto_path.append(path_in)
from rdflib.namespace import XSD
#onto = get_ontology("https://github.com/Tao-AI-group/MCContology")
onto = get_ontology(path_in+'MCC_RDF_9') ## 6 with medical codes data type changed to xsd:string
onto.load()

if onto:
    print("Ontology loaded successfully")
else:
    print("Failed to load ontology")

Ontology loaded successfully


In [5]:
entity = onto.search_one(label="Hypertension")  # Replace with your entity's label or name

if entity:
    print(f"Entity found: {entity.name}")
    print(f"Entity type: {entity.is_a}")
else:
    print("Entity not found in the ontology")

Entity found: Hypertension
Entity type: [MCC_RDF_9.Cardiovascular_Disease, MCC_RDF_9.risk_factor_of.some(MCC_RDF_9.Atrial_Fibrillation)]


In [6]:
entity = onto.search_one(label="Generalized_Anxiety_Disorder")  # Replace with your entity's label or name

if entity:
    print(f"Entity found: {entity.name}")
    print(f"Entity type: {entity.is_a}")
else:
    print("Entity not found in the ontology")

Entity found: Generalized_Anxiety_Disorder
Entity type: [MCC_RDF_9.Mental_Health_Disorder]


In [7]:
def brace_rm (label):
    newText = str(label).replace('[','').replace(']','').replace('\'','')
    return newText

In [8]:
dz = 'anxiety disorder' # title case
fn_owl = 'MCC_py29.owl'
csv = 'generalAnxiety.csv'

In [9]:
path_pheno = './phenotype/'

os.listdir(path_pheno)



['osteoporosis.csv',
 'heartFailure.csv',
 'arthritis.csv',
 'autism.csv',
 'ckd.csv',
 'depression.csv',
 'cancer.csv',
 'hypertension.csv',
 'cad.csv',
 'adhd.csv',
 'schz.xlsx',
 'generalAnxiety.csv',
 'schz.csv',
 'depressionMajor.csv',
 'panicDisorder.csv',
 'hyperlipidemia.csv',
 'parkinson.csv',
 'dilusional.csv',
 'diabetes.csv',
 'af.csv',
 'copd.csv',
 'thyroid.csv',
 'stroke.csv']

In [10]:
df = pd.read_csv(path_pheno+csv)
df.columns

Index(['concept_source_id', 'concept_name', 'concept_source_value',
       'vocabulary_id', 'condition_concept_id', 'standard_concept_vocabulary',
       'SNOMED_CODE', 'SNOMED_TERM'],
      dtype='object')

In [11]:
df.shape

(2, 8)

## Define disease in onto: o1

In [12]:

#https://github.com/Tao-AI-group/MCContology#Hypertension 'MCC_py8.owl''MCC_py11.owl'
#https://github.com/Tao-AI-group/MCContology#Heart_Failure 'MCC_py9.owl'
#https://github.com/Tao-AI-group/MCContology#Diabetes'MCC_py12.owl'
#https://github.com/Tao-AI-group/MCContology#Chronic_Kidney_Disease'MCC_py13.owl'
# https://github.com/Tao-AI-group/MCContology#Chronic_Obstructive_Pulmonary_Disease 'MCC_py14.owl'
# https://github.com/Tao-AI-group/MCContology#Osteoporosis 'MCC_py15.owl'
# https://github.com/Tao-AI-group/MCContology#Coronary_Artery_Disease 'MCC_py16.owl'
# https://github.com/Tao-AI-group/MCContology#Hyperlipidemia 'MCC_py17.owl'
# https://github.com/Tao-AI-group/MCContology#Cancer 'MCC_py18.owl'
# https://github.com/Tao-AI-group/MCContology#Depression 'MCC_py19.owl'
# https://github.com/Tao-AI-group/MCContology#Stroke 'MCC_py20.owl'
# https://github.com/Tao-AI-group/MCContology#Thyroid_Disorder 'MCC_py21.owl'
# https://github.com/Tao-AI-group/MCContology#Parkinsons_Disease 'MCC_py22.owl'
# https://github.com/Tao-AI-group/MCContology#Arthritis 'MCC_py23.owl'
# https://github.com/Tao-AI-group/MCContology#ADHD'MCC_py24.owl'

with onto:
    o1 = onto.Generalized_Anxiety_Disorder
    print(o1.is_a)

[MCC_RDF_9.Mental_Health_Disorder]


In [13]:
#onto.search(is_a = onto.Pizza, has_topping = onto.search(is_a = onto.TomatoTopping))
o1.is_a

[MCC_RDF_9.Mental_Health_Disorder]

In [14]:
o2 = o1
for prop in o2.get_properties(self=o2):
    for value in prop[o2]:
        print(".%s == %s" % (prop.python_name, value))

.SNOMED_term == Generalized anxiety disorder
.ICD9CM ==  
.has_Athena == Yes
.has_SNOMED == Yes
.ConditionConceptID ==  
.AthenaID ==  
.SNOMED ==  
.has_ConceptID == Yes
.label == Generalized_Anxiety_Disorder


In [15]:
o1.descendants(), o1.ancestors()

({MCC_RDF_9.Generalized_Anxiety_Disorder},
 {MCC_RDF_9.Disease,
  MCC_RDF_9.Generalized_Anxiety_Disorder,
  MCC_RDF_9.Mental_Health_Disorder,
  MCC_RDF_9.Mental_and_Neural_Disease,
  owl.Thing})

In [16]:
len(o1.descendants()), len(o1.ancestors())

(1, 5)

### Add subclasses

In [17]:
os.listdir('/Users/feng.jingna/macLocal/Vocabulary/Athena/phenotype')

['osteoporosis.csv',
 'heartFailure.csv',
 'arthritis.csv',
 'autism.csv',
 'ckd.csv',
 'depression.csv',
 'cancer.csv',
 'hypertension.csv',
 'cad.csv',
 'adhd.csv',
 'schz.xlsx',
 'generalAnxiety.csv',
 'schz.csv',
 'depressionMajor.csv',
 'panicDisorder.csv',
 'hyperlipidemia.csv',
 'parkinson.csv',
 'dilusional.csv',
 'diabetes.csv',
 'af.csv',
 'copd.csv',
 'thyroid.csv',
 'stroke.csv']

In [18]:
with onto:
    print(o1.is_a)

[MCC_RDF_9.Mental_Health_Disorder]


## Import Phenotype CSV

In [19]:
df.shape

(2, 8)

## ICD 10 and ICD 9 datatables

In [27]:
df_10 = df.loc[df['vocabulary_id']=='ICD10CM'].reset_index(drop=True)
df_9 = df.loc[df['vocabulary_id']=='ICD9CM'].reset_index(drop=True)

## Use Concept NAMES as class entity name

In [28]:
import types

In [29]:
o1.ancestors()

{MCC_RDF_9.Disease,
 MCC_RDF_9.Generalized_Anxiety_Disorder,
 MCC_RDF_9.Mental_Health_Disorder,
 MCC_RDF_9.Mental_and_Neural_Disease,
 owl.Thing}

### Define class O

In [30]:
#onto.Atrial_Fibrillation.descendants()
o = o1
print (o.descendants())

{MCC_RDF_9.Generalized_Anxiety_Disorder}


In [31]:
for oo in o.descendants():
    print(oo.iri,oo.AthenaID, oo.label, oo.ICD10CM, oo.SNOMED,oo.SNOMED_term)

# Concept_ID = concept_source_id = Athena ID
# class Entity name = concept_name
# to add:SNOMED_term,vocabulary_id, condition_concept_id
# columns from phenotype csv:  concept_source_value (icd codes), vocabulary_id (icd system), condition_concept_id (?), standard_concept_vocabulary (SNOMED), concept_source_id = Athena ID
# to delete: has_Athena, has_ConceptCode, has_ConceptID, has_MONDO, has_SNOMED, is_ClassificationConcept, is_Standard,  ICD9CM, ICD10CM

https://github.com/Tao-AI-group/MCContology#Generalized_Anxiety_Disorder [' '] ['Generalized_Anxiety_Disorder'] [] [' '] ['Generalized anxiety disorder']


### Check IRI of annotation property


In [32]:

with onto:
    for a in onto.annotation_properties():
        print(a.label,a.iri)      

[] http://www.geneontology.org/formats/oboInOwl#created_by
[] http://www.geneontology.org/formats/oboInOwl#creation_date
[] http://www.w3.org/2004/02/skos/core#altLabel
[] http://www.w3.org/2004/02/skos/core#comment
[] http://www.w3.org/2004/02/skos/core#definition
[locstr('ATC', 'en')] https://github.com/Tao-AI-group/MCContology#ATC
[locstr('AthenaID', 'en')] https://github.com/Tao-AI-group/MCContology#AthenaID
[locstr('ConditionConceptID', 'en')] https://github.com/Tao-AI-group/MCContology#ConditionConceptID
[locstr('DrugBankID', 'en')] https://github.com/Tao-AI-group/MCContology#DrugBankID
[locstr('ICD10CM', 'en')] https://github.com/Tao-AI-group/MCContology#ICD10CM
[locstr('ICD9CM', 'en')] https://github.com/Tao-AI-group/MCContology#ICD9CM
[locstr('MeSH', 'en')] https://github.com/Tao-AI-group/MCContology#MeSH
[locstr('RxNorm', 'en')] https://github.com/Tao-AI-group/MCContology#RxNorm
[locstr('SNOMED', 'en')] https://github.com/Tao-AI-group/MCContology#SNOMED
[locstr('SNOMED_term',

#### Count total classes to add

In [33]:
df = df.loc[df['concept_name']!=dz].reset_index(drop=True)
df

Unnamed: 0,concept_source_id,concept_name,concept_source_value,vocabulary_id,condition_concept_id,standard_concept_vocabulary,SNOMED_CODE,SNOMED_TERM
0,44829926,Generalized anxiety disorder,300.02,ICD9CM,434613,SNOMED,21897009,Generalized anxiety disorder
1,35207170,Generalized anxiety disorder,F41.1,ICD10CM,434613,SNOMED,21897009,Generalized anxiety disorder


In [34]:
df['concept_name'].nunique()

1

In [35]:
# df = df.loc[df['concept_name']!='Atrial fibrillation'].reset_index(drop=True)
df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   concept_source_id            1 non-null      int64 
 1   concept_name                 1 non-null      object
 2   concept_source_value         1 non-null      object
 3   vocabulary_id                1 non-null      object
 4   condition_concept_id         1 non-null      int64 
 5   standard_concept_vocabulary  1 non-null      object
 6   SNOMED_CODE                  1 non-null      int64 
 7   SNOMED_TERM                  1 non-null      object
dtypes: int64(3), object(5)
memory usage: 192.0+ bytes


In [36]:
df_9.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   concept_source_id            1 non-null      int64 
 1   concept_name                 1 non-null      object
 2   concept_source_value         1 non-null      object
 3   vocabulary_id                1 non-null      object
 4   condition_concept_id         1 non-null      int64 
 5   standard_concept_vocabulary  1 non-null      object
 6   SNOMED_CODE                  1 non-null      int64 
 7   SNOMED_TERM                  1 non-null      object
dtypes: int64(3), object(5)
memory usage: 192.0+ bytes


In [37]:
df_10['concept_source_id']=df_10['concept_source_id'].astype(str)

In [38]:
df_10.iloc[0]

concept_source_id                                  35207170
concept_name                   Generalized anxiety disorder
concept_source_value                                  F41.1
vocabulary_id                                       ICD10CM
condition_concept_id                                 434613
standard_concept_vocabulary                          SNOMED
SNOMED_CODE                                        21897009
SNOMED_TERM                    Generalized anxiety disorder
Name: 0, dtype: object

In [39]:
df_10['concept_name'].nunique()

1

In [40]:
## TODO: change integer to string

o=o
df = df_10
    
s_ls = df['concept_name']
s_cd = df['SNOMED_CODE']
c_cd = df['concept_source_id'] # athenaID
sterm_ls = df['SNOMED_TERM']
cdt_ls = df['condition_concept_id']
icd_ls = df['concept_source_value'] #ConceptCode
icd_flg = df['vocabulary_id']

    
import types

with onto:
    for i in range (0, len(c_cd)):
        s = s_ls[i].replace(' ','_').replace('-','').replace(',_','_').replace('[','_').replace(']','')
        scd = int(s_cd[i])
        st = sterm_ls[i]
        athn = int(c_cd[i])
        ctd = int(cdt_ls[i])
        icd = str(icd_ls[i])
        #flg = str(icd_flg[i])
        #name = str(st).title().replace(' ','_').replace('-','').replace(',_','_').replace('/','_').replace('[','_').replace(']','')  ## SNOMED name as class names
        name = str(s).title().replace(' ','_').replace('-','').replace(',_','_').replace('/','_').replace('[','_').replace(']','')  ## ICD name as class names
        try:
            cls = types.new_class(name, (o,))
            cls.label=name
            print('Created: ', name)
            if athn not in list(cls.AthenaID):
                cls.AthenaID.append(str(athn))
            if scd not in list(cls.SNOMED):
                cls.SNOMED.append(str(scd))
            if str(st) not in list(cls.SNOMED_term):
                cls.SNOMED_term.append(str(st))
            #s.ConceptID = athn
            cls.has_Athena = 'Yes'
            cls.has_ConceptID = 'Yes'
            cls.has_SNOMED = 'Yes'
            if ctd not in list(cls.ConditionConceptID):
                cls.ConditionConceptID.append(str(ctd))
            if icd not in list(cls.ICD10CM):
                cls.ICD10CM.append(str(icd))
            else:
                print('Skipped:',name)
        except Exception as e:
            print(f'Class {name} already exists. Error: {e}')     
        
        i +=1

Class Generalized_Anxiety_Disorder already exists. Error: a __bases__ item causes an inheritance cycle


In [41]:
for oo in o.descendants():
    print(oo.iri,oo.AthenaID, oo.label, oo.ICD10CM, oo.ICD9CM, oo.SNOMED,oo.SNOMED_term)

https://github.com/Tao-AI-group/MCContology#Generalized_Anxiety_Disorder [' '] ['Generalized_Anxiety_Disorder'] [] [' '] [' '] ['Generalized anxiety disorder']


In [42]:
name_ls = list()
for oo in o.descendants():
    nm = oo.label
    name_ls.append(nm)
print(len(name_ls))

1


In [43]:
name_ls

[['Generalized_Anxiety_Disorder']]

In [44]:
df_9['concept_name'].nunique()

1

In [45]:
t =str(o.label).replace('[','').replace(']','').replace('\'','')
print(t, type(t))

Generalized_Anxiety_Disorder <class 'str'>


In [46]:
t =name
print(t, type(t))

Generalized_Anxiety_Disorder <class 'str'>


In [47]:
o=o
df = df_9
    
s_ls = df['concept_name']
s_cd = df['SNOMED_CODE']
c_cd = df['concept_source_id'].astype(int) # athenaID
sterm_ls = df['SNOMED_TERM']
cdt_ls = df['condition_concept_id']
icd_ls = df['concept_source_value'] #ConceptCode
icd_flg = df['vocabulary_id']

    
import types


with onto:
    for i in range (0, len(c_cd)):
        s = s_ls[i].replace(' ','_').replace('-','').replace(',_','_').replace('/','_').replace('[','_').replace(']','')
        scd = int(s_cd[i])
        st = sterm_ls[i]
        athn = int(c_cd[i])
        ctd = int(cdt_ls[i])
        icd = str(icd_ls[i])
        #flg = str(icd_flg[i])
        #name = str(st).title().replace(' ','_').replace('-','').replace(',_','_').replace('/','_').replace('[','_').replace(']','') ## SNOMED term as class name
        name = str(s).title().replace(' ','_').replace('-','').replace(',_','_').replace('/','_').replace('[','_').replace(']','') ## ICD name as class names
        if name in name_ls:
            print('Updated:',name)
            if athn not in list(cls.AthenaID):
                cls.AthenaID.append(str(athn))
            if scd not in list(cls.SNOMED):
                cls.SNOMED.append(str(scd))
            if str(st) not in list(cls.SNOMED_term):
                cls.SNOMED_term.append(str(st))
            #s.ConceptID = athn
            cls.has_Athena = 'Yes'
            cls.has_ConceptID = 'Yes'
            cls.has_SNOMED = 'Yes'
            if ctd not in list(cls.ConditionConceptID):
                cls.ConditionConceptID.append(str(ctd))
            if len(list(cls.ICD9CM))==0:
                cls.ICD9CM=icd
            if icd not in list(cls.ICD9CM):
                cls.ICD9CM.append(str(icd))
            else:
                pass
        elif name != str(o.label).replace('[','').replace(']','').replace('\'',''):
            print('Created:',name)
            cls = types.new_class(name, (o,))
            cls.label=name
            if athn not in list(cls.AthenaID):
                cls.AthenaID.append(str(athn))
            if scd not in list(cls.SNOMED):
                cls.SNOMED.append(str(scd))
            if str(st) not in list(cls.SNOMED_term):
                cls.SNOMED_term.append(str(st))
            #s.ConceptID = athn
            cls.has_Athena = 'Yes'
            cls.has_ConceptID = 'Yes'
            cls.has_SNOMED = 'Yes'
            if ctd not in list(cls.ConditionConceptID):
                cls.ConditionConceptID.append(str(ctd))
            #cls.ICD9CM=icd
            if icd not in list(cls.ICD9CM):
                cls.ICD9CM.append(str(icd))
        else:
            print('Not created:',name,'\n')

Not created: Generalized_Anxiety_Disorder 



In [48]:
for oo in o.descendants():
    print(oo.iri,oo.AthenaID, oo.label, oo.ICD10CM, oo.ICD9CM, oo.SNOMED,oo.SNOMED_term)

https://github.com/Tao-AI-group/MCContology#Generalized_Anxiety_Disorder [' '] ['Generalized_Anxiety_Disorder'] [] [' '] [' '] ['Generalized anxiety disorder']


In [49]:
print(len(list(o.descendants())))

1


In [50]:
for on in list(onto.annotation_properties()):
    print(on.iri)

http://www.geneontology.org/formats/oboInOwl#created_by
http://www.geneontology.org/formats/oboInOwl#creation_date
http://www.w3.org/2004/02/skos/core#altLabel
http://www.w3.org/2004/02/skos/core#comment
http://www.w3.org/2004/02/skos/core#definition
https://github.com/Tao-AI-group/MCContology#ATC
https://github.com/Tao-AI-group/MCContology#AthenaID
https://github.com/Tao-AI-group/MCContology#ConditionConceptID
https://github.com/Tao-AI-group/MCContology#DrugBankID
https://github.com/Tao-AI-group/MCContology#ICD10CM
https://github.com/Tao-AI-group/MCContology#ICD9CM
https://github.com/Tao-AI-group/MCContology#MeSH
https://github.com/Tao-AI-group/MCContology#RxNorm
https://github.com/Tao-AI-group/MCContology#SNOMED
https://github.com/Tao-AI-group/MCContology#SNOMED_term
https://github.com/Tao-AI-group/MCContology#UMLS_CUI
https://github.com/Tao-AI-group/MCContology#VocabularyID
https://github.com/Tao-AI-group/MCContology#VocabularySystem
https://github.com/Tao-AI-group/MCContology#has_A

## Save to OWL and RDF formats

In [68]:
onto.save(file=fn_owl)
onto.save()