# Description

## Introduction

The main goal of the **Description** module is to provide the means to enrich the PropaPhen ontology, so that a domain-specific ontology can be obtained for a given phenomenon.

In [1]:
%load_ext autoreload
%autoreload 2

## Libraries

### Installation

In [2]:
#!pip install owlready2
#!pip install tqdm
#!pip install pkt_kg

### Standard

In [3]:
from owlready2 import *
import pandas as pd
from tqdm import tqdm

### Custom

In [None]:
class DescriptionModule():
    """Bundle the three ontologies the Description module works with.

    Parameters
    ----------
    propaphen_ontology
        Ontology object stored as ``self.propaphen``.
    kb_ontology
        Ontology object stored as ``self.kb``.
    net_ontology
        Ontology object stored as ``self.net``.
    """

    def __init__(self, propaphen_ontology, kb_ontology, net_ontology):
        # Bind the constructor argument itself, not a notebook-level global
        # that merely happens to be called `propaphen`.
        self.propaphen = propaphen_ontology
        self.kb = kb_ontology
        self.net = net_ontology

## Globals

In [35]:
# Input locations, all relative to the notebook's working directory.
# PropaPhen ontology file, opened through a "file://" IRI further down.
path_propaphen = "../PropaPhen/PropaPhen.owl"
# UMLS relationship file; read line by line and split on "|".
path_to_mrrel = "../data/umls/MRREL.RRF"
# UMLS Semantic Network definitions; read with pandas using "|" as separator.
path_to_srdef = "../data/umls/SRDEF"
# NOTE(review): the file name is spelled "Ontolgy" — confirm it matches the file on disk.
path_to_worldkg_ontology = "../data/worldkg/WorldKG_Ontolgy.owl"
# WorldKG node table (read with pandas) and edge list (read line by line, split on ",").
path_to_worldkg_nodes = "../data/worldkg/worldkg_nodes.csv"
path_to_worldkg_edges = "../data/worldkg/worldkg_edges.csv"

In [36]:
# Output locations for the ontologies this notebook builds (written as RDF/XML).
path_save_umlsonto = "../data/propaphenplus/saved/umlsonto.owl"
path_save_worldkg = "../data/propaphenplus/saved/worldkg.owl"

## Ontologies

### PropaPhen

In [6]:
# Obtain the ontology handle for the local PropaPhen file; the content is loaded in the next cell.
propaphen = get_ontology("file://" + path_propaphen)

In [8]:
# Load the ontology content and keep working with the value returned by load().
propaphen = propaphen.load()

In [9]:
# Index every PropaPhen class by its Python-level name for direct lookup later on.
propaphen_classes = list(propaphen.classes())
propaphen_dict_classes = {onto_class.__name__: onto_class for onto_class in propaphen_classes}

In [10]:
# Same index as for the classes, this time over PropaPhen's object properties.
propaphen_obproperties = list(propaphen.object_properties())
propaphen_dict_obproperties = {
    onto_property.__name__: onto_property for onto_property in propaphen_obproperties
}

### UMLS


---

The main identifier types in UMLS are ([ref](https://www.nlm.nih.gov/research/umls/new_users/online_learning/Meta_005.html)):
 - Concept Unique Identifier (CUI)
 - Lexical (term) Unique Identifier (LUI)
 - String Unique Identifier (SUI)
 - Atom Unique Identifier (AUI)

In addition to these concepts, UMLS includes a Semantic Network that consists of (1) a set of broad subject categories, or **Semantic Types**, that provide a consistent categorization of all concepts represented in the UMLS Metathesaurus, and (2) a set of useful and important relationships, or **Semantic Relations**, that exist between Semantic Types.

---
This subsection describes the aforementioned concepts and retrieves all properties found in the UMLS 2023AB repository

In [11]:
# Handle for the UMLS schema ontology that the following cells populate.
umlsonto = get_ontology("https://w3id.org/def/umls")
# Declare the class skeleton: one root class and one subclass per UMLS
# identifier type, plus the Semantic Network's SemanticType.
with umlsonto:
    # Common root of every UMLS class defined here.
    class UMLSEntity(Thing):
        pass
    # Concept Unique Identifier.
    class CUI(UMLSEntity):
        pass
    # Lexical (term) Unique Identifier.
    class LUI(UMLSEntity):
        pass
    # String Unique Identifier.
    class SUI(UMLSEntity):
        pass
    # Atom Unique Identifier.
    class AUI(UMLSEntity):
        pass
    # Semantic Type of the UMLS Semantic Network.
    class SemanticType(UMLSEntity):
        pass
    # The five subclasses are declared pairwise disjoint ...
    AllDisjoint([CUI, LUI, SUI, AUI, SemanticType])
    # ... and the root is declared equivalent to their union.
    UMLSEntity.equivalent_to = [CUI | LUI | SUI | AUI | SemanticType]

#### Adding CUI/AUI relationships

In [12]:
# Buckets of MRREL relation labels, filled by the scan in the next cell.
# Later cells combine them into per-class domain and range sets:
aui_rel = set()      # counted in the AUI domain and the AUI range
cui_rel = set()      # counted in the CUI domain and the CUI range
aui_cui_rel = set()  # counted in the AUI domain and the CUI range
cui_aui_rel = set()  # counted in the CUI domain and the AUI range

In [15]:
# Scan MRREL once and file the relation label (field 3) of every row into
# exactly ONE of the four buckets, chosen by the identifier types found in
# fields 2 and 6.
# NOTE(review): fields 2 and 6 are presumably STYPE1 and STYPE2 of the MRREL
# column layout — confirm against the UMLS release documentation.
with open(path_to_mrrel, mode='r', encoding='utf-8') as mrrel:
    for line in tqdm(mrrel):
        fields = line.split("|")
        if len(fields) < 7:
            # Blank or truncated row: the type fields are missing, nothing to classify.
            continue
        rel_label = fields[3]
        first_is_aui = fields[2] == 'AUI'
        second_is_aui = fields[6] == 'AUI'
        # A single exclusive chain: a row must not be counted in two buckets,
        # otherwise the domain/range sets derived from them gain spurious classes.
        if first_is_aui and second_is_aui:
            aui_rel.add(rel_label)
        elif second_is_aui:
            cui_aui_rel.add(rel_label)
        elif first_is_aui:
            aui_cui_rel.add(rel_label)
        else:
            cui_rel.add(rel_label)

55685992it [01:04, 869195.72it/s]


In [16]:
# Relation labels grouped by the class that may appear on each side.
cui_domain = cui_rel.union(cui_aui_rel)
aui_domain = aui_cui_rel.union(aui_rel)
cui_range = cui_rel.union(aui_cui_rel)
aui_range = aui_rel.union(cui_aui_rel)

In [17]:
# Every relation label seen in MRREL, minus the bare single-digit labels "0".."9".
single_digit_labels = {str(digit) for digit in range(10)}
all_rel = list(cui_rel.union(cui_aui_rel, aui_cui_rel, aui_rel) - single_digit_labels)

In [18]:
# Create one object property per relation label inside the UMLS ontology.
# Domain and range list CUI before AUI, and only the classes whose label set
# contains the relation.
with umlsonto:
    for rel in all_rel:
        opdomain = [
            umls_class
            for umls_class, labels in ((CUI, cui_domain), (AUI, aui_domain))
            if rel in labels
        ]
        oprange = [
            umls_class
            for umls_class, labels in ((CUI, cui_range), (AUI, aui_range))
            if rel in labels
        ]
        property_attrs = {'domain': opdomain, 'range': oprange}
        addRelation = type(rel, (ObjectProperty,), property_attrs)

#### Adding Semantic Network relationships

In [19]:
# Read only the first two pipe-separated columns of SRDEF (no header row).
# NOTE(review): column 0 is filtered on 'RL' below and column 1 is used as the
# property name — presumably record type and identifier; confirm that column 1
# (rather than a later name column) is the intended label.
semanticrels = pd.read_csv(path_to_srdef,sep='|', header=None, usecols=[0,1])

In [20]:
# Keep column 1 of the rows whose column 0 equals 'RL'.
is_relation_row = semanticrels[0] == 'RL'
semanticrelslist = semanticrels.loc[is_relation_row, 1].tolist()

In [21]:
# Each Semantic Network relation becomes an object property that links one
# SemanticType to another.
with umlsonto:
    for rel in semanticrelslist:
        semantic_attrs = {'domain': [SemanticType], 'range': [SemanticType]}
        addRelation = type(rel, (ObjectProperty,), semantic_attrs)

#### isa type

In [22]:
# "isa" is declared explicitly as a SemanticType -> SemanticType property.
with umlsonto:
    isa_attrs = {'domain': [SemanticType], 'range': [SemanticType]}
    addRelation = type("isa", (ObjectProperty,), isa_attrs)

#### CUI and Semantic Network

In [23]:
# "STY" connects a concept (CUI) to a SemanticType.
with umlsonto:
    sty_attrs = {'domain': [CUI], 'range': [SemanticType]}
    addRelation = type("STY", (ObjectProperty,), sty_attrs)

In [26]:
# Write the UMLS schema ontology built above to disk as RDF/XML.
umlsonto.save(file=path_save_umlsonto,format="rdfxml")

### World-KG

In [37]:
# Obtain the ontology handle for the local WorldKG ontology file; loaded in the next cell.
worldkg = get_ontology("file://" + path_to_worldkg_ontology)

In [39]:
# Load the ontology content and keep working with the value returned by load().
worldkg = worldkg.load()

In [40]:
# Index the WorldKG classes by their Python-level name.
worldkg_dict_classes = {wkg_class.__name__: wkg_class for wkg_class in worldkg.classes()}

In [41]:
# Load the WorldKG node table; the "id:ID" and ":LABEL" columns are used below.
# NOTE(review): node ids are later looked up with raw strings taken from the
# edge file — confirm pandas does not infer a non-string dtype for "id:ID".
kgnodes = pd.read_csv(path_to_worldkg_nodes)

  kgnodes = pd.read_csv(path_to_worldkg_nodes)


In [42]:
# Node id -> node label, taken from the two columns of the node table.
node_ids = kgnodes["id:ID"]
node_labels = kgnodes[":LABEL"]
dict_label = dict(zip(node_ids, node_labels))

In [43]:
# Node label -> WorldKG class; the class is looked up by the part of the label
# that follows the last ":".
dict_str_to_class = {
    label: worldkg_dict_classes[label.split(":")[-1]]
    for label in set(dict_label.values())
}

In [44]:
# One empty set of property names per node label, for the domain and the range side.
distinct_labels = set(dict_label.values())
wkgdomain = {label: set() for label in distinct_labels}
wkgrange = {label: set() for label in distinct_labels}

In [45]:
# Scan the edge list once: collect every property name and record, per node
# label, which properties leave it (domain) and which arrive at it (range).
all_rel = set()
with open(path_to_worldkg_edges, mode='r', encoding='utf-8') as edges_file:
    for row_number, raw_line in enumerate(tqdm(edges_file)):
        if row_number == 0:
            # First line is the header row.
            continue
        columns = raw_line.split(",")
        property_name = columns[2].replace("\n", "")
        all_rel.add(property_name)
        source_label = dict_label[columns[0]]
        target_label = dict_label[columns[1]]
        wkgdomain[source_label].add(property_name)
        wkgrange[target_label].add(property_name)
2228040it [00:03, 730180.73it/s]


In [None]:
# Declare every observed relationship as an object property of WorldKG, with
# the classes it was seen leaving as domain and the classes it was seen
# reaching as range.
with worldkg:
    for rel in all_rel:
        domainlist = [
            dict_str_to_class[label]
            for label, property_names in wkgdomain.items()
            if rel in property_names
        ]
        rangelist = [
            dict_str_to_class[label]
            for label, property_names in wkgrange.items()
            if rel in property_names
        ]
        property_attrs = {'domain': domainlist, 'range': rangelist}
        addRelation = type(rel, (ObjectProperty,), property_attrs)

In [None]:
# Write the WorldKG ontology, now including the derived object properties, as RDF/XML.
worldkg.save(file=path_save_worldkg,format="rdfxml")

## Semi-Automatic Alignment

In [None]:
# Register the UMLS and WorldKG ontologies as imports of PropaPhen.
# The membership check keeps the cell idempotent: running it again does not
# append the same ontology a second time.
for imported_ontology in (umlsonto, worldkg):
    if imported_ontology not in propaphen.imported_ontologies:
        propaphen.imported_ontologies.append(imported_ontology)

In [None]:
# Attach every WorldKG class to PropaPhen's 'System' class: drop the direct
# Thing parent when present, then append 'System' as a parent.
# list(...) takes a snapshot because the loop edits the classes it iterates over.
for x in list(worldkg.classes()):
    if Thing in x.is_a:
        x.is_a.remove(Thing)
    x.is_a.append(propaphen_dict_classes['System'])

In [None]:
# Re-parent the UMLS root class under PropaPhen's 'Entity' class
# (presumably the gUFO-derived Entity — confirm).
entity_parent = propaphen_dict_classes['Entity']
umls_root = umlsonto.search(iri='*UMLSEntity')[0]
umls_root.is_a = [entity_parent]

In [None]:
# Inspection: number of object properties currently defined in WorldKG.
len(list(worldkg.object_properties()))

In [None]:
# Inspection: label -> class mapping, shown to pick the keys used in the next cell.
dict_str_to_class

In [None]:
# Numeric level per WorldKG place type; the property alignment below compares
# these levels between a property's domain and range.
# NOTE(review): -1 for Region looks like a "no fixed level" marker — confirm.
region_level_by_label = {
    'wkgs:Region': -1,
    'wkgs:Island': 1,
    'wkgs:County': 2,
    'wkgs:Municipality': 1,
    'wkgs:Country': 4,
    'wkgs:Continent': 5,
    'wkgs:Village': 1,
    'wkgs:State': 3,
    'wkgs:City': 1,
}
dict_regionlvl = {
    dict_str_to_class[label]: level
    for label, level in region_level_by_label.items()
}

In [None]:
def systemPropertyAlignment(worldkg,dict_regionlvl,subSystemOf,connectedTo,hasSubSystem):
    """Re-parent each object property of ``worldkg`` by comparing region levels.

    The classes in a property's ``domain`` and ``range`` are mapped to numeric
    levels through ``dict_regionlvl``; the property's ``is_a`` is then set to:

    * ``[connectedTo]``  when both sides yield the same set of levels,
    * ``[subSystemOf]``  when every domain level <= every range level,
    * ``[hasSubSystem]`` when every range level <= every domain level.

    In every other case, including a property with exactly one empty side,
    ``is_a`` is left untouched.

    Parameters
    ----------
    worldkg
        Object exposing ``object_properties()``; each yielded property must
        provide ``domain``, ``range`` and an assignable ``is_a``.
    dict_regionlvl
        Mapping from class to numeric level.
    subSystemOf, connectedTo, hasSubSystem
        Parent properties to assign.

    Raises
    ------
    KeyError
        If a domain or range class has no entry in ``dict_regionlvl``.
    """
    def listToLevels(placelist,dict_regionlvl):
        """Map every class in ``placelist`` to its level."""
        return [dict_regionlvl[place] for place in placelist]

    # Snapshot the properties first: the loop rewrites ``is_a`` on the very
    # objects the live iterator is producing.
    for rel in tqdm(list(worldkg.object_properties())):
        domainl = listToLevels(rel.domain,dict_regionlvl)
        rangel = listToLevels(rel.range,dict_regionlvl)
        if set(domainl) == set(rangel):
            # NOTE(review): also reached when both sides are empty — confirm
            # that such properties should really become connectedTo.
            rel.is_a = [connectedTo]
        elif not domainl or not rangel:
            # max()/min() of an empty list raises ValueError; with one side
            # missing no level relation can be inferred, so leave it as is.
            continue
        elif max(domainl) <= min(rangel):
            rel.is_a = [subSystemOf]
        elif max(rangel) <= min(domainl):
            rel.is_a = [hasSubSystem]

In [None]:
# Align every WorldKG object property with PropaPhen's three system relations.
systemPropertyAlignment(
    worldkg,
    dict_regionlvl,
    subSystemOf=propaphen_dict_obproperties['subSystemOf'],
    connectedTo=propaphen_dict_obproperties['connectedTo'],
    hasSubSystem=propaphen_dict_obproperties['hasSubSystem'],
)

In [None]:
# Manual overrides: these properties are always aligned to subSystemOf,
# whatever the level-based pass decided.
# NOTE(review): presumably Wikidata P17 (country) and P131 (located in the
# administrative territorial entity) — confirm.
worldkg_obproperties = list(worldkg.object_properties())
worldkg_dict_obproperties = {
    wkg_property.__name__: wkg_property for wkg_property in worldkg_obproperties
}

for property_name in ('wdp:P17', 'wdp:P131'):
    worldkg_dict_obproperties[property_name].is_a = [propaphen_dict_obproperties['subSystemOf']]

In [None]:
# Persist the three aligned ontologies as RDF/XML.
worldkg.save(file=path_save_worldkg,format="rdfxml")
umlsonto.save(file=path_save_umlsonto,format="rdfxml")
# Store PropaPhen+ next to the other saved ontologies. Every other path in this
# notebook is rooted at "../data/", so the earlier "data/saved/..." target did
# not follow the "../data/propaphenplus/saved/" layout of the path_save_* globals.
# NOTE(review): confirm this is the intended output directory.
path_save_propaphenplus = "../data/propaphenplus/saved/propaphenplus.owl"
propaphen.save(file=path_save_propaphenplus,format="rdfxml")