In [1]:
import os
import pandas as pd
from rdflib import Graph, RDF, OWL, URIRef, RDFS

*MeNU GUIDE*
# Ontology Merging
## Load Ontologies

In [None]:
ontology_folder = "/path/to/ontologies/"

In [2]:
merged_graph = Graph()

In [3]:
merged_graph.parse(os.path.join(ontology_folder, "chebi.owl"), format="xml")
len(merged_graph)

6918400

In [4]:
merged_graph.parse(os.path.join(ontology_folder, "chiro.owl"), format="xml")
len(merged_graph)

6921523

In [5]:
merged_graph.parse(os.path.join(ontology_folder, "go-plus.owl"), format="xml")
len(merged_graph)

9248945

In [6]:
merged_graph.parse(os.path.join(ontology_folder, "SBO_OWL.owl"), format="xml")
len(merged_graph)

9251935

In [7]:
merged_graph.parse(os.path.join(ontology_folder, "doid-merged.owl"), format="xml")
len(merged_graph)

9538669

In [8]:
merged_graph.parse(os.path.join(ontology_folder, "dron.owl"), format="xml")
len(merged_graph)

16766330

In [9]:
merged_graph.parse(os.path.join(ontology_folder, "foodon.owl"), format="xml")
len(merged_graph)

17191883

In [10]:
merged_graph.parse(os.path.join(ontology_folder, "fobi.owl"), format="xml")
len(merged_graph)

17223156

In [11]:
merged_graph.parse(os.path.join(ontology_folder, "fideo.owl"), format="xml")
len(merged_graph)

17267534

In [12]:
merged_graph.parse(os.path.join(ontology_folder, "cdno.owl"), format="xml")
len(merged_graph)

17373676

In [13]:
merged_graph.parse(os.path.join(ontology_folder, "mp-full.owl"), format="xml")
len(merged_graph)

18714653

In [14]:
merged_graph.parse(os.path.join(ontology_folder, "hp.owl"), format="xml")
len(merged_graph)

19473236

In [15]:
merged_graph.parse(os.path.join(ontology_folder, "envo.owl"), format="xml")
len(merged_graph)

19565843

In [18]:
merged_graph.serialize(destination=os.path.join(ontology_folder, "merged_without_mapping.ttl"), format="turtle")

<Graph identifier=Nbe05403737fa44c3968f5fac07b9513b (<class 'rdflib.graph.Graph'>)>

In [19]:
merged_graph = Graph()
merged_graph.parse(os.path.join(ontology_folder, "merged_without_mapping.ttl"), format="turtle")
len(merged_graph)

19565861

## Add mapping info

In [20]:
mapping_file = pd.read_csv(os.path.join(ontology_folder, "all_mappings.csv"))

In [21]:
mapping_file

Unnamed: 0.1,Unnamed: 0,entity_1,entity_2
0,0,http://purl.obolibrary.org/obo/CHEBI_15600,http://purl.obolibrary.org/obo/CHEBI_23053
1,1,http://purl.obolibrary.org/obo/CHEBI_36080,http://purl.obolibrary.org/obo/PR_000000001
2,2,http://purl.obolibrary.org/obo/CHEBI_22470,http://purl.obolibrary.org/obo/CHEBI_18145
3,3,http://purl.obolibrary.org/obo/CHEBI_176843,http://purl.obolibrary.org/obo/CHEBI_17439
4,4,http://purl.obolibrary.org/obo/CHEBI_42255,http://purl.obolibrary.org/obo/CHEBI_71227
...,...,...,...
3716,5625,http://purl.obolibrary.org/obo/HP_0001572,http://purl.obolibrary.org/obo/MP_0030091
3717,5629,http://purl.obolibrary.org/obo/FOODON_03412050,http://purl.obolibrary.org/obo/FOODON_03414195
3718,5630,http://purl.obolibrary.org/obo/CHEBI_31608,http://purl.obolibrary.org/obo/DRON_00016651
3719,5631,http://purl.obolibrary.org/obo/NCBITaxon_65561,http://purl.obolibrary.org/obo/fideo/FIDEO_000...


In [22]:
def is_class_or_property(uri):
    if (uri, RDF.type, OWL.Class) in merged_graph or (uri, RDF.type, RDFS.Class) in merged_graph:
        return 'class'
    elif (uri, RDF.type, RDF.Property) in merged_graph or (uri, RDF.type, OWL.ObjectProperty) in merged_graph or (uri, RDF.type, OWL.DatatypeProperty) in merged_graph:
        return 'property'
    else:
        return None
    
def uri_exists(uri):
    """Check if a URI exists in the graph as a subject, predicate, or object."""
    uri_ref = URIRef(uri)
    if (uri_ref, None, None) in merged_graph or (None, uri_ref, None) in merged_graph or (None, None, uri_ref) in merged_graph:
        return True
    return False

In [23]:
for index, row in mapping_file.iterrows():
    entity1 = URIRef(row['entity_1'])
    entity2 = URIRef(row['entity_2'])
    
    if uri_exists(entity1) and uri_exists(entity2):
        entity1_type = is_class_or_property(entity1)
        entity2_type = is_class_or_property(entity2)
        
        if entity1_type == 'class' and entity2_type == 'class':
            merged_graph.add((entity1, OWL.equivalentClass, entity2))
            merged_graph.add((entity2, OWL.equivalentClass, entity1))
        elif entity1_type == 'property' and entity2_type == 'property':
            merged_graph.add((entity1, OWL.equivalentProperty, entity2))
            merged_graph.add((entity2, OWL.equivalentProperty, entity1))
        else:
            print(f"Warning: Could not determine type for entities {entity1} and {entity2}")
    else:
        print(f"Warning: entity 1 {entity1} {'exists' if uri_exists(entity1) else 'does not exist'} in the graph, entity 2 {entity2} {'exists' if uri_exists(entity2) else 'does not exist'} in the graph.")



In [24]:
len(merged_graph)

19572895

In [25]:
len(merged_graph) - 19565843

7052

In [26]:
merged_graph.serialize(destination=os.path.join(ontology_folder, "merged_with_mapping.ttl"), format="turtle")

<Graph identifier=Nf3d2833ed65449258be3ec1ec1cde025 (<class 'rdflib.graph.Graph'>)>