In [1]:
from owlready2 import *

def extract_metadata(entity):
    """
    Extract metadata for a given OWL entity (e.g., comment, label, defined_by).
    """
    metadata = {}
    if hasattr(entity, 'comment'):
        metadata['comment'] = entity.comment
    if hasattr(entity, 'label'):
        metadata['label'] = entity.label
    if hasattr(entity, 'isDefinedBy'):
        metadata['defined_by'] = [str(x) for x in entity.isDefinedBy]
    if hasattr(entity, 'iri'):
        metadata['iri'] = entity.iri
    return metadata

def extract_classes(ontology):
    """
    Extract all classes in the ontology as a list of dictionaries, including subclasses.
    """
    classes = []
    for cls in ontology.classes():
        class_info = {'name': cls.name}
        class_info.update(extract_metadata(cls))
        # Add subclasses
        class_info['subclasses'] = [sub.name for sub in cls.subclasses()]
        classes.append(class_info)
    return classes

def extract_object_properties(ontology):
    """Extract all object properties in the ontology as a list of dictionaries."""
    object_properties = []
    for prop in ontology.object_properties():
        prop_info = {'name': prop.name}
        prop_info.update(extract_metadata(prop))
        object_properties.append(prop_info)
    return object_properties

def extract_data_properties(ontology):
    """Extract all data properties in the ontology as a list of dictionaries."""
    data_properties = []
    for prop in ontology.data_properties():
        prop_info = {'name': prop.name}
        prop_info.update(extract_metadata(prop))
        data_properties.append(prop_info)
    return data_properties

def extract_annotation_properties(ontology):
    """
    Extract all annotation properties in the ontology as a list of dictionaries.
    """
    annotation_properties = []
    for prop in ontology.annotation_properties():
        prop_info = {'name': prop.name}
        prop_info.update(extract_metadata(prop))
        annotation_properties.append(prop_info)
    return annotation_properties

In [2]:
# Load the ontology file
owl_file = "./OWLs/SOMA.owl"  # Replace with the actual file path
onto = get_ontology(owl_file).load()

In [3]:
# Extract entities
classes = extract_classes(onto)
object_properties = extract_object_properties(onto)
data_properties = extract_data_properties(onto)
annotation_properties = extract_annotation_properties(onto)

In [4]:
# Print results
print("Classes:", len(classes))
print("Object Properties:", len(object_properties))
print("Data Properties:", len(data_properties))
print("Annotation Properties:", len(annotation_properties))

Classes: 537
Object Properties: 262
Data Properties: 38
Annotation Properties: 3


In [5]:
print("Class:", classes[0].items())
print("Object Property:", object_properties[2].items())
print("Data Property:", data_properties[2].items())
print("Annotation Property:", annotation_properties[2].items())

Class: dict_items([('name', 'Affordance'), ('comment', ['A relation between an object (the bearer) and others (the triggers) that describes the disposition of the bearer to be involved in an action execution that also involves some trigger object.']), ('label', []), ('defined_by', ['http://www.ease-crc.org/ont/SOMA-OBJ.owl']), ('iri', 'http://www.ease-crc.org/ont/SOMA.owl#Affordance'), ('subclasses', [])])
Object Property: dict_items([('name', 'isAffectedBy'), ('comment', ['Simple relationship between two actions to express that a variation in the course or outcome of the object (the affector) would have resulted in a variation in the subject (the affectee), e.g., a planning task that sets parameters such as goal position affects the subsequently executed pick-and-place task that uses that parameter.']), ('label', ['is affected by']), ('defined_by', ['http://www.ease-crc.org/ont/SOMA-ACT.owl']), ('iri', 'http://www.ease-crc.org/ont/SOMA.owl#isAffectedBy')])
Data Property: dict_items([(

In [11]:
import re

def find_classes_by_name(classes, pattern):
    matching_classes = []
    regex = re.compile(pattern, re.IGNORECASE)
    for cls in classes:
        if regex.search(cls['name']):  # Check if the pattern matches the class name
            matching_classes.append(cls)
    return matching_classes
pattern = r"^hold.*"  # Matches any class name that starts with 'Person'
matching_classes = find_classes_by_name(classes, pattern)
# Print results
print(f"Classes matching pattern '{pattern}':")
for cls in matching_classes:
    print(cls)

Classes matching pattern '^hold.*':
{'name': 'Holding', 'comment': ['A task by which an Agent keeps an object over which it has kinematic control, typically via grasping, at some specified pose.'], 'label': [], 'defined_by': ['http://www.ease-crc.org/ont/SOMA-ACT.owl'], 'iri': 'http://www.ease-crc.org/ont/SOMA.owl#Holding', 'subclasses': []}


In [12]:
from rapidfuzz import process

def find_similar_entities(word, lists, threshold=80):
    results = {}
    for category, entities in lists.items():
        matches = []
        for entity in entities:
            # Compute similarity between the word and entity name
            name = entity['name']
            similarity = process.extractOne(word, [name], score_cutoff=threshold)
            if similarity:
                matches.append({'name': name, 'score': similarity[1], 'details': entity})
        results[category] = matches
    return results

entity_lists = {'Classes': classes, 'Object Properties': object_properties, 'Data Properties': data_properties, 'Annotation Properties': annotation_properties}

In [16]:
# Input word to search
word = 'approach'

# Find similar entities
similar_matches = find_similar_entities(word, entity_lists, threshold=70)

# Print results
for category, matches in similar_matches.items():
    print(f"Category: {category}")
    for match in matches:
        print(f"  Name: {match['name']} (Score: {match['score']})")
        print(f"  Details: {match['details']}")

Category: Classes
  Name: Approaching (Score: 73.6842105263158)
  Details: {'name': 'Approaching', 'comment': ['A process type to classify motions by which a body approaches some object or location.'], 'label': [], 'defined_by': ['http://www.ease-crc.org/ont/SOMA-PROC.owl'], 'iri': 'http://www.ease-crc.org/ont/SOMA.owl#Approaching', 'subclasses': []}
Category: Object Properties
Category: Data Properties
Category: Annotation Properties


In [18]:
json_data = {
    "classes": classes,
    "object_properties": object_properties,
    "data_properties": data_properties,
    "annotation_properties": annotation_properties
}

In [52]:
import json
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
# Initialize Elasticsearch client
es = Elasticsearch(["http://localhost:9200"])
# Initialize Sentence Transformer for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
# FAISS index parameters
embedding_dimension = 384  # Matches the dimension of the embedding model
faiss_index = faiss.IndexFlatL2(embedding_dimension)

In [57]:
def encode_and_store_in_faiss(json_data):
    descriptions = []
    ids = []
    faiss_mappings = {}

    # Iterate over all categories in the JSON data
    for category, entities in json_data.items():
        for entity in entities:
            # Generate a unique ID for each entity
            entity_id = f"{category}_{entity['name']}"

            # Combine name and comment for embedding
            name = entity.get('name', '')
            comment = ' '.join(entity.get('comment', []))
            description = f"{comment}".strip()

            if description:  # Only process if description is not empty
                descriptions.append(description)
                ids.append(entity_id)
                faiss_mappings[entity_id] = entity

    # Generate embeddings
    embeddings = model.encode(descriptions)

    # Convert to FAISS compatible format and add to index
    faiss_index.reset()
    faiss_index.add(np.array(embeddings, dtype=np.float32))
    print(f"FAISS index size: {faiss_index.ntotal}")


    print(f"Added {len(embeddings)} embeddings to FAISS index")
    return ids, faiss_mappings

In [58]:
ids, faiss_mappings = encode_and_store_in_faiss(json_data)

# Save FAISS index and corresponding IDs
faiss.write_index(faiss_index, "./indexes/faiss_index.bin")
with open("indexes/faiss_ids.json", "w") as f:
    json.dump(ids, f)

# Save FAISS mappings to JSON
with open("./indexes/faiss_mapping.json", "w") as f:
    json.dump(faiss_mappings, f)

# Save FAISS mappings to JSON
with open("./indexes/faiss_ids.json", "w") as f:
    json.dump(ids, f)

FAISS index size: 815
Added 815 embeddings to FAISS index


In [59]:
# Load FAISS index
faiss_index = faiss.read_index("./indexes/faiss_index.bin")
with open("./indexes/faiss_mapping.json", "r") as f:
    faiss_mappings = json.load(f)
with open("./indexes/faiss_ids.json", "r") as f:
    faiss_ids = json.load(f)
# Initialize embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

In [60]:
# Query FAISS with semantic matching
def query_faiss(query_text, top_k=5):
    # Encode the query text into an embedding
    query_embedding = model.encode([query_text]).astype(np.float32)
    # Search in FAISS
    distances, indices = faiss_index.search(query_embedding, top_k)
    # Display results
    print(f"Query: \"{query_text}\"")
    print(f"Top {top_k} results:\n")
    print(type(indices), indices)
    print(len(faiss_ids))
    for i, idx in enumerate(indices[0]):
        print("ENumeration ", i, idx)
        if idx != -1:  # Ensure valid index

            entity_id = list(faiss_mappings.keys())[idx]
            entity_details = faiss_mappings[entity_id]
            name = entity_details.get('name', 'Unknown')
            comment = " ".join(entity_details.get('comment', []))
            print(f"Rank {i+1}:")
            print(f"  Name: {name}")
            print(f"  Comment: {comment}")
            print(f"  Distance: {distances[0][i]:.4f}")
            print()

# Example query: Find descriptions semantically matching "Cut an apple"
query_faiss("approach", top_k=3)

Query: "approach"
Top 3 results:

<class 'numpy.ndarray'> [[288  59 491]]
815
ENumeration  0 288
Rank 1:
  Name: FailedAttempt
  Comment: A description of a failed attempt to achieve some goal.
  Distance: 1.3662

ENumeration  1 59
Rank 2:
  Name: Plan
  Comment: A Description having an explicit Goal, to be achieved by executing the plan
  Distance: 1.4168

ENumeration  2 491
Rank 3:
  Name: SupportState
  Comment: Classifies States in which an object is not able to move under gravity because of its placement relative to some other object.
  Distance: 1.4174



In [62]:
from elasticsearch import Elasticsearch

# Initialize Elasticsearch client
es = Elasticsearch("http://localhost:9200")  # Replace with your Elasticsearch endpoint

def upload_to_elasticsearch(json_data, index_name):
    for category, entities in json_data.items():
        for entity in entities:
            # Create a document for Elasticsearch
            document = {
                "category": category,
                "name": entity.get("name", "Unknown"),
                "comment": entity.get("comment", []),
                "iri": entity.get("iri", "Unknown")
            }
            # Use a unique ID for each document based on category and name
            document_id = f"{category}_{entity['name']}"

            # Index the document into Elasticsearch
            es.index(index=index_name, id=document_id, document=document)
            print(f"Uploaded entity with ID: {document_id} to Elasticsearch")

In [63]:
# Upload json_data to Elasticsearch
upload_to_elasticsearch(json_data, "ontology_index")

  es.index(index=index_name, id=document_id, document=document)


Uploaded entity with ID: classes_Affordance to Elasticsearch
Uploaded entity with ID: classes_Concept to Elasticsearch
Uploaded entity with ID: classes_Task to Elasticsearch
Uploaded entity with ID: classes_Disposition to Elasticsearch
Uploaded entity with ID: classes_Role to Elasticsearch
Uploaded entity with ID: classes_Setpoint to Elasticsearch
Uploaded entity with ID: classes_Entity to Elasticsearch
Uploaded entity with ID: classes_Answer to Elasticsearch
Uploaded entity with ID: classes_Message to Elasticsearch
Uploaded entity with ID: classes_Event to Elasticsearch
Uploaded entity with ID: classes_Transition to Elasticsearch
Uploaded entity with ID: classes_PhysicalObject to Elasticsearch
Uploaded entity with ID: classes_Description to Elasticsearch
Uploaded entity with ID: classes_EventType to Elasticsearch
Uploaded entity with ID: classes_Parameter to Elasticsearch
Uploaded entity with ID: classes_ProcessType to Elasticsearch
Uploaded entity with ID: classes_InformationObject t

In [68]:
from elasticsearch import Elasticsearch

# Initialize Elasticsearch client
es = Elasticsearch(["http://localhost:9200"])

# Query Elasticsearch
def query_elasticsearch(index_name, field, regex):
    query = {
        "regexp": {
            field: {
                "value": regex  # Regular expression for the field value
            }
        }
    }
    response = es.search(index=index_name, body={"query": query})
    hits = response['hits']['hits']
    print(f"Query results for '{field}: {regex}' in index '{index_name}':")
    for hit in hits:
        print(f"ID: {hit['_id']}, Source: {hit['_source']}")

In [71]:
# Example query: Find all entities with "Person" in the name field
query_elasticsearch("ontology_index", "name", "app.*")

Query results for 'name: app.*' in index 'ontology_index':
ID: classes_Appliance, Source: {'category': 'classes', 'name': 'Appliance', 'comment': ['A device designed to perform a specific task, and that can be operated in some way.'], 'iri': 'http://www.ease-crc.org/ont/SOMA.owl#Appliance'}
ID: classes_Approaching, Source: {'category': 'classes', 'name': 'Approaching', 'comment': ['A process type to classify motions by which a body approaches some object or location.'], 'iri': 'http://www.ease-crc.org/ont/SOMA.owl#Approaching'}


  response = es.search(index=index_name, body={"query": query})
