In [None]:
pip install rdflib

In [1]:
import requests
import pandas as pd
from pathlib import Path
import json
import re
from collections import Counter
import numpy as np
from typing import Dict, List, Tuple, Optional
from openpyxl import load_workbook
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, URIRef
from rdflib.namespace import DC, DCTERMS, SKOS

# Load Input Data

In [2]:
# Location of the Excel workbook listing all ontologies.
# Earlier alternatives (interactive file dialog, GitHub checkout path) are kept
# below for reference.
# NOTE(review): the active path is an absolute, user-specific location; the
# notebook will not run on another machine without editing it.
#file_path_ontologies = filedialog.askopenfilename(title="Select file listing all ontologies)")
#file_path_ontologies = "C:/Users/fbosche/Documents/GitHub/BE-OLS/data/source/ontologies_source.xlsx"
file_path_ontologies = "C:/Users/fbosche/University College London/EC3 - 1. Modelling and Standards - 1. Modelling and Standards/Material/Project D_Ontologies/Scripts/ontologies_source.xlsx"
print(file_path_ontologies)

# Read the 'Data' sheet from the file into a pandas DataFrame
sheet_name = 'Data'
try:
    df_ontologies = pd.read_excel(file_path_ontologies, sheet_name=sheet_name)
    print(f"{sheet_name} data loaded successfully!")
except Exception as e:
    # Any failure is only reported; df_ontologies is left unassigned in that case,
    # so later use of it would raise NameError.
    print(f"An error occurred while loading {sheet_name} data: {e}")

C:/Users/fbosche/University College London/EC3 - 1. Modelling and Standards - 1. Modelling and Standards/Material/Project D_Ontologies/Scripts/ontologies_source.xlsx
Data data loaded successfully!


  for idx, row in parser.parse():


# Get Information from TTL files

In [3]:
# Define common ontology namespaces
# NOTE: DC, DCTERMS and RDFS are also imported from rdflib in the imports cell;
# the assignments below rebind those names to plain Namespace objects, so every
# function defined after this cell uses these definitions, not the imported ones.
DC = Namespace("http://purl.org/dc/elements/1.1/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
# VANN and FOAF are not imported above, so they are defined here for the first time.
VANN = Namespace("http://purl.org/vocab/vann/")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")


def normalize_uri(uri: str) -> str:
    """
    Canonicalise a URI so that equivalent spellings compare equal.

    Two transformations are applied:
    - an 'https://' scheme is rewritten to 'http://'
    - every trailing '/' or '#' character is stripped

    As a result 'https://w3id.org/dot#' and 'https://w3id.org/dot/' both
    become 'http://w3id.org/dot'.

    Args:
        uri: URI string to normalize

    Returns:
        The normalized URI; falsy input (None, '') is returned unchanged.
    """
    if not uri:
        return uri

    secure_scheme = 'https://'
    if uri.startswith(secure_scheme):
        canonical = 'http://' + uri[len(secure_scheme):]
    else:
        canonical = uri

    # rstrip removes *all* trailing separator characters, in any mix.
    return canonical.rstrip('/#')


def is_prefix_used_in_graph(prefix_uri, all_uris):
    """
    Check if a prefix namespace URI is actually used in the graph.

    A prefix counts as used when some URI in the graph is either equal to the
    namespace (after normalization) or lies inside it, i.e. continues with a
    '/' or '#' separator right after the normalized namespace. Requiring the
    separator prevents a namespace such as 'http://ex.org/bot#' from being
    reported as used merely because the graph mentions
    'http://ex.org/bottle#Cap'.

    Args:
        prefix_uri: The namespace URI of the prefix (e.g., 'http://purl.org/dc/terms/')
        all_uris: Iterable of all URIs found in the graph (subjects, predicates, and objects)

    Returns:
        True if the prefix is used, False otherwise
    """
    # Only namespaces that were declared with a trailing separator may match
    # by containment; anything else must match exactly.
    prefix_ends_with_separator = prefix_uri.endswith(('/', '#'))
    normalized_prefix = normalize_uri(prefix_uri)

    # normalize_uri strips the trailing separator, so re-attach both variants:
    # a file may declare 'ns#' while terms are written 'ns/Term' or vice versa.
    namespace_starts = (normalized_prefix + '/', normalized_prefix + '#')

    for uri in all_uris:
        normalized_uri = normalize_uri(uri)

        # Exact match (the namespace itself appears as a resource)
        if normalized_uri == normalized_prefix:
            return True

        if prefix_ends_with_separator and normalized_uri.startswith(namespace_starts):
            return True

    return False


# Prefix names may contain '-' and '.' in Turtle (e.g. 'ifc4-add2'), and the
# whitespace between ':' and '<' is optional.
_TURTLE_PREFIX_RE = re.compile(r'@prefix\s+([\w.-]*):\s*<(.+?)>')
_SPARQL_PREFIX_RE = re.compile(r'PREFIX\s+([\w.-]*):\s*<(.+?)>', re.IGNORECASE)


def _read_declared_prefixes(ttl_file_path):
    """Return {prefix: namespace URI} for the prefix declarations written in the file."""
    prefix_map = {}
    with open(ttl_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line.startswith('@prefix'):
                # Turtle-style declaration
                match = _TURTLE_PREFIX_RE.match(line)
            elif line.upper().startswith('PREFIX'):
                # SPARQL-style declaration (keyword is case-insensitive)
                match = _SPARQL_PREFIX_RE.match(line)
            else:
                continue
            if match:
                prefix_map[match.group(1)] = match.group(2)
    return prefix_map


def _namespace_base(uri):
    """Return the part of an http(s) URI up to its last '#' (or last '/'), else None."""
    if not uri.startswith(('http://', 'https://')):
        return None
    separator = '#' if '#' in uri else '/'
    return uri.rsplit(separator, 1)[0] + separator


def extract_ontology_uris(ttl_file_path, debug=False):
    """
    Extract the ontologies referenced by a TTL file.

    Prefix declarations are read from the file text itself (not from the parsed
    graph), and only prefixes whose namespace is actually used by some subject,
    predicate or URI object are reported. Namespaces that are used without a
    declared prefix are reported with a prefix of None. The namespace of the
    file's own owl:Ontology is left out, and two prefixes that point to the
    same (normalized) namespace yield a single entry.

    Args:
        ttl_file_path: Path to the TTL file
        debug: If True, print debug information

    Returns:
        List of dicts with 'prefix_auto' and 'ontology_base' keys
    """
    prefix_map = _read_declared_prefixes(ttl_file_path)

    if debug:
        print(f"  Prefixes declared in file: {list(prefix_map.keys())}")

    g = Graph()
    g.parse(ttl_file_path, format='turtle')

    # Find the URI of the ontology being analyzed
    own_ontology_uris = set()
    own_ontology_prefixes = set()

    for s in g.subjects(RDF.type, OWL.Ontology):
        uri = str(s)
        base = _namespace_base(uri)
        if base is None:
            continue
        own_forms = {normalize_uri(base), normalize_uri(uri)}
        own_ontology_uris |= own_forms

        # Find if this ontology has a prefix
        for prefix, prefix_uri in prefix_map.items():
            if normalize_uri(prefix_uri) in own_forms:
                own_ontology_prefixes.add(prefix)

    if debug:
        print(f"  Own ontology URIs (normalized): {own_ontology_uris}")
        print(f"  Own ontology prefixes: {own_ontology_prefixes}")

    # Collect ALL full URIs used in subjects, predicates, AND objects
    all_uris = set()
    uri_bases = set()

    for s, p, o in g:
        # Objects are only considered when they are URIs, never Literals.
        terms = (s, p, o) if isinstance(o, URIRef) else (s, p)
        for term in terms:
            uri = str(term)
            base = _namespace_base(uri)
            if base is not None:
                all_uris.add(uri)
                uri_bases.add(base)

    if debug:
        print(f"  Total unique URIs in graph (incl. objects): {len(all_uris)}")

    # Remove the ontology being analyzed from uri_bases
    uri_bases = {
        base for base in uri_bases
        if normalize_uri(base) not in own_ontology_uris
    }

    # Merge prefix mappings and discovered ontology bases
    ontology_list = []
    added_bases = set()

    # Only add prefix mappings that are ACTUALLY USED in the content
    # Also prevent duplicates (e.g., two prefixes pointing to the same URI)
    for prefix, prefix_uri in prefix_map.items():
        if prefix in own_ontology_prefixes:
            continue

        is_used = is_prefix_used_in_graph(prefix_uri, all_uris)
        normalized = normalize_uri(prefix_uri)

        if debug:
            print(f"  Prefix '{prefix}' ({prefix_uri}): used={is_used}, already_added={normalized in added_bases}")

        if not is_used:
            continue

        if normalized not in added_bases:
            ontology_list.append({
                'prefix_auto': prefix,
                'ontology_base': prefix_uri
            })
            added_bases.add(normalized)
        elif debug:
            print(f"    -> Skipping duplicate prefix '{prefix}' (URI already added)")

    # Add any discovered bases that weren't matched to prefixes
    for base in sorted(uri_bases):
        normalized_base = normalize_uri(base)
        if normalized_base not in added_bases:
            ontology_list.append({
                'prefix_auto': None,
                'ontology_base': base
            })
            added_bases.add(normalized_base)
            if debug:
                print(f"  Added base without prefix: {base}")

    return ontology_list


def extract_ontology_metadata(ttl_file_path):
    """
    Read descriptive metadata about the ontology declared in a TTL file.

    The first subject typed owl:Ontology is taken as the ontology. Each
    metadata field is looked up through an ordered list of candidate
    predicates; the first predicate that has a value wins.

    Args:
        ttl_file_path: Path to the TTL file

    Returns:
        dict with ontology metadata fields, or a dict holding only an 'error'
        key when the file declares no owl:Ontology
    """
    g = Graph()
    g.parse(ttl_file_path, format='turtle')

    # Take the first declared ontology (None when there is none)
    ontology_uri = next(iter(g.subjects(RDF.type, OWL.Ontology)), None)

    if not ontology_uri:
        return {
            'error': 'No owl:Ontology declaration found in the file'
        }

    def get_value(predicate_list):
        """Return the first value found, trying the predicates in order."""
        for pred in predicate_list:
            first = next(iter(g.objects(ontology_uri, pred)), None)
            if first is not None:
                return str(first)
        return None

    def get_all_values(predicate_list):
        """Return every value of every listed predicate, or None if there are none."""
        values = [
            str(obj)
            for pred in predicate_list
            for obj in g.objects(ontology_uri, pred)
        ]
        return values or None

    def count_instances(owl_type):
        """Count the subjects declared with the given rdf:type."""
        return len(list(g.subjects(RDF.type, owl_type)))

    metadata = {
        'ontology_uri': str(ontology_uri),
        'title': get_value([DCTERMS.title, DC.title, RDFS.label]),
        'description': get_value([DCTERMS.description, DC.description, RDFS.comment]),
        'created': get_value([DCTERMS.created]),
        'issued': get_value([DCTERMS.issued]),
        'modified': get_value([DCTERMS.modified]),
        'creator': get_all_values([DCTERMS.creator, DC.creator]),
        'license': get_value([DCTERMS.license, DC.rights]),
        'publisher': get_value([DCTERMS.publisher, DC.publisher]),
        'version': get_value([OWL.versionInfo, DCTERMS.hasVersion]),
        'preferred_prefix': get_value([VANN.preferredNamespacePrefix]),
        'classes_count': count_instances(OWL.Class),
        'data_properties_count': count_instances(OWL.DatatypeProperty),
        'object_properties_count': count_instances(OWL.ObjectProperty)
    }

    return metadata


def build_uri_prefix_mapping(ontologies: List[Dict]) -> Dict[str, str]:
    """
    Map each listed ontology's normalized URI to its BE-OLS prefix.

    When several entries normalize to the same URI, the last one wins.

    Args:
        ontologies: List of ontology dictionaries; each must provide the
                    'uri' and 'prefix_manual' keys

    Returns:
        Dictionary mapping normalized URIs to prefixes
    """
    return {
        normalize_uri(entry['uri']): entry['prefix_manual']
        for entry in ontologies
    }


def build_extended_uri_prefix_mapping(ontologies: List[Dict]) -> Dict[str, str]:
    """
    Map every known normalized URI to the prefix most often used for it.

    Votes come from two places: each ontology's own ('prefix_auto', 'uri')
    pair, and every entry of its 'referenced_ontologies' list
    ('prefix_auto', 'ontology_base'). URIs that never appear with a prefix
    are left out of the result.

    Args:
        ontologies: List of ontology dictionaries with 'prefix_auto', 'uri',
                   and 'referenced_ontologies' keys

    Returns:
        Dictionary mapping normalized URIs to their most common prefix
    """
    tallies: Dict[str, Counter] = {}

    def record(uri, prefix):
        """Register one sighting of `uri`, voting for `prefix` unless it is None."""
        votes = tallies.setdefault(normalize_uri(uri), Counter())
        if prefix is not None:
            votes[prefix] += 1

    for onto in ontologies:
        # The ontology itself only votes when both values are present.
        if onto.get('prefix_auto') and onto.get('uri'):
            record(onto['uri'], onto['prefix_auto'])

        for ref in (onto.get('referenced_ontologies', []) or []):
            ref_uri = ref.get('ontology_base', '')
            if ref_uri:
                record(ref_uri, ref.get('prefix_auto'))

    return {
        normalized_uri: votes.most_common(1)[0][0]
        for normalized_uri, votes in tallies.items()
        if votes
    }


def fix_referenced_ontology_prefixes(onto: Dict, uri_to_prefix: Dict[str, str]) -> None:
    """
    Fill in missing prefixes of referenced ontologies from a known mapping.

    Every entry of onto['referenced_ontologies'] whose 'prefix_auto' is None
    gets the prefix that `uri_to_prefix` holds for its normalized
    'ontology_base', when there is one. Entries are modified in place.
    """
    for ref in (onto.get('referenced_ontologies', []) or []):
        if ref.get('prefix_auto') is not None:
            continue  # already has a prefix

        base_uri = ref.get('ontology_base', '')
        if not base_uri:
            continue

        known_prefix = uri_to_prefix.get(normalize_uri(base_uri))
        if known_prefix:
            ref['prefix_auto'] = known_prefix


def create_linked_ontology_lists(onto, uri_to_prefix, debug=False):
    """
    Create lists of linked AECO and upper ontologies.

    Each referenced ontology is classified by its normalized base URI:
    - equal to the ontology's own URI: skipped (self-reference)
    - present in `uri_to_prefix`: AECO, recorded under its BE-OLS prefix
    - otherwise: upper/external, recorded under its auto-detected prefix,
      unless it has no prefix or the prefix is one of the core W3C
      vocabularies (owl, rdf, xml, xsd, rdfs)

    Args:
        onto: Ontology dictionary (needs 'uri'; reads 'referenced_ontologies')
        uri_to_prefix: Mapping of normalized URIs to BE-OLS prefixes
        debug: If True, print debug information

    Returns:
        Tuple of (linked_aeco, linked_upper) lists
    """
    excluded_prefixes = {'owl', 'rdf', 'xml', 'xsd', 'rdfs'}

    linked_aeco = []
    linked_upper = []

    # Normalize self URI for comparison
    normalized_self_uri = normalize_uri(onto['uri'])

    if debug:
        print(f"  Processing {onto['prefix_manual']}:")
        print(f"    Self URI (normalized): {normalized_self_uri}")

    # `or []` tolerates an explicit None stored under the key.
    for ref in (onto.get('referenced_ontologies', []) or []):
        # Normalize the referenced ontology base for comparison
        ontology_base = normalize_uri(ref['ontology_base'])
        # The auto-detected prefix lives under 'prefix_auto' (None when undeclared).
        ref_prefix = ref.get('prefix_auto')

        # Skip self-references
        if ontology_base == normalized_self_uri:
            if debug:
                print(f"    Skipping self-reference: {ontology_base}")
            continue

        # Check if this is an AECO ontology (exists in our BE-OLS list)
        if ontology_base in uri_to_prefix:
            aeco_prefix = uri_to_prefix[ontology_base]
            linked_aeco.append(aeco_prefix)
            if debug:
                print(f"    AECO: {ref_prefix or '(no prefix)'} -> {aeco_prefix} (matched {ontology_base})")
        else:
            # It's an upper/external ontology
            if ref_prefix and ref_prefix not in excluded_prefixes:
                linked_upper.append(ref_prefix)
                if debug:
                    print(f"    UPPER: {ref_prefix} ({ontology_base} not in BE-OLS)")
            elif ref_prefix is None:
                if debug:
                    print(f"    EXTERNAL (no prefix, not in BE-OLS): {ontology_base}")
            elif debug:
                print(f"    Excluded: {ref_prefix} ({ontology_base})")

    return linked_aeco, linked_upper


def compare_ontology_lists(list_a, list_b):
    """
    Print the set differences between an auto-extracted list and a BE-OLS list.

    `list_a` is an iterable of names (or a falsy value for "nothing");
    `list_b` is a comma-separated string of names (or a falsy value).
    """
    auto_items = set(list_a) if list_a else set()
    manual_items = (
        {name.strip() for name in list_b.split(",")} if list_b else set()
    )

    print("  In Auto but not BE-OLS:", auto_items.difference(manual_items))
    print("  In BE-OLS but not Auto:", manual_items.difference(auto_items))


def compare_ontology_strings(string_a: str, string_b: str):
    """
    Print how an auto-extracted value (`string_a`) relates to the BE-OLS value (`string_b`).

    None means "missing" on that side; two present values are compared by
    their string form.
    """
    if string_a is None and string_b is None:
        message = "  Same (None)"
    elif string_a is None:
        message = "  In BE-OLS but not Auto: " + str(string_b)
    elif string_b is None:
        message = "  In Auto but not BE-OLS: " + str(string_a)
    elif str(string_a) == str(string_b):
        message = "  Same"
    else:
        message = "  Different: " + str(string_a) + " | " + str(string_b)

    print(message)


def compare_auto_and_manual(onto):
    """
    Compare auto-extracted data against BE-OLS data and print differences.

    Prints a header with the BE-OLS prefix, then either a note that no TTL
    file was available or one comparison per field. Scalar fields go through
    compare_ontology_strings, the linked-ontology fields through
    compare_ontology_lists.

    Args:
        onto: Ontology dictionary holding 'ttl_exists' plus the '<field>_auto'
              and '<field>_manual' entries for every compared field
    """
    print(onto['prefix_manual'], ":")

    if not onto['ttl_exists']:
        print(" No Auto data (no ttl file).")
        return

    # Values are passed through unconverted: compare_ontology_strings already
    # compares by string form, and wrapping a missing version in str() would
    # turn None into the text 'None' and hide that the value is absent.
    for field in ('prefix', 'title', 'version', 'license', 'description'):
        print(f" Comparison {field}:")
        compare_ontology_strings(onto[f'{field}_auto'], onto[f'{field}_manual'])

    for field in ('linked_aeco', 'linked_upper'):
        print(f" Comparison {field}:")
        compare_ontology_lists(onto[f'{field}_auto'], onto[f'{field}_manual'])


def create_unified_columns(onto):
    """
    Placeholder for creating unified columns from auto and BE-OLS data.

    Not implemented yet: ignores `onto` and returns None.
    """
    return

def create_final_fields(onto):
    """
    Create the '<field>_final' entries by merging auto-extracted and BE-OLS data.

    With a TTL file ('ttl_exists' truthy):
    - prefix, title, license, version: auto value, falling back to BE-OLS
    - created: first available of modified/issued/created (auto), then BE-OLS
    - linked_aeco, linked_upper, serialization, annotation, linked_by: auto value
    Without a TTL file those fields take the BE-OLS value.

    Regardless of the TTL file: description, documentation and the
    classification fields always come from BE-OLS; counts, creator, publisher
    and linked_by_aeco always come from the auto values.

    Afterwards empty/None link lists become '', a missing documentation flag
    becomes '', and yes/no/true/false flags are converted to booleans.

    Args:
        onto: Ontology dictionary

    Returns:
        None (modifies onto in place)
    """
    has_ttl = onto.get('ttl_exists', False)

    def manual(name):
        return onto.get(name + '_manual')

    def auto(name):
        return onto.get(name + '_auto')

    def auto_else_manual(name):
        return auto(name) or manual(name)

    # Field-level strategies that depend on whether a TTL file was parsed.
    preferred = auto_else_manual if has_ttl else manual
    by_source = auto if has_ttl else manual

    if has_ttl:
        created = auto('modified') or auto('issued') or auto('created') or manual('created')
        linked_aeco = onto.get('linked_aeco_auto', [])
        linked_upper = onto.get('linked_upper_auto', [])
        linked_by_aeco = onto.get('linked_by_aeco_auto')
    else:
        created = manual('created')
        linked_aeco = onto.get('linked_aeco_manual', '')
        linked_upper = onto.get('linked_upper_manual', '')
        linked_by_aeco = onto.get('linked_by_aeco_auto', [])

    # Ordered so that the keys are inserted into `onto` in a stable sequence.
    final_values = {
        'prefix': preferred('prefix'),
        'title': preferred('title'),
        'description': manual('description'),  # Always use BE-OLS for description
        'created': created,
        'license': preferred('license'),
        'version': preferred('version'),
        'linked_aeco': linked_aeco,
        'linked_upper': linked_upper,
        'serialization': by_source('serialization'),
        'documentation': manual('documentation'),
        'annotation': by_source('annotation'),
        'linked_by': by_source('linked_by'),
        'linked_by_aeco': linked_by_aeco,
        # Fields with only manual values (no auto calculation)
        'FOOPs': manual('FOOPs'),
        'conforms_to_standards': manual('conforms_to_standards'),
        'conceptual_data_model': manual('conceptual_data_model'),
        'cluster': manual('cluster'),
        'reference': manual('reference'),
        'primary_domain': manual('primary_domain'),
        'secondary_domain': manual('secondary_domain'),
        # Count fields
        'classes_count': auto('classes_count'),
        'data_properties_count': auto('data_properties_count'),
        'object_properties_count': auto('object_properties_count'),
        # Creator and publisher
        'creator': auto('creator'),
        'publisher': auto('publisher'),
    }
    for name, value in final_values.items():
        onto[name + '_final'] = value

    # Convert empty lists to empty strings
    for key in ('linked_aeco_final', 'linked_upper_final'):
        if onto[key] == [] or onto[key] is None:
            onto[key] = ''

    # Convert None to empty string for documentation (since no auto value is calculated)
    if onto.get('documentation_final') is None:
        onto['documentation_final'] = ''

    # Normalize yes/no to TRUE/FALSE for consistency
    affirmative = ('yes', 'true')
    negative = ('no', 'false')
    for field in ('serialization_final', 'documentation_final', 'annotation_final'):
        val = onto.get(field, '')
        if not val:
            continue
        word = str(val).lower()
        if word in affirmative:
            onto[field] = True
        elif word in negative:
            onto[field] = False


def calculate_annotation_coverage(ttl_file_path):
    """
    Measure how many of an ontology's own elements carry an rdfs:comment.

    Elements are the subjects typed owl:Class, owl:ObjectProperty,
    owl:DatatypeProperty or owl:AnnotationProperty whose URI starts with one
    of the ontology's own namespaces. Own namespaces are derived from the
    owl:Ontology URI(s) and from the default (empty) prefix binding. Only
    rdfs:comment counts as an annotation; rdfs:label is not consulted.

    Args:
        ttl_file_path: Path to the TTL file

    Returns:
        dict with:
            - total_elements: number of own elements found
            - annotated_elements: number of those with at least one rdfs:comment
            - coverage_percent: annotated share in percent, rounded to 1 decimal
              (0 when there are no elements)
            - has_annotations: True if coverage >= 50%, False otherwise
    """
    g = Graph()
    g.parse(ttl_file_path, format='turtle')

    # Namespace(s) implied by the ontology declaration itself
    own_namespaces = set()
    for subject in g.subjects(RDF.type, OWL.Ontology):
        uri = str(subject)
        if uri.startswith(('http://', 'https://')):
            separator = '#' if '#' in uri else '/'
            own_namespaces.add(uri.rsplit(separator, 1)[0] + separator)

    # The default prefix binding is treated as an own namespace as well
    own_namespaces.update(
        str(ns) for prefix, ns in g.namespaces()
        if prefix == '' or prefix is None
    )

    # str.startswith accepts a tuple and returns False for an empty one
    own_namespace_tuple = tuple(own_namespaces)

    def is_own_element(uri):
        """Check if URI belongs to this ontology"""
        return str(uri).startswith(own_namespace_tuple)

    def has_annotation(subject):
        """Check if subject has rdfs:comment"""
        return any(True for _ in g.objects(subject, RDFS.comment))

    element_types = (
        OWL.Class,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
    )
    elements = [
        subject
        for element_type in element_types
        for subject in g.subjects(RDF.type, element_type)
        if is_own_element(subject)
    ]

    total = len(elements)
    annotated = len([element for element in elements if has_annotation(element)])

    if total > 0:
        coverage = annotated / total * 100
    else:
        coverage = 0

    return {
        'total_elements': total,
        'annotated_elements': annotated,
        'coverage_percent': round(coverage, 1),
        'has_annotations': coverage >= 50
    }


def calculate_linked_by(ontologies):
    """
    Calculate the 'linked_by_auto' and 'linked_by_aeco_auto' fields for each ontology.

    An ontology is marked as linked_by (True) if any *other* ontology in the
    list names its prefix in one of its linked-ontology fields: the
    auto-extracted 'linked_aeco_auto' / 'linked_upper_auto' lists, the BE-OLS
    'linked_aeco_manual' / 'linked_upper_manual' comma-separated strings, or
    the legacy 'linked_aeco' / 'linked_upper' keys. Prefixes are compared
    case-insensitively.

    'linked_by_aeco_auto' lists the prefixes of the other ontologies whose
    'linked_aeco_auto' names this ontology.

    An ontology without any prefix gets linked_by_auto = False and no
    'linked_by_aeco_auto' entry.

    Args:
        ontologies: List of ontology dictionaries

    Returns:
        None (modifies ontologies in place)
    """
    # 'linked_aeco' / 'linked_upper' are kept for dictionaries built by older
    # code; the fields produced elsewhere in this notebook carry the _auto suffix.
    link_keys = (
        'linked_aeco_auto', 'linked_aeco', 'linked_aeco_manual',
        'linked_upper_auto', 'linked_upper', 'linked_upper_manual',
    )

    def own_prefix(entry):
        """BE-OLS prefix when available, otherwise the auto-detected one."""
        return entry.get('prefix_manual') or entry.get('prefix_auto')

    def as_prefix_set(value):
        """Lower-cased prefixes from a list or a comma-separated string."""
        if isinstance(value, str):
            return {part.strip().lower() for part in value.split(',') if part.strip()}
        if isinstance(value, (list, tuple, set)):
            return {str(item).strip().lower() for item in value if item}
        return set()

    # Parse every ontology's link fields once instead of once per pair.
    referenced = []
    aeco_auto = []
    for entry in ontologies:
        names = set()
        for key in link_keys:
            names |= as_prefix_set(entry.get(key))
        referenced.append(names)
        aeco_auto.append(as_prefix_set(entry.get('linked_aeco_auto')))

    for onto in ontologies:
        onto_prefix = own_prefix(onto)
        if not onto_prefix:
            onto['linked_by_auto'] = False
            continue

        wanted = onto_prefix.lower()

        # Referenced by any other ontology, through any link field
        onto['linked_by_auto'] = any(
            other is not onto and wanted in names
            for other, names in zip(ontologies, referenced)
        )

        # Build list of AECO ontologies that link to this one
        onto['linked_by_aeco_auto'] = [
            own_prefix(other)
            for other, names in zip(ontologies, aeco_auto)
            if other is not onto and own_prefix(other) and wanted in names
        ]


def calculate_scores(onto):
    """
    Calculate alignment, accessibility, and quality scores for an ontology.

    Writes 6 fields into `onto`:
    - linkage_to_upper: bool - 'linked_upper_final' names at least one ontology
    - linkage_to_aeco: bool - 'linked_aeco_final' names at least one ontology
    - linkage_to_aeco_meta: bool - one of the linked AECO ontologies is in the
      meta list defined at the top of the function
    - score_alignment: 0-3, number of the three flags above that are True
    - score_accessibility: 0-3, one point each for a conceptual data model,
      a serialization, and a URI other than '' / 'n/a'
    - score_quality: one point for documentation plus the annotation coverage
      ('annotation_coverage_percent') as a fraction rounded to one decimal

    Args:
        onto: Ontology dictionary

    Returns:
        None (modifies onto in place)
    """
    meta_list = ['ph', 'brick', 'rec', 'bot', 'saref4bldg', 'sosa', 'ssn', 'db']
    affirmative = ['yes', 'true']

    def flag_is_set(value):
        """True for a value equal to True, or the text yes/true in any case."""
        return bool((value and value == True) or str(value).lower() in affirmative)

    # --- Alignment (uses the _final fields) ---
    upper = onto.get('linked_upper_final', '')
    has_upper = False
    if upper and upper != 'n/a':
        # A truthy list is non-empty; a string must contain non-blank text.
        has_upper = isinstance(upper, list) or (
            isinstance(upper, str) and bool(upper.strip())
        )

    aeco = onto.get('linked_aeco_final', '')
    aeco_names = []
    if aeco and aeco != 'n/a':
        if isinstance(aeco, list):
            aeco_names = aeco
        elif isinstance(aeco, str) and aeco.strip():
            aeco_names = [name.strip() for name in aeco.split(',')]

    has_aeco = len(aeco_names) > 0
    has_meta = has_aeco and any(name in aeco_names for name in meta_list)

    onto['linkage_to_upper'] = has_upper
    onto['linkage_to_aeco'] = has_aeco
    onto['linkage_to_aeco_meta'] = has_meta
    onto['score_alignment'] = sum([int(has_upper), int(has_aeco), int(has_meta)])

    # --- Accessibility ---
    data_model = onto.get('conceptual_data_model_manual', '')
    has_data_model = bool(data_model) and str(data_model).lower() in affirmative

    has_serialization = flag_is_set(onto.get('serialization_final', ''))

    uri = onto.get('uri', '')
    has_uri = bool(uri and uri != 'n/a' and uri != '')

    onto['score_accessibility'] = sum(
        [int(has_data_model), int(has_serialization), int(has_uri)]
    )

    # --- Quality ---
    documentation_points = 1 if flag_is_set(onto.get('documentation_final', '')) else 0

    # The annotation part is the fractional coverage, not a yes/no flag,
    # so the total can be a non-integer.
    annotation_coverage = onto.get('annotation_coverage_percent', '')
    if annotation_coverage:
        annotation_points = round(annotation_coverage / 100.0,1)
    else:
        annotation_points = 0

    onto['score_quality'] = sum([documentation_points, annotation_points])



# Optional spreadsheet columns that are copied verbatim into '<column>_manual'.
# (Listed for documentation; the result dict below fixes the output column order.)
_MANUAL_COLUMNS = (
    'title', 'cluster', 'reference', 'primary_domain', 'secondary_domain',
    'conceptual_data_model', 'serialization', 'documentation', 'annotation',
    'linked_by', 'FOOPs', 'conforms_to_standards', 'version', 'created',
    'license', 'description', 'linked_aeco', 'linked_upper',
)

# '_manual' fields whose yes/no (or true/false) text is converted to a boolean.
_YES_NO_FIELDS = (
    'conceptual_data_model_manual', 'serialization_manual', 'documentation_manual',
    'annotation_manual', 'linked_by_manual',
)

# (result key, metadata key) pairs copied from extract_ontology_metadata() output.
_METADATA_FIELD_MAP = (
    ('prefix_auto', 'preferred_prefix'),
    ('title_auto', 'title'),
    ('version_auto', 'version'),
    ('created_auto', 'created'),
    ('issued_auto', 'issued'),
    ('modified_auto', 'modified'),
    ('creator_auto', 'creator'),
    ('license_auto', 'license'),
    ('description_auto', 'description'),
    ('publisher_auto', 'publisher'),
    ('classes_count_auto', 'classes_count'),
    ('data_properties_count_auto', 'data_properties_count'),
    ('object_properties_count_auto', 'object_properties_count'),
)


def _manual_cell(row, column):
    """Return row[column] when the column exists and the cell is filled in, else None."""
    if column in row and pd.notna(row[column]):
        return row[column]
    return None


def process_ontologies(excel_file, ttl_folder, debug=False, sheet_name=0):
    """
    Process all ontologies listed in the Excel file.

    For every spreadsheet row a result dict is created holding the manually
    curated values ('*_manual'), placeholders for the automatically extracted
    values ('*_auto') and for the merged values ('*_final'). When a TTL file
    named '<prefix>.ttl' exists in ttl_folder, the '*_auto' fields are filled
    from it. Afterwards the cross-ontology steps (prefix fixing, linked lists,
    comparison, unified columns, linked_by, final fields, scores) are run over
    the whole list.

    Args:
        excel_file: Path to Excel file with ontology list. The 'prefix' and
            'uri' columns are required; all other columns are optional.
        ttl_folder: Path to folder containing TTL files
        debug: If True, print debug information for prefix extraction
        sheet_name: Sheet to read, passed to pandas.read_excel. Defaults to 0
            (the first sheet), which is what pandas uses when no sheet is given.

    Returns:
        ontologies: the list of ontologies from excel_file augmented with new data
            (one dict per spreadsheet row, in row order)
    """
    df = pd.read_excel(excel_file, sheet_name=sheet_name)

    ontologies = []
    ttl_folder_path = Path(ttl_folder)

    for _, row in df.iterrows():
        # Required columns: a missing 'prefix' or 'uri' column raises KeyError here.
        prefix_manual = row['prefix'] if pd.notna(row['prefix']) else "no_prefix"
        uri = row['uri'] if pd.notna(row['uri']) else ""

        # NOTE: key order matters - it becomes the column order of the Excel output.
        result = {
            'uri': uri,
            'ttl_exists': False,
            'error': '',
            'prefix_manual': prefix_manual,
            'prefix_auto': None,
            'prefix_final': None,
            'title_manual': _manual_cell(row, 'title'),
            'title_auto': None,
            'title_final': None,
            'description_manual': _manual_cell(row, 'description'),
            'description_auto': None,
            'description_final': None,
            'created_manual': _manual_cell(row, 'created'),
            'created_auto': None,
            'issued_auto': None,
            'modified_auto': None,
            'created_final': None,
            'license_manual': _manual_cell(row, 'license'),
            'license_auto': None,
            'license_final': None,
            'version_manual': _manual_cell(row, 'version'),
            'version_auto': None,
            'version_final': None,
            'linked_aeco_manual': _manual_cell(row, 'linked_aeco'),
            'linked_aeco_auto': [],
            'linked_aeco_final': None,
            'linked_upper_manual': _manual_cell(row, 'linked_upper'),
            'linked_upper_auto': [],
            'linked_upper_final': None,
            'serialization_manual': _manual_cell(row, 'serialization'),
            'serialization_auto': False,
            'serialization_final': None,
            'documentation_manual': _manual_cell(row, 'documentation'),
            'documentation_auto': None,
            'documentation_final': None,
            'annotation_manual': _manual_cell(row, 'annotation'),
            'annotation_auto': None,
            'annotation_final': None,
            'annotation_coverage_percent': None,
            'linked_by_manual': _manual_cell(row, 'linked_by'),
            'linked_by_auto': None,
            'linked_by_final': None,
            'linked_by_aeco_auto': [],
            'linked_by_aeco_final': None,
            'FOOPs_manual': _manual_cell(row, 'FOOPs'),
            'FOOPs_auto': None,
            'FOOPs_final': None,
            'conforms_to_standards_manual': _manual_cell(row, 'conforms_to_standards'),
            'conforms_to_standards_auto': None,
            'conforms_to_standards_final': None,
            'conceptual_data_model_manual': _manual_cell(row, 'conceptual_data_model'),
            'conceptual_data_model_auto': None,
            'conceptual_data_model_final': None,
            'cluster_manual': _manual_cell(row, 'cluster'),
            'cluster_auto': None,
            'cluster_final': None,
            'reference_manual': _manual_cell(row, 'reference'),
            'reference_auto': None,
            'reference_final': None,
            'primary_domain_manual': _manual_cell(row, 'primary_domain'),
            'primary_domain_auto': None,
            'primary_domain_final': None,
            'secondary_domain_manual': _manual_cell(row, 'secondary_domain'),
            'secondary_domain_auto': None,
            'secondary_domain_final': None,
            'creator_auto': [],
            'creator_final': None,
            'publisher_auto': None,
            'publisher_final': None,
            'referenced_ontologies': [],
            'classes_count_auto': None,
            'classes_count_final': None,
            'data_properties_count_auto': None,
            'data_properties_count_final': None,
            'object_properties_count_auto': None,
            'object_properties_count_final': None,
        }

        # Normalize yes/no to boolean for _manual fields; any other text is kept as-is.
        for field in _YES_NO_FIELDS:
            val = result.get(field)
            if val is None:
                continue
            lowered = str(val).lower()
            if lowered in ['yes', 'true']:
                result[field] = True
            elif lowered in ['no', 'false']:
                result[field] = False

        ttl_file = ttl_folder_path / f"{prefix_manual}.ttl"

        if ttl_file.exists():
            result['ttl_exists'] = True

            print(f"\nProcessing {prefix_manual}.ttl...")
            try:
                referenced_ontologies = extract_ontology_uris(ttl_file, debug=debug)
                metadata = extract_ontology_metadata(ttl_file)

                print(f" Total referenced ontologies found: {len(referenced_ontologies)}")
                if debug:
                    for ref in referenced_ontologies:
                        print(f"   - {ref['prefix_auto']}: {ref['ontology_base']}")

                for result_key, metadata_key in _METADATA_FIELD_MAP:
                    result[result_key] = metadata[metadata_key]

                # Extract annotation coverage
                annotation_info = calculate_annotation_coverage(ttl_file)
                result['annotation_auto'] = annotation_info['has_annotations']
                result['annotation_coverage_percent'] = annotation_info['coverage_percent']

                # Set serialization based on TTL existence
                result['serialization_auto'] = True
                result['referenced_ontologies'] = referenced_ontologies

            except Exception as e:
                # Best effort: keep the row (with whatever was extracted so far)
                # and record the failure instead of aborting the whole run.
                print(f"Error processing {prefix_manual}.ttl: {e}")
                result['error'] = str(e)
        else:
            print(f"Warning: {ttl_file} not found")
            result['error'] = 'File not found'

        # Every row is kept, whether or not its TTL file could be processed.
        ontologies.append(result)

    # Build URI to prefix mappings
    uri_to_prefix = build_uri_prefix_mapping(ontologies)
    extended_uri_to_prefix = build_extended_uri_prefix_mapping(ontologies)

    if debug:
        print("\nURI to prefix mapping (from BE-OLS):")
        for mapped_uri, mapped_prefix in sorted(uri_to_prefix.items()):
            print(f"  {mapped_uri} -> {mapped_prefix}")

    # Fix the missing prefixes
    for onto in ontologies:
        fix_referenced_ontology_prefixes(onto, extended_uri_to_prefix)

    # Create linked_aeco and linked_upper lists
    print("\n" + "="*60)
    print("Creating linked ontology lists...")
    print("="*60)
    for onto in ontologies:
        linked_aeco, linked_upper = create_linked_ontology_lists(onto, uri_to_prefix, debug=debug)
        onto['linked_aeco_auto'] = linked_aeco
        onto['linked_upper_auto'] = linked_upper

    # Compare auto-extracted vs BE-OLS data
    print("\n" + "="*60)
    print("COMPARISON: Auto-extracted vs BE-OLS data")
    print("="*60)
    for onto in ontologies:
        compare_auto_and_manual(onto)

    # Create unified columns
    for onto in ontologies:
        create_unified_columns(onto)

    # Calculate linked_by field (must be done before create_final_fields)
    calculate_linked_by(ontologies)

    # Create final fields by merging auto-extracted and BE-OLS data
    for onto in ontologies:
        create_final_fields(onto)

    # Calculate alignment, accessibility, and quality scores
    for onto in ontologies:
        calculate_scores(onto)

    return ontologies



def _json_default(value):
    """Fallback encoder for values the json module cannot serialise natively.

    Date/time-like objects (anything exposing ``isoformat``) become ISO
    strings, numpy-style scalars/arrays (anything exposing ``tolist``) become
    plain Python values, and everything else falls back to ``str(value)``.
    """
    if hasattr(value, 'isoformat'):
        return value.isoformat()
    if hasattr(value, 'tolist'):
        return value.tolist()
    return str(value)


def write_output_JSON(ontologies, output_json):
    """
    Write ontologies data to JSON file.

    Args:
        ontologies: List of ontology dicts (as returned by process_ontologies)
        output_json: Path of the JSON file to create or overwrite

    Values that are not JSON-native (e.g. dates read from Excel cells) are
    converted by _json_default instead of aborting the dump half-way and
    leaving a truncated file behind.
    """
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(ontologies, f, indent=2, ensure_ascii=False, default=_json_default)
    
    print(f"\nJSON processing complete. Results saved to {output_json}")

def _join_non_null(values):
    """Join the non-None items of ``values`` into one comma-separated string."""
    return ', '.join(str(item) for item in values if item is not None)


def _format_reference(ref):
    """Render one referenced-ontology entry as '{prefix: uri}' (str(ref) for non-dicts)."""
    if isinstance(ref, dict):
        prefix_val = ref.get('prefix_auto', 'None')
        uri_val = ref.get('ontology_base', '')
        return f"{{{prefix_val}: {uri_val}}}"
    return str(ref)


def write_output_EXCEL(ontologies, output_excel):
    """
    Write ontologies data to Excel file.

    List-valued fields are flattened to comma-separated text so that each
    value fits in a single cell. The input dicts are not modified: every row
    is shallow-copied before its fields are replaced.

    Args:
        ontologies: List of ontology dicts (as returned by process_ontologies)
        output_excel: Path of the .xlsx file to create or overwrite
            (written to a sheet named 'Data')
    """
    # Flattened only when the field actually holds a list; other values pass through.
    # 'creator_auto' now goes through the same str()/None-filtering join as the
    # other list fields, so a None or non-string creator no longer raises TypeError.
    passthrough_list_fields = ('creator_auto', 'linked_aeco_final', 'linked_upper_final')
    # Always written as text: anything that is not a non-empty list (including a
    # missing key) becomes ''. Order is kept because missing keys get appended.
    always_text_fields = (
        'linked_aeco_auto', 'linked_upper_auto',
        'linked_by_aeco_auto', 'linked_by_aeco_final',
    )

    excel_data = []
    for result in ontologies:
        excel_row = result.copy()

        for field in passthrough_list_fields:
            if isinstance(excel_row.get(field), list):
                excel_row[field] = _join_non_null(excel_row[field])

        references = excel_row.get('referenced_ontologies')
        if isinstance(references, list):
            excel_row['referenced_ontologies'] = ', '.join(_format_reference(ref) for ref in references)

        for field in always_text_fields:
            value = excel_row.get(field)
            excel_row[field] = _join_non_null(value) if isinstance(value, list) and value else ''

        excel_data.append(excel_row)
    
    df_output = pd.DataFrame(excel_data)
    df_output.to_excel(output_excel, sheet_name='Data', index=False, engine='openpyxl')
    
    print(f"Excel processing complete. Results saved to {output_excel}")
    print(f"Total ontologies processed: {len(ontologies)}")



In [4]:
# Configure paths and run processing
# file_path_ontologies is set in the "Load Input Data" cell; the TTL files are
# expected in an "Ontologies_TTL" folder located next to that workbook.
filepath = Path(file_path_ontologies)
ontologies_ttl_folder = filepath.parent / "Ontologies_TTL"

# Process all ontologies (set debug=True to see detailed prefix/URI matching)
# The resulting list is kept in `ontologies` for the later cells.
ontologies = process_ontologies(file_path_ontologies, ontologies_ttl_folder, debug=False)

# Write outputs
# The Excel result is written next to the source workbook.
output_excel = filepath.parent / "Ontologies_forRepo.xlsx"
write_output_EXCEL(ontologies, output_excel)
# JSON export is currently disabled; uncomment the two lines below to enable it.
#output_json = filepath.parent / "Ontologies_forRepo.json"
#write_output_JSON(ontologies, output_json)

  for idx, row in parser.parse():



Processing aec3po.ttl...
 Total referenced ontologies found: 25

Processing aic.ttl...
 Total referenced ontologies found: 6

Processing ao.ttl...
 Total referenced ontologies found: 9

Processing aoi.ttl...
 Total referenced ontologies found: 7

Processing asbingowl.ttl...
 Total referenced ontologies found: 7

Processing bao.ttl...
 Total referenced ontologies found: 9

Processing bcao.ttl...
 Total referenced ontologies found: 4

Processing bcfowl.ttl...
 Total referenced ontologies found: 11

Processing bem-reno.ttl...
 Total referenced ontologies found: 6

Processing beo.ttl...
 Total referenced ontologies found: 13

Processing bm.ttl...
 Total referenced ontologies found: 7

Processing bop.ttl...
 Total referenced ontologies found: 8

Processing bot.ttl...
 Total referenced ontologies found: 17

Processing bpo.ttl...
 Total referenced ontologies found: 12

Processing brick.ttl...
 Total referenced ontologies found: 47

Processing brot.ttl...
 Total referenced ontologies found: 7

Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_xsd_date at 0x000001C6A9CE1C60>
Traceback (most recent call last):
  File "C:\Users\fbosche\AppData\Roaming\Python\Python311\site-packages\rdflib\term.py", line 2262, in _castLexicalToPython
    return conv_func(lexical)  # type: ignore[arg-type]
           ^^^^^^^^^^^^^^^^^^
  File "C:\Users\fbosche\AppData\Roaming\Python\Python311\site-packages\rdflib\xsd_datetime.py", line 586, in parse_xsd_date
    raise ValueError("XSD Date string must contain at least two dashes")
ValueError: XSD Date string must contain at least two dashes


 Total referenced ontologies found: 9

Processing seas.ttl...
 Total referenced ontologies found: 16

Processing sosa.ttl...
 Total referenced ontologies found: 13

Processing ssn.ttl...
 Total referenced ontologies found: 12

Processing stg.ttl...


Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_xsd_date at 0x000001C6A9CE1C60>
Traceback (most recent call last):
  File "C:\Users\fbosche\AppData\Roaming\Python\Python311\site-packages\rdflib\term.py", line 2262, in _castLexicalToPython
    return conv_func(lexical)  # type: ignore[arg-type]
           ^^^^^^^^^^^^^^^^^^
  File "C:\Users\fbosche\AppData\Roaming\Python\Python311\site-packages\rdflib\xsd_datetime.py", line 586, in parse_xsd_date
    raise ValueError("XSD Date string must contain at least two dashes")
ValueError: XSD Date string must contain at least two dashes
Failed to convert Literal lexical form to value. Datatype=http://www.w3.org/2001/XMLSchema#date, Converter=<function parse_xsd_date at 0x000001C6A9CE1C60>
Traceback (most recent call last):
  File "C:\Users\fbosche\AppData\Roaming\Python\Python311\site-packages\rdflib\term.py", line 2262, in _castLexicalToPython
    return conv_func(lexica

 Total referenced ontologies found: 7

Processing unocs.ttl...
 Total referenced ontologies found: 28

Processing wgs84.ttl...
 Total referenced ontologies found: 6

Creating linked ontology lists...

COMPARISON: Auto-extracted vs BE-OLS data
aec3po :
 Comparison prefix:
  Same
 Comparison title:
  Different: AEC3PO | Architecture, Engineering, Construction, Compliance Checking and Permitting Ontology (AEC3PO)
 Comparison version:
  Same
 Comparison license:
  Same (None)
 Comparison description:
  Different: The Architecture, Engineering and Construction Compliance Checking and Permitting ontology (AEC3PO), based on the statements on building compliance-related document and design codes. | The Architecture, Engineering, Construction, Compliance Checking and Permitting Ontology (AEC3PO) is an ontology developed to support the automated compliance checking of construction, renovation, and demolition works. It has been developed in the context of the Automated Compliance Checking for Con

In [None]:
# Endpoint that get_foops_score() POSTs the ontology URI to.
foops_url = "https://foops.linkeddata.es/assessOntology"

# HTTP headers sent with every request to foops_url: the request body is JSON
# and a JSON reply is asked for.
foops_headers = {
    "accept": "application/json;charset=UTF-8",
    "Content-Type": "application/json;charset=UTF-8",
}

def get_foops_score(uri, timeout=120):
    """
    Request the overall assessment score of an ontology from the FOOPs service.

    Args:
        uri: Ontology URI to assess. None/NaN, an empty or blank string and
            'n/a' are all treated as "no URI" and are not sent to the service.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout a stalled request would block the notebook indefinitely.

    Returns:
        0.0 when there is no usable URI, the 'overall_score' value of the
        service reply on success, or -1.0 when the request failed or the reply
        did not contain a usable score.
    """
    auto_uri = ""
    auto_title = ""
    foops_score = -1.0

    if isinstance(uri, str):
        # process_ontologies stores missing URIs as "", so blank text must
        # count as missing too (not only the literal 'n/a').
        uri_missing = uri.strip() in ('', 'n/a')
    else:
        uri_missing = bool(pd.isnull(uri))

    if uri_missing:
        foops_score = 0.0
    else:
        try:
            foops_data = {"ontologyUri": uri}
            response = requests.post(foops_url, headers=foops_headers, json=foops_data, timeout=timeout)
            # Turn 4xx/5xx replies into HTTPError so the handler below is reached.
            response.raise_for_status()
            payload = response.json()
            print(payload)

            if isinstance(payload, dict) and 'ontology_URI' in payload:
                auto_uri = payload['ontology_URI']
                auto_title = payload.get('ontology_title', '')
                score = payload.get('overall_score')
                if isinstance(score, (int, float)) and not isinstance(score, bool):
                    foops_score = score
                else:
                    print("FOOPs response did not contain a numeric overall_score")
            else:
                print("FOOPs Internal Server Error")

        except requests.exceptions.HTTPError as e:
            print("HTTP error:", e.response.status_code, e.response.text)

        except requests.exceptions.ConnectionError:
            print("Error: Failed to connect to the server")

        except requests.exceptions.Timeout:
            print("Error: Request timed out")

        except requests.exceptions.RequestException as e:
            print("Unexpected error:", str(e))

        except ValueError as e:
            # Older requests versions raise a plain ValueError for non-JSON bodies.
            print("Unexpected error:", str(e))

    print(f"  auto_uri: {auto_uri}; auto_title: {auto_title}; foops_score: {foops_score}")

    return foops_score

def process_ontologies_foops(ontologies, debug=False):
    """
    Add FOOPs scores to every ontology record.

    For each record the score returned by get_foops_score() for its 'uri' is
    rounded to two decimals. A rounded score of -1.0 means no score could be
    obtained: 'FOOPs_auto' is then None and 'FOOPs_final' falls back to
    'FOOPs_manual'. Otherwise both fields receive the rounded score.

    Args:
        ontologies: List of ontology dicts; each needs 'prefix_final', 'uri'
            and 'title_final' (and 'FOOPs_manual' when no score is obtained)
        debug: Accepted for symmetry with process_ontologies; not used here

    Returns:
        The same list object, with its dicts updated in place
    """
    for onto in ontologies:
        prefix, uri, title = onto['prefix_final'], onto['uri'], onto['title_final']
        print(f"prefix: {prefix}; title: {title}; uri: {uri}")

        foops_score = round(get_foops_score(uri), 2)
        score_unavailable = foops_score == -1.0

        onto['FOOPs_auto'] = None if score_unavailable else foops_score
        onto['FOOPs_final'] = onto['FOOPs_manual'] if score_unavailable else foops_score

    return ontologies


In [None]:
# Add FOOPs scores to the ontologies produced by the process_ontologies cell.
# Each ontology triggers one call to get_foops_score().
# NOTE: process_ontologies_foops accepts `debug` but does not use it.
ontologies = process_ontologies_foops(ontologies, debug=False)

# Write outputs
# This overwrites the Ontologies_forRepo.xlsx written by the earlier cell,
# now including the FOOPs_auto / FOOPs_final values.
filepath = Path(file_path_ontologies)
output_excel = filepath.parent / "Ontologies_forRepo.xlsx"
write_output_EXCEL(ontologies, output_excel)
# JSON export is currently disabled; uncomment the two lines below to enable it.
#output_json = filepath.parent / "Ontologies_forRepo.json"
#write_output_JSON(ontologies, output_json)
