In [6]:
import re

def revert_numeric_IRI(lines):
    """
    Rename numerically-identified definitions after their rdfs:label.

    A definition is a line of the form ":NNNNN rdf:type ..." (exactly five
    digits) that is immediately followed by a line of the form
    '   rdfs:label "Some Label" ;' (or ending in '.'). For each such pair the
    numeric IRI is replaced by the label text with spaces turned into
    underscores, and the label line is dropped from the output. Every other
    line is passed through untouched.

    Args:
        lines: List of text lines (as produced by ``readlines()``).

    Returns:
        new_lines: List of output lines, with rewritten definitions and
            without the consumed label lines.
        iri_mapping: Dict mapping each numeric IRI (e.g. "00001") to the
            label-derived IRI (e.g. "Some_Class").
    """
    # ":00001 rdf:type owl:Class ;" -> ("00001", " rdf:type owl:Class ;")
    definition_re = re.compile(r"^:(\d{5})(\s+rdf:type.*)")
    # '   rdfs:label "Some Class" ;' -> ("Some Class", ";")
    label_re = re.compile(r'^\s*rdfs:label\s+"([^"]+)"\s*([;.])')

    new_lines = []
    iri_mapping = {}
    total = len(lines)
    idx = 0

    while idx < total:
        current = lines[idx]
        head = definition_re.match(current)
        has_follower = idx + 1 < total
        label = label_re.match(lines[idx + 1]) if head and has_follower else None

        if label is None:
            # Not a definition, or a definition without a label right after it.
            new_lines.append(current)
            idx += 1
            continue

        numeric_iri, tail = head.groups()
        label_text, terminator = label.groups()

        readable_iri = label_text.replace(" ", "_")
        iri_mapping[numeric_iri] = readable_iri

        rewritten = f":{readable_iri}{tail}"
        if terminator == ".":
            # The label line closed the statement; since that line is being
            # removed, the definition line must close it instead.
            rewritten = re.sub(r"\s*;\s*$", " .", rewritten)

        # `tail` was captured with `.*`, which stops before the newline,
        # so the line terminator has to be added back.
        new_lines.append(rewritten + "\n")
        idx += 2  # consume both the definition and its label line

    return new_lines, iri_mapping

def update_references(lines, iri_mapping):
    """
    Replace references to numeric IRIs with their label-based IRIs.

    Every occurrence of ":NNNNN" (a colon followed by exactly five digits)
    whose digits are a key of ``iri_mapping`` is rewritten to ":<mapped IRI>".
    Occurrences whose digits are not in the mapping are left as they are.

    Only names in the default (empty) prefix are touched: a colon that is
    directly preceded by a word character or a hyphen belongs to some other
    prefix (e.g. "ex:00001") or to a blank-node label ("_:00001") and is
    therefore skipped.

    Args:
        lines: Iterable of text lines.
        iri_mapping: Dict mapping numeric IRIs (e.g. "00001") to label IRIs
            (e.g. "Some_Class").

    Returns:
        List of lines with the references rewritten.
    """
    # (?<![\w-]) : the colon must not be the tail of a prefixed or blank-node name.
    # \b         : exactly five digits, i.e. ":000012" must not match as ":00001".
    pattern = re.compile(r"(?<![\w-]):(\d{5})\b")

    def replace_numeric(match):
        numeric = match.group(1)
        if numeric in iri_mapping:
            return f":{iri_mapping[numeric]}"
        return match.group(0)

    return [pattern.sub(replace_numeric, line) for line in lines]

def main(input_path="../Utility/test.ttl", output_path="../Utility/reverted.ttl"):
    """
    Read a file, revert its numeric IRIs to label-based IRIs, and write the result.

    Args:
        input_path: Path of the file to read (UTF-8). Defaults to the
            location this script originally hard-coded.
        output_path: Path of the file to write (UTF-8). Defaults to the
            location this script originally hard-coded.
    """
    with open(input_path, "r", encoding="utf8") as infile:
        lines = infile.readlines()

    # First pass: rewrite the definition lines and collect the
    # numeric-IRI -> label-IRI mapping.
    reverted_lines, iri_mapping = revert_numeric_IRI(lines)

    # Second pass: apply that mapping to every remaining reference in the file.
    final_lines = update_references(reverted_lines, iri_mapping)

    with open(output_path, "w", encoding="utf8") as outfile:
        outfile.writelines(final_lines)

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

All done
