# Change IRIs to numbers
The following script goes through a .ttl file and replaces the IRI of every class, datatype property and object property with a 5-digit, zero-padded number, starting from 00001. The original IRI name is then saved as an rdfs:label.

In [1]:
import re

def rename_lines(owl_type, lines, start_number):
    """
    Renumber the subject of every ``<subject> rdf:type <owl_type>`` declaration.

    A line counts as a declaration when it contains `` rdf:type <owl_type> ;``
    or `` rdf:type <owl_type> .`` and the text in front of ``rdf:type`` is a
    single token. Such a line is replaced by two lines: the declaration with
    the subject swapped for a zero-padded 5-digit number, and an
    ``rdfs:label`` holding the old name (underscores turned into spaces).
    The original terminator (``;`` or ``.``) is moved to the label line.

    Lines whose leading text is empty or contains whitespace (for example a
    nested ``owl:equivalentClass [ rdf:type owl:Class ;`` expression or a
    continuation line) are not declarations of a named subject and are copied
    through unchanged.

    NOTE: anything that followed the terminator on the original line is not
    carried over to the output.

    Args:
        owl_type: The type to look for, e.g. ``"owl:Class"``.
        lines: The lines of the Turtle file.
        start_number: The first number to hand out.

    Returns:
        mapping: Dictionary mapping original names to new numbers.
        new_lines: List of updated lines.
        next_number: The next available number after processing.
    """
    type_clause = f" rdf:type {owl_type}"
    mapping = {}
    new_lines = []
    number = start_number

    for line in lines:
        # Definitions with further properties end with ';', bare ones with '.'
        if f"{type_clause} ;" in line:
            terminator = ";"
        elif f"{type_clause} ." in line:
            terminator = "."
        else:
            new_lines.append(line)
            continue

        # Strip whitespace BEFORE the colon so indented declarations still
        # yield "Name" rather than ":Name".
        name = line.split(type_clause)[0].strip().lstrip(":").strip()

        # Only a single-token subject is a named declaration; anything else
        # (blank-node expression, continuation line) must stay as it is.
        if len(name.split()) != 1:
            new_lines.append(line)
            continue

        new_num = str(number).zfill(5)
        label = name.replace("_", " ")
        new_lines.append(
            f":{new_num} rdf:type {owl_type} ;\n"
            f"\t\t\t\t   rdfs:label \"{label}\" {terminator}\n"
        )
        mapping[name] = new_num
        number += 1

    return mapping, new_lines, number

def update_references(lines, class_mapping):
    """
    Replace references to renamed resources (``:Name``) with their numbers.

    All names are matched in a single regex pass per line, so text that has
    already been replaced is never matched again. A reference is only
    rewritten when it is a complete default-prefix name:

    * the colon must not be preceded by a name character, so ``owl:Thing`` or
      ``rdfs:label`` are left alone even if ``Thing`` or ``label`` is a key;
    * the name must be followed by whitespace, one of ``, ; ) ]``, a
      statement-ending ``.``, or the end of the line.

    Matching is case-insensitive, but a key with exactly the matched spelling
    takes precedence; only when there is none is the first key that matches
    ignoring case used.

    Args:
        lines: The lines to process.
        class_mapping: Dictionary mapping original names to new numbers.

    Returns:
        A new list with the updated lines.
    """
    # An empty name would turn every bare ':' into a number, so ignore it.
    names = [name for name in class_mapping if name]
    if not names:
        return list(lines)

    # Fallback table for references whose case differs from the definition;
    # the first definition wins when two keys only differ by case.
    lowered = {}
    for name in names:
        lowered.setdefault(name.lower(), class_mapping[name])

    # Longest names first so the alternation prefers the most specific key.
    alternatives = "|".join(
        re.escape(name) for name in sorted(names, key=len, reverse=True)
    )
    pattern = re.compile(
        rf"(?<![\w.-]):({alternatives})(?=[\s,;)\]]|\.(?:\s|$)|$)",
        re.IGNORECASE,
    )

    def _replace(match):
        token = match.group(1)
        new_num = class_mapping.get(token)
        if new_num is None:
            new_num = lowered.get(token.lower())
        if new_num is None:
            # Exotic case-folding match with no table entry: leave untouched.
            return match.group(0)
        return f":{new_num}"

    return [pattern.sub(_replace, line) for line in lines]

def main(
    input_path="../OWL/TWONTO.ttl",
    output_path="../Utility/test.ttl",
    owl_types=("owl:Class", "owl:DatatypeProperty", "owl:ObjectProperty"),
):
    """
    Renumber the declarations in a Turtle file and write the result.

    The file is read as UTF-8, every declaration of each type in
    ``owl_types`` is renumbered (numbering starts at 1 and continues across
    the types in the given order), references to the renamed resources are
    updated, and the result is written to ``output_path`` as UTF-8.

    Args:
        input_path: Path of the Turtle file to read.
        output_path: Path of the file to write.
        owl_types: The ``rdf:type`` values whose subjects are renumbered.
    """
    # Read the file content
    with open(input_path, "r", encoding="utf8") as infile:
        lines = infile.readlines()

    overall_mapping = {}
    next_number = 1

    # Process each type in turn; each pass works on the previous pass's output
    # and continues the numbering where the previous one stopped.
    for owl_type in owl_types:
        mapping, lines, next_number = rename_lines(owl_type, lines, next_number)
        overall_mapping.update(mapping)

    # Update any remaining references in the file using the mapping
    final_lines = update_references(lines, overall_mapping)

    # Write the updated content to the output file
    with open(output_path, "w", encoding="utf8") as outfile:
        outfile.writelines(final_lines)


if __name__ == '__main__':
    main()
