In [27]:
import re
import urllib.parse

def process_locations_with_encoded_sameas_and_uri_fix(input_file, output_file):
    """Rewrite location subjects of a Turtle file as prefixed names with Bulbapedia links.

    The output starts with a fixed block of ``@prefix`` declarations. Each input
    line is then handled as follows:

    * A line starting with ``<http://example.org/pokemon/location/NAME>`` opens
      a location. It is replaced by ``location:SAFE_NAME a pokemon:Location ;``
      (``SAFE_NAME`` is ``NAME`` with every non-word character turned into
      ``_``) followed by an ``owl:sameAs`` link whose target is the Bulbapedia
      wiki URL with ``NAME`` percent-encoded. The rest of the matched line is
      not copied; if it closed the statement with ``.``, the ``owl:sameAs``
      line closes it too, otherwise it ends with ``;`` so the following
      predicate lines attach to the same subject.
    * Any other non-blank line that does not start with ``<`` is copied through,
      stripped and indented by four spaces.
    * Blank lines and other lines starting with ``<`` are dropped.

    The input is read completely before the output is opened, so both
    arguments may name the same file.

    :param input_file: Path of the Turtle file to read (UTF-8).
    :param output_file: Path of the Turtle file to write (UTF-8).
    """
    with open(input_file, 'r', encoding='utf-8') as infile:
        lines = infile.readlines()

    output_lines = [
        "@prefix location: <http://example.org/pokemon/location/> .",
        "@prefix region: <http://example.org/pokemon/region/> .",
        "@prefix pokemon: <http://example.org/pokemon/> .",
        "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .",
        "@prefix owl: <http://www.w3.org/2002/07/owl#> .",
        "@prefix bulbapedia: <https://bulbapedia.bulbagarden.net/wiki/> .",
        ""
    ]

    for line in lines:
        location_match = re.match(r'<http://example.org/pokemon/location/([^>]+)>', line)
        if location_match:
            location_name = location_match.group(1)

            # Build a prefixed-name-safe local part: every non-word character
            # (anything other than letters, digits and "_") becomes "_".
            fixed_location_name = re.sub(r"\W", "_", location_name)
            output_lines.append(f"location:{fixed_location_name} a pokemon:Location ;")

            # The text after the subject IRI is dropped. If it already ended
            # the statement, the sameAs triple must carry the final "."
            # instead of ";", or the block would be left unterminated.
            rest_of_line = line[location_match.end():].rstrip()
            terminator = "." if rest_of_line.endswith(".") else ";"

            # Percent-encode the raw name for the Bulbapedia link.
            encoded_name = urllib.parse.quote(location_name)
            output_lines.append(
                f"    owl:sameAs <https://bulbapedia.bulbagarden.net/wiki/{encoded_name}> {terminator}"
            )
            continue

        # Continuation lines (predicates/objects of the current subject, or any
        # other non-IRI-led line) are carried over with a uniform indent.
        stripped = line.strip()
        if stripped and not line.startswith('<'):
            output_lines.append(f"    {stripped}")

    with open(output_file, 'w', encoding='utf-8') as outfile:
        outfile.write("\n".join(output_lines))

# The knowledge graph is rewritten in place: the same path is used for
# reading and for writing.
input_file = output_file = "locations_kg.ttl"

# Run the location rewrite on that file.
process_locations_with_encoded_sameas_and_uri_fix(
    input_file=input_file,
    output_file=output_file,
)


In [1]:
from pyshacl import validate

def validate_kg_with_shacl(kg_file, shacl_file):
    """
    Check a Turtle knowledge graph against a Turtle SHACL shapes file.

    Both files are read as UTF-8 text and passed to ``validate`` as Turtle
    data, with RDFS inference switched on.

    :param kg_file: Path to the knowledge-graph file (Turtle format)
    :param shacl_file: Path to the SHACL shapes file (Turtle format)
    :return: "Valid" if the validator reports conformance, otherwise "Invalid"
    """
    with open(kg_file, 'r', encoding='utf-8') as kg_handle:
        with open(shacl_file, 'r', encoding='utf-8') as shapes_handle:
            kg_text = kg_handle.read()
            shapes_text = shapes_handle.read()

    validation_options = {
        "shacl_graph": shapes_text,
        "data_graph_format": "turtle",
        "shacl_graph_format": "turtle",
        "inference": "rdfs",  # RDFS inference is applied before validation
    }

    # Only the conformance flag is used; the other two result items are ignored.
    conforms, _report_graph, _report_text = validate(
        data_graph=kg_text, **validation_options
    )

    if conforms:
        return "Valid"
    return "Invalid"

# Knowledge graph to check and the SHACL shapes to check it against
kg_file = "locations_kg.ttl"
shacl_file = "location_shapes.ttl"

# Run the SHACL validation and keep the verdict string
conformance_result = validate_kg_with_shacl(kg_file=kg_file, shacl_file=shacl_file)

# Show the verdict
print(conformance_result)


Valid
