# In [3]:
import re

def parse_regions_to_rdf(input_file, output_file):
    """Convert a wiki-style region listing into a Turtle (TTL) file.

    The input is processed line by line after stripping whitespace:

    * a line that both starts and ends with ``==`` opens a new region; the
      text left after removing the surrounding ``=`` characters is its name;
    * a line starting with ``*`` adds one game to the current region;
    * any other non-empty line is appended, space-separated, to the current
      region's description.

    Every named region is rendered with ``generate_region_rdf`` and the
    entries are written to ``output_file``, preceded by a fixed block of
    ``@prefix`` declarations and separated by blank lines.

    Content that appears before the first heading, or under a heading whose
    name is empty, belongs to no region and is discarded.

    Parameters:
        input_file (str): Path of the UTF-8 text file to parse.
        output_file (str): Path of the TTL file to write (overwritten).

    Returns:
        None
    """
    # Prefix declarations emitted once at the top of the TTL output.
    prefixes = """
@prefix region: <http://example.org/pokemon/region/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <http://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

"""
    rdf_entries = [prefixes]

    current_region = None
    games = []
    description_parts = []

    with open(input_file, 'r', encoding='utf-8') as file:
        # Stream the file instead of loading every line into memory first.
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith("==") and line.endswith("=="):
                # A new heading closes the region collected so far.
                if current_region:
                    description = " ".join(description_parts) or None
                    rdf_entries.append(
                        generate_region_rdf(current_region, games, description)
                    )

                # Reset unconditionally so that text seen before the first
                # heading (or under a nameless one) cannot leak into this
                # region.
                games = []
                description_parts = []

                # Strip the whole run of "=" on both sides so that deeper
                # headings such as "=== Name ===" yield "Name", and a "=="
                # inside the name is left untouched.
                current_region = line.strip("=").strip()

            elif line.startswith("*"):
                games.append(line.lstrip("*").strip())

            else:
                description_parts.append(line)

    # The last region has no following heading to trigger its write.
    if current_region:
        description = " ".join(description_parts) or None
        rdf_entries.append(generate_region_rdf(current_region, games, description))

    with open(output_file, 'w', encoding='utf-8') as outfile:
        outfile.write("\n\n".join(rdf_entries))

def _turtle_literal(text):
    """Return *text* as a double-quoted Turtle string literal, escaped."""
    # Backslash must be escaped first so the escapes added below survive.
    escaped = (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


def generate_region_rdf(region_name, games, description):
    """Build the Turtle statements describing one region.

    Parameters:
        region_name (str): Human-readable region name. It is used verbatim
            (escaped) as the ``rdfs:label`` and, with every character that
            is not alphanumeric, ``_`` or ``-`` replaced by ``_``, as the
            local name under the ``region:`` prefix.
        games (list[str]): Game titles; each becomes one ``schema:game``
            string literal. An empty list yields a single empty literal.
        description (str | None): Free-text description. A falsy value
            yields an empty literal.

    Returns:
        str: A Turtle block without leading or trailing whitespace. It uses
        the ``region:``, ``rdfs:`` and ``schema:`` prefixes, which the caller
        must declare.
    """
    # Spaces and most punctuation are not allowed unescaped in a prefixed
    # name, so map them to "_" (a plain space still becomes "_" as before).
    local_name = "".join(
        ch if ch.isalnum() or ch in "_-" else "_" for ch in region_name
    )

    # Several values for one predicate are written as a comma-separated
    # object list. Wrapping literals in "[...]" is not valid Turtle because
    # square brackets introduce a blank node that needs its own predicates.
    # The empty-literal placeholders are kept so both properties are always
    # present, as in the previous output.
    if games:
        game_objects = ", ".join(_turtle_literal(game) for game in games)
    else:
        game_objects = '""'
    description_literal = _turtle_literal(description) if description else '""'

    return (
        f"region:{local_name} a schema:Place ;\n"
        f"    rdfs:label {_turtle_literal(region_name)} ;\n"
        f"    schema:description {description_literal} ;\n"
        f"    schema:game {game_objects} ."
    )

# Usage: convert the region listing into a Turtle file.
input_file, output_file = "region.txt", "regions_output.ttl"
parse_regions_to_rdf(input_file=input_file, output_file=output_file)


# In [13]:
from pyshacl import validate

def validate_rdf_with_shacl(rdf_file, shacl_file):
    """
    Check an RDF document against a set of SHACL shapes and report the result.

    Both files are read as UTF-8 text and handed to ``validate`` with RDFS
    inference enabled. A one-line verdict is printed (followed by the textual
    report on failure) and the serialized report graph is always written to
    ``validation_report.ttl`` in the current directory.

    Any exception raised along the way is caught and printed rather than
    propagated.

    Parameters:
        rdf_file (str): Path to the RDF file to validate.
        shacl_file (str): Path to the SHACL shapes file.

    Returns:
        None
    """
    try:
        # Load the data first, then the shapes, as raw text.
        contents = []
        for path in (rdf_file, shacl_file):
            with open(path, 'r', encoding='utf-8') as handle:
                contents.append(handle.read())
        data_text, shapes_text = contents

        outcome = validate(
            data_graph=data_text,
            shacl_graph=shapes_text,
            inference='rdfs',
            debug=False,
            serialize_report_graph=True,
        )
        conforms, report_graph, report_text = outcome

        if not conforms:
            print("RDF data does NOT conform to SHACL shapes.")
            print(report_text)
        else:
            print("RDF data conforms to SHACL shapes.")

        # The report is persisted regardless of the verdict.
        # NOTE(review): .decode() assumes the serialized report graph comes
        # back as bytes — confirm against the installed pyshacl version.
        with open("validation_report.ttl", "w", encoding='utf-8') as sink:
            sink.write(report_graph.decode("utf-8"))

    except Exception as exc:
        print(f"Error during validation: {exc}")

# Validate the generated Turtle file against the SHACL shapes.
rdf_file, shacl_file = "regions_output.ttl", "shacl.ttl"
validate_rdf_with_shacl(rdf_file=rdf_file, shacl_file=shacl_file)


# Output: RDF data conforms to SHACL shapes.
