In [101]:
import os

# Resolve the directory that every relative path in this notebook is based on.
# This is the current working directory (not the notebook's own location).
# `folder` is optional: if something has defined it before this cell runs
# (presumably the process that launches the notebook — confirm), paths are
# resolved under that sub-directory of the working directory instead.
base_dir = os.getcwd()
try:
    if folder:
        base_dir = os.path.join(base_dir, folder)
except NameError:
    # `folder` was never defined: keep the plain working directory.
    # Only NameError is tolerated so that real problems (e.g. a non-string
    # `folder`) are no longer silently swallowed.
    pass

In [102]:
!pip install rdflib > /dev/null
!pip install owlrl > /dev/null
!pip install pyshacl > /dev/null


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [103]:
from rdflib import *
from owlrl import *
import json
import pyshacl

In [104]:
# Project vocabularies. Below, `tbox` terms are used as classes and predicates
# (e.g. tbox.PolicyChecker, tbox.nextStep) and `abox` is used to mint the URIs
# of individuals (data products, policy checkers, operations).
tbox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#')
abox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#')
# NOTE(review): W3C publishes DCAT under http://www.w3.org/ns/dcat# (http, not
# https) — confirm this IRI matches the terms actually used in the data.
dcat = Namespace('https://www.w3.org/ns/dcat#')
dcterms = Namespace('http://purl.org/dc/terms/')
# NOTE(review): this `tb` variable points at the 2021 SDM namespace, whereas
# PolicyChecker binds the "tb" *prefix* to the 2024 healthmesh tbox — confirm
# that the difference is intentional.
tb = Namespace("http://www.semanticweb.org/acraf/ontologies/2021/0/SDM#")
# Standard external vocabularies (not referenced by the code in this notebook).
odrl = Namespace("http://www.w3.org/ns/odrl/2/")
prov = Namespace("http://www.w3.org/ns/prov#")
dqv = Namespace("http://www.w3.org/ns/dqv#")

# LOAD CONTRACT

In [105]:
# Read the Semantic Data Model file into a fresh graph. The bare parse() call
# is kept as the last expression so the cell still displays the loaded graph.
sdm_path = os.path.join(base_dir, "../../../FederatedComputationalGovernance/SemanticDataModel/sdm.ttl")
contract = Graph()
contract.parse(sdm_path)

<Graph identifier=N46d26265da12426eb857bf1a5ba607f7 (<class 'rdflib.graph.Graph'>)>

# CONTRACT PARSER

Given a Data Product, output the intermediate representations (IRs) of the policies in its Data Contracts (DCs).

In [106]:
import uuid

def generate_unique_uri(base_uri):
    """
    Mint a fresh URI under ``base_uri``.

    :param base_uri: prefix (namespace or string) the identifier is appended to
    :return: URIRef made of ``base_uri`` followed by a random UUID4
    """
    suffix = uuid.uuid4()
    return URIRef(f"{base_uri}{suffix}")

In [107]:
def add_jsonld_instances(graph, path):
    """
    Load a JSON-LD file and merge its triples into ``graph``.

    :param graph: rdflib graph that receives the triples (modified in place)
    :param path: path of the JSON-LD document to load
    :return: the same ``graph`` object, now containing the loaded instances
    :raises OSError: if ``path`` cannot be opened
    :raises json.JSONDecodeError: if the file is not valid JSON
    """
    # Binary mode lets json.load detect UTF-8/16/32 (including a BOM) on its
    # own, instead of decoding with the platform's default text encoding.
    with open(path, 'rb') as f:
        json_ld_data = json.load(f)

    # The decoded JSON document is handed to rdflib as-is; the file is already
    # closed at this point because nothing below needs the handle.
    instances = Graph().parse(data=json_ld_data, format='json-ld')
    graph += instances

    return graph

In [108]:
class PolicyChecker(Graph):
    """
    Graph holding one policy checker.

    On construction the graph receives a freshly minted checker URI typed as
    ``tbox.PolicyChecker`` and linked to the policy it is built from
    (``tbox.accordingTo``) and to the data product it validates
    (``tbox.validates``).
    """

    def __init__(self, p, dp, *args, p_type=None, **kwargs):
        """
        :param p: policy node this checker is derived from
        :param dp: local name of the data product (resolved under ``abox``)
        :param p_type: optional policy type IRI; only its last ``/``-separated
            segment is kept. Keyword-only, defaults to ``None``.
        :param args: forwarded to ``Graph.__init__``
        :param kwargs: forwarded to ``Graph.__init__``
        """
        super().__init__(*args, **kwargs)
        self.p = p
        # Always define p_type so get_policy_type() cannot raise
        # AttributeError (previously the attribute was never assigned).
        self.p_type = str(p_type).split("/")[-1] if p_type else None
        self.bind("ab", "http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#")
        self.bind("tb", "http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#")
        self.URI = generate_unique_uri(abox)

        self.add((self.URI, RDF.type, tbox.PolicyChecker))
        self.add((self.URI, tbox.accordingTo, p))
        self.add((self.URI, tbox.validates, abox[dp]))

    def get_URI(self):
        """Return the URI minted for this policy checker."""
        return self.URI

    def get_policy_type(self):
        """Return the short policy type name, or ``None`` if none was given."""
        return self.p_type

    def get_policy(self):
        """Return the policy node this checker was created for."""
        return self.p

In [109]:

class DCParser:
    """
    Parse Policies of Data Contracts to Policy Checkers.

    For the data product ``dp`` the parser collects the policies and attribute
    mappings of its data contracts from ``graph``, builds one ``PolicyChecker``
    graph per policy by running the query files under ``rules/Layer1`` and
    ``rules/Layer2``, merges the result into the working graph and serializes
    it as Turtle.
    """

    def __init__(self, dp, graph):
        """
        :param dp: local name of the data product (resolved under ``abox``)
        :param graph: rdflib graph containing the contracts
        """
        self.dp = dp
        self.g = graph
        # Filled by _read_contracts(): maps each mapping's `mto` node to its
        # `mfrom` node.
        self.attr_mappings = None

    def _validate_graph(self) -> bool:
        """
        Validate the working graph against the SHACL shapes file.

        :return: True when pyshacl reports that the graph conforms
        """
        from pyshacl import validate
        # NOTE(review): the shapes file has a .json extension but is parsed as
        # Turtle — confirm which of the two is correct.
        shapes = Graph().parse(os.path.join(base_dir, 'policy_grammar.json'), format="turtle")
        conforms, _report_graph, _report_text = validate(self.g, shacl_graph=shapes)
        return conforms

    def _read_contracts(self):
        """
        Collect the policies and attribute mappings of the data product.

        Also stores the mappings on ``self.attr_mappings``.

        :return: tuple ``(policies, mappings)`` where ``policies`` is a list of
            policy nodes and ``mappings`` maps ``mto`` nodes to ``mfrom`` nodes
        """
        policies_list = []
        mappings_dict = {}
        for dc in self.g.objects(subject=abox[self.dp], predicate=tbox.hasDC):
            # handle policies
            policies_list.extend(self.g.objects(subject=dc, predicate=tbox.hasPolicy))
            # handle mappings
            for mapping in self.g.objects(subject=dc, predicate=tbox.hasMapping):
                mfrom = self.g.value(subject=mapping, predicate=tbox.mfrom)
                mto = self.g.value(subject=mapping, predicate=tbox.mto)
                if mfrom is None or mto is None:
                    # Incomplete mapping: a None key/value could never be
                    # written back into a graph, so skip it.
                    continue
                mappings_dict[mto] = mfrom

        self.attr_mappings = mappings_dict
        return policies_list, mappings_dict

    def executRule(self, rule_path, pc, mappings):
        """
        Run every query file in ``rule_path`` and add the results to ``pc``.

        In each file the placeholders ``<{key}>`` are replaced by ``<value>``
        for every entry of ``mappings`` before the text is executed against
        the working graph. A query that fails is reported on stdout and
        skipped; the remaining files are still processed.

        :param rule_path: directory containing the query files
        :param pc: graph that receives the resulting triples
        :param mappings: placeholder name -> replacement value
        :return: ``pc``
        """
        # sorted(): os.listdir returns entries in arbitrary order; sorting
        # makes the run (and its error output) reproducible.
        for rule_name in sorted(os.listdir(rule_path)):
            rule_file = os.path.join(rule_path, rule_name)
            if not os.path.isfile(rule_file):
                # Sub-directories (e.g. .ipynb_checkpoints) are not rules and
                # would make open() fail.
                continue

            with open(rule_file, 'r') as file:
                rule = file.read()

            for key, value in mappings.items():
                rule = rule.replace(f"<{{{key}}}>", f"<{value}>")

            try:
                results = self.g.query(rule)

                # Collect into a scratch graph first so that a failure while
                # reading the results leaves `pc` untouched.
                result_graph = Graph()
                for triple in results:
                    result_graph.add(triple)

                pc += result_graph
            except Exception as e:
                print("Parsing Error: ", e)

        return pc

    def get_last_op(self, pc):
        """
        Follow the ``tbox.nextStep`` chain starting at the policy checker.

        :param pc: policy checker graph
        :return: the last node of the chain, or ``None`` when the checker has
            no ``nextStep`` at all
        """
        current = pc.value(subject=pc.get_URI(), predicate=tbox.nextStep)
        # Nodes already walked; guards against an endless loop if the chain
        # ever contains a cycle.
        seen = {pc.get_URI()}
        while current:
            seen.add(current)
            following = pc.value(subject=current, predicate=tbox.nextStep)
            if not following or following in seen:
                break
            current = following
        return current

    def _initOP(self, policy, pc):
        """
        Run the ``rules/Layer1`` queries for a policy checker.

        :param policy: policy being parsed (currently not used by this step)
        :param pc: policy checker graph to extend
        :return: tuple ``(last_op, pc)`` with the last node of the
            ``nextStep`` chain (or ``None``) and the extended graph
        """
        initOPrules = os.path.join(base_dir, "rules/Layer1")
        mappings = {
            "dp": abox[self.dp],
            "pc": pc.get_URI(),
            "op_uri": generate_unique_uri(abox),
        }

        pc = self.executRule(initOPrules, pc, mappings)

        return self.get_last_op(pc), pc

    def _handle_attributes(self, pc):
        """
        Rewrite the ``tbox.hasInput`` values of every step using the contract
        mappings collected by ``_read_contracts``.

        :param pc: policy checker graph (modified in place)
        :return: ``pc``
        """
        # Tolerate being called before _read_contracts() populated the mappings.
        attr_mappings = self.attr_mappings or {}
        operation = pc.get_URI()
        seen = set()  # cycle guard for the nextStep chain
        while operation and operation not in seen:
            seen.add(operation)
            # Snapshot the inputs: the graph is modified inside the loop and
            # must not be changed while its own generator is being consumed.
            for attribute in list(pc.objects(subject=operation, predicate=tbox.hasInput)):
                if attribute in attr_mappings:
                    pc.remove((operation, tbox.hasInput, attribute))
                    pc.add((operation, tbox.hasInput, attr_mappings[attribute]))
            operation = pc.value(subject=operation, predicate=tbox.nextStep)
        return pc

    def _handle_policy_patterns(self, pc, initOP):
        """
        Run the ``rules/Layer2`` queries for a policy checker.

        :param pc: policy checker graph to extend
        :param initOP: last operation produced by the previous step; exposed
            to the rules as the ``last_op`` placeholder
        :return: tuple ``(last_op, pc)`` with the last node of the
            ``nextStep`` chain (or ``None``) and the extended graph
        """
        initOPrules = os.path.join(base_dir, "rules/Layer2")
        mappings = {
            "dp": abox[self.dp],
            "pc": pc.get_URI(),
            "op_uri": generate_unique_uri(abox),
            "last_op": initOP,
            "policy_uri": pc.get_policy(),
        }
        pc = self.executRule(initOPrules, pc, mappings)

        return self.get_last_op(pc), pc

    def _parse_policy(self, policy):
        """
        Parse the policy to intermediate representation.

        :param policy: policy to parse
        :return: the ``PolicyChecker`` graph built for the policy
        """
        # create policy checker graph
        pc = PolicyChecker(policy, self.dp)

        # Check Data Dependencies
        last_op, pc = self._initOP(policy, pc)

        # Handle Policy Patterns
        last_op, pc = self._handle_policy_patterns(pc, last_op)
        pc = self._handle_attributes(pc)

        # Report
        if last_op is None:
            # No rule produced an operation: hang the report directly off the
            # checker (the root of the nextStep chain) instead of passing
            # None to the graph, which rdflib rejects.
            last_op = pc.get_URI()
        report_uid = generate_unique_uri(abox)
        pc.add((last_op, tbox.nextStep, report_uid))
        pc.add((report_uid, RDF.type, tbox.Report))
        # DUTY
        return pc

    def parse_contracts(self, destination=None):
        """
        Build a policy checker for every policy of the data product, merge
        them into the working graph and write the graph out as Turtle.

        :param destination: file to serialize to; defaults to the Semantic
            Data Model file ``sdm.ttl`` (the file is overwritten)
        :return: the merged graph
        """
        if destination is None:
            destination = os.path.join(base_dir, "../../../FederatedComputationalGovernance/SemanticDataModel/sdm.ttl")

        # SHACL validation via _validate_graph() is currently disabled.
        policies, _mappings = self._read_contracts()

        for policy in policies:
            pc = self._parse_policy(policy)
            # `+` builds a new graph, so the graph object passed to __init__
            # is left unmodified.
            self.g = self.g + pc

        self.g.serialize(destination=destination, format="turtle")

        return self.g


In [111]:
# Run the parser for data product `dp` against the loaded contract graph.
# parse_contracts() also serializes the merged graph back to sdm.ttl on disk.
# NOTE(review): `dp` is not defined in any cell shown in this notebook —
# presumably it is injected from outside, like `folder`; confirm, otherwise
# this cell fails on a fresh kernel.
DCParser(dp, contract).parse_contracts()

<Graph identifier=N63c8178e35b9492bb885ba9bc8795cce (<class 'rdflib.graph.Graph'>)>