In [156]:
!pip install rdflib > /dev/null
!pip install owlrl > /dev/null
!pip install pyshacl > /dev/null


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [157]:
from rdflib import *
from owlrl import *
import pyshacl

In [158]:
# Project vocabularies: HealthMesh terminology box (classes/properties) and
# assertion box (instances).
tbox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#')
abox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#')
# Standard vocabularies.
# The DCAT namespace IRI uses the `http` scheme; the `https` spelling is a different
# IRI and would never match DCAT terms in the data.
dcat = Namespace('http://www.w3.org/ns/dcat#')
dcterms = Namespace('http://purl.org/dc/terms/')
tb = Namespace("http://www.semanticweb.org/acraf/ontologies/2021/0/SDM#")
odrl = Namespace("http://www.w3.org/ns/odrl/2/")
prov = Namespace("http://www.w3.org/ns/prov#")
dqv = Namespace("http://www.w3.org/ns/dqv#")

# LOAD CONTRACT

In [159]:
# Graph that holds the semantic data model the contracts are read from.
contract = Graph()
# Alternative input kept for reference: a single data-product contract file.
#contract.parse("/home/acraf/psr/tfm/Fdatavalidation/DataPlatformLayer/Integration/SDP2_C.ttl")
# NOTE(review): absolute, machine-specific path — the notebook only runs on a host with this layout.
contract.parse("/home/acraf/psr/tfm/Fdatavalidation/FederatedComputationalGovernance/SemanticDataModel/sdm.ttl")
# Local name of the data product; DCParser resolves it in the `abox` namespace.
dp = "UPENN-GBM_clinical_info_v2"

# PARSER CONTRACT

Given a Data Product, output the intermediate representations (IRs) of its Data Contracts (DCs).

In [160]:
def add_jsonld_instances(graph, path):
    """
    Load the JSON-LD document stored at ``path`` into ``graph``.

    The raw file text is handed straight to the graph's JSON-LD parser, so the
    function does not depend on the ``json`` module (which this notebook never
    imports) nor on rdflib versions that accept an already-decoded dict as
    ``data``.

    :param graph: graph to extend; it is modified in place
    :param path: path of the JSON-LD file to read
    :return: the same ``graph`` object, now containing the parsed triples
    """
    with open(path, 'r', encoding='utf-8') as f:
        json_ld_text = f.read()

    # Parse outside the `with` block so the file handle is released first.
    graph.parse(data=json_ld_text, format='json-ld')
    return graph

In [161]:
class PolicyChecker(Graph):
    """
    RDF graph describing the checker generated for one policy.

    On construction the graph receives a ``tbox.PolicyChecker`` node that
    ``tbox.validates`` the policy and carries the data format as ``tbox.hasType``.
    """

    def __init__(self, p, p_type, format, *args, **kwargs):
        """
        :param p: IRI of the policy being checked (must contain a ``#`` fragment)
        :param p_type: IRI of the policy's type; its last ``/`` segment is kept
        :param format: IRI of the data format (must contain a ``#`` fragment)
        :param args: forwarded to the ``Graph`` constructor
        :param kwargs: forwarded to the ``Graph`` constructor
        """
        super().__init__(*args, **kwargs)
        self.p = p

        # Prefixes used when the graph is serialized.
        for prefix, namespace in (
            ("ab", "http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#"),
            ("tb", "http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#"),
        ):
            self.bind(prefix, namespace)

        # The checker is named after the policy's fragment: abox:pc_<fragment>.
        policy_fragment = p.split("#")[1]
        self.URI = abox[f'pc_{policy_fragment}']

        format_fragment = format.split("#")[1]
        for predicate, value in (
            (RDF.type, tbox.PolicyChecker),
            (tbox.validates, p),
            (tbox.hasType, Literal(format_fragment)),
        ):
            self.add((self.URI, predicate, value))

        # Only the trailing path segment of the type IRI is stored.
        self.p_type = p_type.rsplit("/", 1)[-1]

    def get_URI(self):
        """Return the IRI of the policy-checker node."""
        return self.URI

    def get_policy_type(self):
        """Return the last ``/``-separated segment of the policy's type IRI."""
        return self.p_type
        

In [162]:

class DCParser:
    """
    Parse the policies of a data product's Data Contracts into Policy Checkers.

    The parser reads contracts, policies and attribute mappings from ``graph`` and
    looks up executable operations in a separate JSON-LD "operation mappings" graph.
    """

    # Default location of the JSON-LD operation mappings (machine specific).
    DEFAULT_OP_MAPPINGS_PATH = '/home/acraf/psr/tfm/Fdatavalidation/SideCar/ValidateContract/parser/operations_mappings.json'

    def __init__(self, dp, graph, op_mappings_path=None):
        """
        :param dp: local name of the data product (resolved in the ``abox`` namespace)
        :param graph: graph containing the data product, its contracts and policies
        :param op_mappings_path: JSON-LD file with the operation mappings;
            defaults to ``DEFAULT_OP_MAPPINGS_PATH``
        """
        self.dp = dp
        self.g = graph
        if op_mappings_path is None:
            op_mappings_path = self.DEFAULT_OP_MAPPINGS_PATH
        self.op_mappings = add_jsonld_instances(Graph(), op_mappings_path)
        # Filled by _read_contracts(): {mapping target (mto) -> mapping source (mfrom)}.
        self.attr_mappings = None

    def _validate_graph(self) -> bool:
        """
        Validate that the graph complies with the policy grammar (SHACL shapes
        read from ``./policy_grammar.ttl``, relative to the working directory).

        :return: True when the graph conforms, False otherwise
        """
        shapes = Graph().parse("./policy_grammar.ttl", format="turtle")
        conforms, _report_graph, _report_text = pyshacl.validate(self.g, shacl_graph=shapes)
        return conforms

    def _read_contracts(self):
        """
        Collect the policies and attribute mappings of every contract attached
        to the data product. Also stores the mappings in ``self.attr_mappings``.

        :return: tuple ``(policies_list, mappings_dict)``
        """
        policies_list = []
        mappings_dict = {}
        for contract in self.g.objects(subject=abox[self.dp], predicate=tbox.hasDC):
            for policy in self.g.objects(subject=contract, predicate=tbox.hasPolicy):
                print(policy)
                policies_list.append(policy)

            for mapping in self.g.objects(subject=contract, predicate=tbox.hasMapping):
                mfrom = self.g.value(subject=mapping, predicate=tbox.mfrom)
                mto = self.g.value(subject=mapping, predicate=tbox.mto)
                mappings_dict[mto] = mfrom

        self.attr_mappings = mappings_dict
        return policies_list, mappings_dict

    def _get_op(self, operation):
        """
        Resolve ``operation`` to its mapped operation (``tbox.hasOP``) and copy
        every triple describing that mapped operation into a new graph.

        :param operation: node to look up in the operation mappings
        :return: tuple ``(mapped_operation, subgraph)``
        :raises LookupError: when the mappings hold no ``tbox.hasOP`` for ``operation``
        """
        mapped = self.op_mappings.value(subject=operation, predicate=tbox.hasOP)
        if mapped is None:
            # Without this guard the pattern below would be (None, None, None),
            # i.e. a wildcard that copies the entire mappings graph.
            raise LookupError(f"no operation mapping (tbox:hasOP) found for {operation!r}")

        subgraph = Graph()
        for triple in self.op_mappings.triples((mapped, None, None)):
            subgraph.add(triple)

        return mapped, subgraph

    def _get_op_constraint(self, constraint):
        """
        Resolve the operation behind a constraint: its left operand's
        ``prov.wasAssociatedWith`` agent is looked up with ``_get_op``.

        :param constraint: constraint node
        :return: tuple ``(mapped_operation, subgraph)`` as returned by ``_get_op``
        """
        leftop = self.g.value(subject=constraint, predicate=odrl.leftOperand)
        software_agent = self.g.value(subject=leftop, predicate=prov.wasAssociatedWith)
        return self._get_op(software_agent)

    def _get_op_predicate(self, constraint):
        """
        Read a constraint as ``(left operand, operator, right operand)``.

        ``odrl.lteq``, ``odrl.gteq`` and ``odrl.isA`` are translated to the literals
        ``"<="``, ``">="`` and ``"type"``; any other operator is returned unchanged.

        :param constraint: constraint node
        :return: tuple ``(leftop, predicate, rightop)``
        """
        rightop = self.g.value(subject=constraint, predicate=odrl.rightOperand)
        leftop = self.g.value(subject=constraint, predicate=odrl.leftOperand)
        predicate = self.g.value(subject=constraint, predicate=odrl.operator)
        symbols = {
            odrl.lteq: Literal("<="),
            odrl.gteq: Literal(">="),
            odrl.isA: Literal("type"),
        }
        return (leftop, symbols.get(predicate, predicate), rightop)

    def _initOP(self, policy, pc):
        """
        Add the initial operation of the policy checker, derived from the data
        product's technology access (``tbox.hasTA`` / ``tbox.typeAcces``).
        Only the ``Static`` access type is handled; anything else adds nothing.

        :param policy: policy being parsed (currently unused)
        :param pc: policy checker graph to extend
        :return: None
        """
        ta = self.g.value(subject=abox[self.dp], predicate=tbox.hasTA)
        ta_type = self.g.value(subject=ta, predicate=tbox.typeAcces)
        if ta_type is None:
            # No access type declared: nothing to initialise.
            return

        if ta_type.split("#")[1] == "Static":
            init_operation, subgraph = self._get_op(ta_type)
            pc += subgraph
            if subgraph.value(subject=init_operation, predicate=tbox.hasParameter):
                # Replace the template parameter with the path found in the graph.
                pc.remove((init_operation, tbox.hasParameter, None))
                ta_path = self.g.value(subject=ta_type, predicate=tbox.path)
                pc.add((init_operation, tbox.hasParameter, ta_path))

            pc.add((pc.get_URI(), tbox.nextStep, init_operation))

    def _handle_attributes(self, pc, operation, attribute):
        """
        Replace the operation's ``tbox.hasAttribute`` with the attribute that
        ``attribute`` maps to in ``self.attr_mappings``.

        :param pc: policy checker graph to modify
        :param operation: operation node whose attribute is replaced
        :param attribute: key looked up in ``self.attr_mappings``
        :return: None
        """
        pc.remove((operation, tbox.hasAttribute, None))
        pc.add((operation, tbox.hasAttribute, self.attr_mappings[attribute]))  # MAPPING

    def _handle_duties(self, policy, pc):
        """
        Translate the policy's duties into operations of the policy checker.
        Processing stops at the first duty that is handled.

        :param policy: policy whose ``odrl.duty`` nodes are read
        :param pc: policy checker graph to extend
        :return: last step added to the checker, or None when no duty was
            handled (no duties, or a policy type other than the two supported)
        """
        policy_type = pc.get_policy_type()
        for duty in self.g.objects(subject=policy, predicate=odrl.duty):
            target = self.g.value(subject=duty, predicate=odrl.target)
            constraint = self.g.value(subject=duty, predicate=odrl.constraint)
            action = self.g.value(subject=duty, predicate=odrl.action)
            left_operand, comparator, right_operand = self._get_op_predicate(constraint)

            if policy_type == "dqv#QualityPolicy":  # TODO: FIX THIS
                # _get_op_constraint returns (operation, subgraph); only the
                # operation node may be used as a triple subject.
                operation, subgraph = self._get_op_constraint(constraint)
                pc += subgraph

                # Operation
                pc.add((operation, RDF.type, tbox.Operation))
                pc.add((abox.LoadData, tbox.nextStep, operation))
                pc.add((operation, tbox.hasParameter, target))

                # Constraint: use the constraint's own operator when it was
                # translated to a symbol, otherwise fall back to ">=".
                if not isinstance(comparator, Literal):
                    comparator = Literal(">=")
                pc.add((abox.Op1, RDF.type, tbox.Operator))
                pc.add((abox.Op1, tbox.operator, comparator))
                pc.add((abox.Op1, tbox.hasLeftOperand, left_operand))
                pc.add((abox.Op1, tbox.hasRightOperand, right_operand))
                pc.add((operation, tbox.nextStep, abox.Op1))
                return abox.Op1

            if policy_type == "Privacy":
                operation, subgraph = self._get_op(action)
                pc += subgraph

                if subgraph.value(subject=operation, predicate=tbox.hasAttribute):
                    self._handle_attributes(pc, operation, target)

                pc.add((abox.LoadData, tbox.nextStep, operation))
                return operation

        return None

    def _parse_policy(self, policy):
        """
        Parse a policy into its intermediate representation.

        :param policy: policy to parse
        :return: the PolicyChecker graph built for the policy
        """
        # Policy type and data format of the data product.
        p_type = self.g.value(subject=policy, predicate=RDF.type)
        data_format = self.g.value(subject=abox[self.dp], predicate=tbox.hasDTT)

        pc = PolicyChecker(policy, p_type, data_format)

        self._initOP(policy, pc)
        last_op = self._handle_duties(policy, pc)

        # Report: only chain it when a duty produced a step; a triple with a
        # None subject is rejected by the graph.
        if last_op is not None:
            pc.add((last_op, tbox.nextStep, abox.Report))
        pc.add((abox.Report, RDF.type, tbox.Report))

        return pc

    def _get_mappings(self):
        """
        Get the mappings attached directly to the data product.

        :return: list of mapping nodes
        """
        return list(self.g.objects(subject=abox[self.dp], predicate=tbox.hasMapping))

    def parse_contracts(self):
        """
        Parse the first policy of the data product and write the source graph
        merged with the resulting policy checker to ``<checker fragment>.ttl``
        (Turtle) in the working directory.

        Grammar validation (``_validate_graph``) is currently not applied and
        only the first policy is parsed.

        :return: None
        :raises IndexError: when the data product has no policies
        """
        policies, mappings = self._read_contracts()

        print(mappings)

        if not policies:
            raise IndexError(f"no policies found for data product {self.dp!r}")

        # Parse the policy to its IR and serialize it together with the source graph.
        pc = self._parse_policy(policies[0])
        e_graph = self.g + pc
        e_graph.serialize(destination=f'{pc.get_URI().split("#")[1]}.ttl', format="turtle")
        

In [163]:
# Parse the contracts of data product `dp`; parse_contracts() prints the policies and
# attribute mappings it finds and serializes the result to a Turtle file.
DCParser(dp, contract).parse_contracts()

http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#p1
http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#p2
http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#p3
{rdflib.term.URIRef('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#Subject'): rdflib.term.URIRef('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#ID')}
