In [23]:
!pip install rdflib > /dev/null
!pip install owlrl > /dev/null
!pip install pyshacl > /dev/null


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [24]:
from rdflib import *
from owlrl import *
import pyshacl

In [25]:
# Namespaces used to build IRIs throughout the notebook.
# Project vocabularies of the healthmesh ontology: `tbox` for terms, `abox` for instances.
tbox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#')
abox = Namespace('http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#')
# The DCAT vocabulary IRI uses the `http` scheme; with `https` the generated
# IRIs would not match DCAT terms used in standard-conformant data.
dcat = Namespace('http://www.w3.org/ns/dcat#')
dcterms = Namespace('http://purl.org/dc/terms/')
# NOTE(review): `tb` points at the 2021 SDM ontology, whereas PolicyChecker binds
# the prefix "tb" to the healthmesh tbox namespace — confirm this one is still needed.
tb = Namespace("http://www.semanticweb.org/acraf/ontologies/2021/0/SDM#")
odrl = Namespace("http://www.w3.org/ns/odrl/2/")

# LOAD CONTRACT

In [26]:
# Load the data contract file into an in-memory RDF graph. `parse` is the last
# expression of the cell, so the notebook displays the resulting graph's repr.
# NOTE(review): the path is absolute and machine-specific — consider making it
# relative to a configurable base directory.
contract = Graph()
contract.parse("/home/acraf/psr/tfm/Prototype/SideCar/FederateDataSource/contract.ttl")

<Graph identifier=N2c5b3945bfe742d09a71886f6e05d103 (<class 'rdflib.graph.Graph'>)>

# PARSE CONTRACT

Given a Data Product, output the intermediate representations (IRs) of the policies in its Data Contracts (DCs).

In [27]:
# Local name of the data product to process; DCParser resolves it against the
# `abox` namespace (abox[dp]) when looking up its contracts.
dp = "UPENN-GBM_clinical_info_v2"

In [28]:
import json

In [29]:
def add_jsonld_instances(graph, path):
    """
    Merge the JSON-LD document stored at `path` into `graph`.

    :param graph: rdflib graph that receives the triples (extended in place)
    :param path: location of the JSON-LD file to read
    :return: the same `graph` object, now containing the loaded triples
    """
    with open(path, 'r') as handle:
        document = json.load(handle)

    # Parse into a scratch graph first, then fold its triples into the target.
    loaded = Graph().parse(data=document, format='json-ld')
    graph += loaded
    return graph

In [30]:
class PolicyChecker(Graph):
    """RDF graph holding the policy checker generated for a single policy."""

    def __init__(self, p, *args, **kwargs):
        """
        Build the graph and seed it with the checker node.

        :param p: IRI of the policy to validate; its fragment (the text after
            '#') is used to name the checker
        :param args: positional arguments forwarded to rdflib's Graph
        :param kwargs: keyword arguments forwarded to rdflib's Graph
        """
        super().__init__(*args, **kwargs)
        self.p = p

        # Short prefixes so the serialized output is readable.
        prefixes = (
            ("ab", "http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#"),
            ("tb", "http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#"),
        )
        for prefix, iri in prefixes:
            self.bind(prefix, iri)

        # The checker is named after the policy's fragment: <abox>pc_<fragment>.
        policy_fragment = p.split("#")[1]
        self.URI = abox[f'pc_{policy_fragment}']

        checker = self.URI
        self.add((checker, RDF.type, tbox.PolicyChecker))
        self.add((checker, tbox.validates, p))

    def get_URI(self):
        """Return the IRI of the policy checker node."""
        return self.URI
        

In [37]:

class DCParser:
    """
    Parse the policies of a data product's data contracts into policy checkers.

    For every policy reachable from the data product (data product -> hasDC ->
    hasPolicy) a PolicyChecker graph is built, printed as Turtle and written,
    merged with the contract graph, to ``<checker name>.ttl``.
    """

    # Default file locations, kept identical to the previously hard-coded values.
    # NOTE(review): the mappings default is an absolute, machine-specific path;
    # pass `mappings_path` to the constructor when running elsewhere.
    DEFAULT_SHAPES_PATH = "./policy_grammar.ttl"
    DEFAULT_MAPPINGS_PATH = '/home/acraf/psr/tfm/Prototype/SideCar/ValidateContract/parser/operations_mappings.json'

    def __init__(self, dp, graph, shapes_path=DEFAULT_SHAPES_PATH, mappings_path=DEFAULT_MAPPINGS_PATH):
        """
        :param dp: local name of the data product (resolved as abox[dp])
        :param graph: rdflib graph containing the data contract
        :param shapes_path: Turtle file with the SHACL shapes of the policy grammar
        :param mappings_path: JSON-LD file mapping nodes to operations (tbox.hasOP)
        """
        self.dp = dp
        self.g = graph
        self.shapes_path = shapes_path
        self.mappings_path = mappings_path
        # Lazily loaded by _get_op so the mappings file is parsed only once.
        self._operations_mappings = None

    def _validate_graph(self) -> bool:
        """
        Validate the contract graph against the SHACL policy grammar.

        :return: True when the graph conforms to the shapes, False otherwise
        """
        shapes = Graph().parse(self.shapes_path, format="turtle")
        # pyshacl returns (conforms, report graph, report text); only the flag is used.
        conforms, _report_graph, _report_text = pyshacl.validate(self.g, shacl_graph=shapes)
        return conforms

    def _get_policies(self):
        """
        Get the policies associated with the data product.

        :return: one list of policies per data contract of the data product
            (a list of lists; a contract without policies yields an empty list)
        """
        contracts = self.g.objects(subject=abox[self.dp], predicate=tbox.hasDC)
        return [
            list(self.g.objects(subject=contract, predicate=tbox.hasPolicy))
            for contract in contracts
        ]

    def _get_op(self, action):
        """
        Look up the operation mapped to `action` through tbox.hasOP.

        The mappings file is read on first use and cached on the instance
        instead of being re-parsed for every lookup.

        :param action: node to look up in the operations mappings
        :return: the mapped operation, or None when there is no mapping
        """
        if self._operations_mappings is None:
            self._operations_mappings = add_jsonld_instances(Graph(), self.mappings_path)
        return self._operations_mappings.value(subject=action, predicate=tbox.hasOP)

    def _initOP(self, policy, pc):
        """
        Add the initial operation of the pipeline to the policy checker.

        Only a technology-agnostic access (tbox.hasTA) whose tbox.typeAcces
        fragment is "Static" produces an init operation; other types add nothing.

        :param policy: policy being parsed (not used by this step)
        :param pc: PolicyChecker graph to extend
        :return: None
        """
        ta = self.g.value(subject=abox[self.dp], predicate=tbox.hasTA)
        ta_type = self.g.value(subject=ta, predicate=tbox.typeAcces)

        # Check TA typeAcces
        if ta_type.split("#")[1] == "Static":
            # The init operation depends on the access type.
            init_operation = self._get_op(ta_type)
            # NOTE(review): the path is looked up on `ta_type`, not on `ta` —
            # confirm against the contract data which node carries tbox.path.
            ta_path = self.g.value(subject=ta_type, predicate=tbox.path)
            pc.add((init_operation, RDF.type, tbox.InitOperation))
            pc.add((init_operation, tbox.hasParameter, ta_path))
            pc.add((pc.get_URI(), tbox.nextStep, init_operation))  # TODO: ADD TO MAPPINGS

    def _handle_duties(self, policy, pc):
        """
        Add one operation per ODRL duty of the policy to the policy checker.

        :param policy: policy whose odrl.duty nodes are processed
        :param pc: PolicyChecker graph to extend
        :return: the operation of the last duty processed, or "" when the
            policy has no duties
        """
        operation = ""
        # NOTE(review): every duty is linked from abox.LoadData and only the
        # last one iterated is returned (and later linked to the report);
        # presumably policies carry a single duty — confirm for multi-duty policies.
        for duty in self.g.objects(subject=policy, predicate=odrl.duty):
            target = self.g.value(subject=duty, predicate=odrl.target)
            action = self.g.value(subject=duty, predicate=odrl.action)
            operation = self._get_op(action)
            pc.add((operation, RDF.type, tbox.Operation))
            pc.add((abox.LoadData, tbox.nextStep, operation))
            pc.add((operation, tbox.hasParameter, target))

        return operation

    def _parse_policy(self, policy):
        """
        Parse a policy into its intermediate representation.

        :param policy: policy to parse
        :return: the PolicyChecker graph built for the policy
        """
        # create policy checker graph
        pc = PolicyChecker(policy)

        # add initOperation
        self._initOP(policy, pc)

        # handle duties
        last_op = self._handle_duties(policy, pc)

        # Report: it follows the last duty operation. A policy without duties
        # yields no operation (""), which is not a valid triple subject, so the
        # report then follows abox.LoadData — the node duties are chained from.
        previous_step = last_op if last_op else abox.LoadData
        pc.add((previous_step, tbox.nextStep, abox.Report))
        pc.add((abox.Report, RDF.type, tbox.Report))

        return pc

    def parse_contract(self):
        """
        Validate the contract and generate a policy checker for every policy.

        Each checker is printed as Turtle and, merged with the contract graph,
        written to ``<checker name>.ttl`` in the working directory. When the
        graph does not conform to the grammar only a message is printed.

        :return: None
        """
        # validate policies
        if not self._validate_graph():
            print("The policies do not comply with the grammar")
            return

        # _get_policies returns one list per contract: walk every policy of
        # every contract (an empty list is simply skipped).
        for contract_policies in self._get_policies():
            for policy in contract_policies:
                # parse policy to its IR
                pc = self._parse_policy(policy)
                e_graph = self.g + pc
                e_graph.serialize(destination=f'{pc.get_URI().split("#")[1]}.ttl', format="turtle")

                print(pc.serialize(format="turtle"))
        

In [36]:
# Validate the loaded contract and generate the policy checker output for `dp`.
# NOTE(review): this cell's execution count (36) is lower than that of the
# DCParser definition cell (37), so the output shown below may come from an
# earlier version of the class — re-run after Restart Kernel -> Run All.
DCParser(dp, contract).parse_contract()

@prefix ab: <http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/abox#> .
@prefix tb: <http://www.semanticweb.org/acraf/ontologies/2024/healthmesh/tbox#> .

ab:pc_p1 a tb:PolicyChecker ;
    tb:nextStep ab:LoadData ;
    tb:validates ab:p1 .

ab:LoadData a tb:InitOperation ;
    tb:hasParameter "/home/acraf/psr/tfm/Prototype/DataProduct/Data/Explotation/UPENN-GBM_clinical_info_v2.1.csv" ;
    tb:nextStep ab:anonymize .

ab:Report a tb:Report .

ab:anonymize a tb:Operation ;
    tb:hasParameter ab:ID ;
    tb:nextStep ab:Report .


