# odrl PARSER

In [26]:
import sys
try:
    import pycali
except:
    !{sys.executable} -m pip install pycali
try:
    import rdflib
except:
    !{sys.executable} -m pip install rdflib

from pyld import jsonld
import json,urllib,os

class bcolors:
    """ANSI escape sequences used to colour and style text printed to the terminal."""
    # Colours: wrap a message between one of these and ENDC.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    # Resets every attribute set by the sequences above/below.
    ENDC = '\033[0m'
    # Text styles.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

def warnPrint(w):
    """
    Print a warning message in the WARNING colour.

    :param w (str): text of the warning; it is concatenated as-is, so it must be a string.
    """
    opening = f"{bcolors.WARNING}<WARNING> "
    closing = " " + bcolors.ENDC
    print(opening + w + closing)
    

Rules consist of four components:
- assignee: the subject that makes the request;
- target: the dataset on which the subject wants to perform the operation;
- action: the category the requested operation falls in;
- purpose: the reason for which the dataset is being requested (a different definition from the one in the ODRL vocabulary, see the MOSAICrOWN vocabulary).


In [27]:
# Example ODRL policy of type "Set": one permission to "use" a single target.
# The permission carries no assignee.
singleData={
    "@context": "http://www.w3.org/ns/odrl.jsonld",
    "@type": "Set",
    "uid": "http://example.com/policy:1010",
    "permission": [{
        "target": "http://example.com/asset:9898.movie",
        "action": "use"
    }]
}

# Example ODRL policy with four permissions over two targets (1999.mp3 and PurpleRain.mp3):
# for each target one assignee may "play" and another may "stream"; the assigner is always the same.
multipleTarget={
    "@context": "http://www.w3.org/ns/odrl.jsonld",
    "@type": "Policy",
    "uid": "http://example.com/policy:8888",
    "profile": "http://example.com/odrl:profile:20",
    "permission": [{
        "target": "http://example.com/music/1999.mp3",
        "assignee": "http://example.com/people/billie",
        "assigner": "http://example.com/org/sony-music",
        "action": "play"
    },
    {
        "target": "http://example.com/music/1999.mp3",
        "assignee": "http://example.com/people/joe",
        "assigner": "http://example.com/org/sony-music",
        "action": "stream"
    },
	{
        "target": "http://example.com/music/PurpleRain.mp3",
        "assignee": "http://example.com/people/danny",
        "assigner": "http://example.com/org/sony-music",
        "action": "play"
    },
	{
        "target": "http://example.com/music/PurpleRain.mp3",
        "assignee": "http://example.com/people/alex",
        "assigner": "http://example.com/org/sony-music",
        "action": "stream"
    }]
}


In [28]:
%run TreePrinter.ipynb

FIRST TREE
`- [31mroot[0m
   `- [31m1[0m
      `- [31mA[0m
SECOND TREE
`- [31mroot[0m
   |- [31m1[0m
   |  `- [31mB[0m
   |     `- [31mC[0m
   `- [31m2[0m
MERGED
`- [31mroot[0m
   |- [31m1[0m
   |  |- [31mA[0m
   |  `- [31mB[0m
   |     `- [31mC[0m
   `- [31m2[0m


### Create the db tree from files 
This function reads the URIs listed in the files and generates the trees needed by the program.<br>
The first line of each file is the IRI of the db.

In [29]:
TESTREES={}
def treeCreator():
    """
    Create the db trees compatible with the odrl rules.

    Every regular file in the "db" folder of the current working directory
    describes (part of) one db: its first line is the IRI of the db and every
    following line is a "/"-separated path inside that db. The IRI becomes the
    root node and each path segment becomes a nested child node. Files sharing
    the same IRI extend the same tree.

    The trees are stored in the global TESTREES, keyed by the db IRI.

    :raises OSError: if the "db" folder or one of its files cannot be read.
    """
    global TESTREES
    dbFolder = os.path.join(os.getcwd(), "db")
    for fileName in os.listdir(dbFolder):
        filePath = os.path.join(dbFolder, fileName)
        if not os.path.isfile(filePath):
            # sub-directories cannot be opened as db files
            continue
        with open(filePath) as dbFile:
            content = dbFile.read().splitlines()
        # an empty file (or an empty first line) has no IRI to use as root
        IRI = content[0].strip() if content else ""
        if not IRI:
            warnPrint("db file without IRI :"+fileName)
            continue
        root = TESTREES[IRI] if IRI in TESTREES else Node(IRI)
        for line in content[1:]:
            current = root
            for level in line.strip().split('/'):
                if not level:
                    # blank lines and leading/trailing/double "/" must not create empty-named nodes
                    continue
                if current.has_child(level):
                    current = current.get_child(level)
                else:
                    child = Node(level)
                    current.add_child(child)
                    current = child
        TESTREES[IRI] = root


# Fill TESTREES once; permissionSearcher looks the db trees up there.
treeCreator()

## Policy Searcher
- policies are loaded from odrl files placed inside a folder
- db structures and data are loaded from simple txt files containing each single iri available

A resource is authorized ONLY if a policy explicitly permits it or if it belongs to the subtree of an authorized resource.<br>
Even if a user has access to every resource under the same parent, the parent itself is still not authorized.

### Prohibition search
In case of conflict the searcher follows this [rule](https://www.w3.org/TR/odrl-model/#prohibition):
```
 Additionally, in case of any conflicts in the Policy (e.g., between Permissions and Prohibitions), the conflict property of the Policy is set to perm indicating that the Permissions will take precedence.
```
The values of conflict can be:
- invalid : the policy will be voided
- perm  : permissions override prohibitions
- prohibit  : prohibitions take priority

If no conflict field is found, the default action is prohibit.


### Permission Searcher

In [30]:
import pycali,rdflib
import json,urllib,re
from urllib.parse import urlparse

def checkIRI(IRI):
    """
    Reduce an IRI to the part that identifies the db.

    :param IRI (str): full IRI of a resource.
    :return: "<scheme>://<netloc>" of the IRI, or False (after printing a
             warning) when the IRI has no scheme or no network location.
    """
    parsed = urlparse(IRI)
    if not (parsed.scheme and parsed.netloc):
        warnPrint("invalid IRI :"+IRI)
        return False
    return parsed.scheme+"://"+parsed.netloc



# NOTE(review): permissionList is not referenced in the visible cells — confirm it is still needed.
permissionList = []
# Folder (under the current working directory) that permissionSearcher scans for policy files.
odrlFolder = os.getcwd() + "/odrl/"

def _ruleApplies(rule, name, permissionType):
    """Tell whether a permission/prohibition is addressed to `name` for the action `permissionType`."""
    # .get(): a rule without assignee (or action) simply does not apply instead of raising KeyError
    return rule.get("assignee")==name and rule.get("action")==permissionType


def _findTargetNode(dbTree, IRI, target):
    """
    Walk dbTree down to the node addressed by target.

    :param dbTree: root node of the (copied) db tree.
    :param IRI (str): IRI of the db; target must start with it.
    :param target: "target" value of a rule.
    :return: the addressed node, dbTree itself when target is the db IRI, or
             None when target is not a string under IRI or names a path that
             is missing from the tree.
    """
    if not isinstance(target, str) or not target.startswith(IRI):
        return None
    path = target[len(IRI):].strip()
    if path and not path.startswith("/") and not IRI.endswith("/"):
        # the IRI is only a textual prefix, e.g. "http://example.com" vs "http://example.com.org/x"
        return None
    node = dbTree
    for segment in path.split("/"):
        if not segment:
            continue
        if not node.has_child(segment):
            # stopping at the deepest existing ancestor would (un)authorize a whole subtree by mistake
            return None
        node = node.get_child(segment)
    return node


def permissionSearcher(name,IRI,permissionType)-> Node:
    """
    search which files an assignee has access with specified permission on a db defined by the IRI.
    Create copy of tree and return that one.

    Every ".odrl" file in odrlFolder is read as JSON. Matching permissions set
    the hierarchical auth flags of the target node to True. Matching
    prohibitions set the hierarchical auth flag to False unless the policy
    declares the conflict strategy "perm". A missing "conflict" field defaults
    to "prohibit". Rules whose target does not resolve to a node of this db
    are ignored.

    :param name: iri or string of the assignee.
    :param IRI (str): IRI of the db.
    :param permissionType (str): filter on permission
    :return: annotated copy of the db tree, or None when IRI is not in TESTREES.
    """
    # local import: the function must not depend on a later cell having imported copy
    import copy
    if IRI not in TESTREES:
        return None
    dbTree=copy.deepcopy(TESTREES[IRI])
    # sorted(): os.listdir order is arbitrary and policies are applied in sequence
    for filename in sorted(os.listdir(odrlFolder)):
        if not filename.endswith(".odrl"):
            continue
        with open(os.path.join(odrlFolder, filename)) as file:
            compact=json.load(file)
        conflict = compact.get("conflict", "prohibit")
        for perm in compact.get("permission", []):
            if not _ruleApplies(perm, name, permissionType):
                continue
            lvl=_findTargetNode(dbTree, IRI, perm.get("target"))
            if lvl is None:
                continue
            lvl.setHierarchicalAuth(True)
            lvl.setInverseHierarchicalAuth(True) #TODO nonsense
        # ODRL names the third strategy "invalid"; "void" is kept because the original code checked it.
        # NOTE(review): "invalid" is handled like "prohibit" here, the policy is not voided as a whole.
        if conflict in ("prohibit", "void", "invalid"):
            for pro in compact.get("prohibition", []):
                if not _ruleApplies(pro, name, permissionType):
                    continue
                # the target of the prohibition itself, not of the last permission visited
                lvl=_findTargetNode(dbTree, IRI, pro.get("target"))
                if lvl is None:
                    continue
                lvl.setHierarchicalAuth(False)
    return dbTree
 
          
    

    

### Testing permission searcher

In [31]:
# copy.deepcopy is used inside permissionSearcher to clone the db tree
import copy
exeIRI="http://example.com"
# Tree of what billie may "play" on the example db
billieTree=permissionSearcher("http://example.com/people/billie",exeIRI,"play")
print("BILLIE")
pprint_tree(billieTree)

# Same search for danny
dannyTree=permissionSearcher("http://example.com/people/danny",exeIRI,"play")
print("DANNY")
pprint_tree(dannyTree)


BILLIE
`- [31mhttp://example.com[0m
   |- [31mmusic[0m
   |  |- [31m1997.mp3[0m
   |  |- [31mRunToTheHills.mp3[0m
   |  |- [31mPurpleRain.mp3[0m
   |  |- [32m1999.mp3[0m
   |  |- [31mHellsBells.mp3[0m
   |  |- [31mJailhouseRock.mp3[0m
   |  |- [31mTheRoadCrew.mp3[0m
   |  |- [31mMoonlightDrive.mp3[0m
   |  |- [31mWaffle.mp3[0m
   |  |- [31mWhereTheEaglesDare.mp3[0m
   |  `- [31mLondonCalling.mp3[0m
   |- [32mnewMusic[0m
   |  |- [32mThunder.mp3[0m
   |  |- [32mLastDayOnEarth.mp3[0m
   |  |- [32mChampion.mp3[0m
   |  |- [32mWorldAtOurFeet.mp3[0m
   |  |- [32mHowFarIllGo.mp3[0m
   |  `- [32mSunny.mp3[0m
   |- [31mpeople[0m
   |  |- [31mbillie[0m
   |  |- [31mjoe[0m
   |  |- [31mdanny[0m
   |  `- [31malex[0m
   `- [31morg[0m
      |- [31msony-music[0m
      |- [31mvirgin-records[0m
      `- [31mUMPG[0m
DANNY
`- [32mhttp://example.com[0m
   |- [32mmusic[0m
   |  |- [32m1997.mp3[0m
   |  |- [32mRunToTheHills.mp3[0m
   |  |- 

## QUERY ANALYZER
- query in ingresso (nel linguaggio che preferisci)
- ragionare su permessi e divieti della policy, e riscrivere la query in modo tale da restituire la più ampia porzione dati a cui un soggetto può accedere. Avendo permessi e divieti su una struttura ad albero, avrai diverse alternative di implementazione (per esempio, potresti immaginare che i divieti posti su un nodo si propagano verso i nodi figlio sovrascrivendo eventuali permessi).


[Come si richiedono dati gerarchici in sql?](https://learnsql.com/blog/how-to-query-hierarchical-data/)

[esistono metodi di query migliori per questi tipi di dati?]

esempi di query:
- tutti i dati a cui un utente ha accesso:
    ```
        GET example.com/
    ```
- info utente
    ```
        GET example.com/{user_id}
    ```
- info dato relativo a utente specifico
    ```
        GET example.com/{user_id}/age
    ```
- metodi su dati: 
    * GET    (ottenere dato)
    * PUT    (modificare)
    * POST   (creare)
    * DELETE

### Parsing it!
1. we can try with format parsing:
```
    expectedFormat =re.compile('GET http://.* AS .* FOR [a-zA-Z]')
```
```diff
+   more flexible
-   why!
```

2. just split at whitespace, check fixed keyword 
```
    tokens=query.split(' ')
    if tokens[0]=="GET" and tokens[2]=="AS" and tokens[4]=="FOR":
        ...
```
```diff
+   easy to code
-   too static?
```

3. BOTH!

In [32]:
# Sample queries in the form "GET <target>[,<target>...] AS <assignee> FOR <action>".
# query/query1/query2 ask for one sub-folder, another sub-folder and the whole db;
# multipleQuery asks for two targets at once (comma separated).
query="GET http://example.com/newMusic AS http://example.com/people/billie FOR play "
query1="GET http://example.com/music AS http://example.com/people/billie FOR play "
query2="GET http://example.com AS http://example.com/people/billie FOR play "
multipleQuery="GET http://example.com/newMusic,http://example.com/music AS http://example.com/people/billie FOR play "

def querySearch(iri,assignee,action):
    """
    Build the tree of what `assignee` may access under `iri` for `action`.

    :param iri (str): IRI of the requested resource; its scheme and host select the db.
    :param assignee: iri or string of the subject making the request.
    :param action (str): requested action, matched against the "action" of the rules.
    :return: the resulting tree, or None when the iri is invalid, the db is
             unknown or nothing is left after pruning.
    """
    IRI=checkIRI(iri)
    if not IRI:
        # checkIRI has already printed the warning
        return None
    tree=permissionSearcher(assignee,IRI,action)
    if tree is None:
        # permissionSearcher returns None for a db it has no tree for; do not hand None to the pruning helpers
        warnPrint("unknown db :"+IRI)
        return None
    # deleteUnauth / deleteUnrequested are defined outside this cell —
    # presumably they prune unauthorized nodes and nodes outside `iri`; verify there.
    tree=deleteUnauth(tree)
    tree=deleteUnrequested(tree,iri)
    if tree is None:
        print("you are not auth")
        return None
    return tree

def queryAnalyzer(query):
    """
    Parse and run a query, then print the merged result tree.

    Expected format: "GET <target>[,<target>...] AS <assignee> FOR <action>",
    where the first target starts with http:// or https:// and the action
    starts with a letter. Each target is searched on its own with querySearch
    and the results are merged with mergeTree before printing.

    A query that does not match the format prints a warning and nothing else.

    :param query (str): the query text.
    """
    # The keywords must be surrounded by whitespace, so "AS"/"FOR"/"GET" inside an IRI
    # (e.g. ".../FORever.mp3") no longer split the query; [a-zA-Z] replaces [aA-zZ],
    # whose A-z range also matched "[", "\", "]", "^", "_" and "`".
    expectedFormat = re.compile(r'GET\s+(https?://.+?)\s+AS\s+(.+?)\s+FOR\s+([a-zA-Z].*?)\s*$')
    parsed = expectedFormat.match(query)
    if not parsed:
        warnPrint("invalid query :"+query)
        return
    targets, assignee, action = (group.strip() for group in parsed.groups())
    t=None
    for token in targets.split(','):
        t1=querySearch(token.strip(),assignee,action)
        t=mergeTree(t,t1)
    pprint_tree(t)


# Demo run: multipleQuery requests newMusic and music together; the single-target runs below are left disabled.
# print("ASKING FOR ALL NEW MUSIC")
queryAnalyzer(multipleQuery)
# print("ASKING FOR ALL MUSIC")
# queryAnalyzer(query1)
# print("ASKING FOR ALL THE DB")
# queryAnalyzer(query2)






`- [31mhttp://example.com[0m
   |- [32mnewMusic[0m
   |  |- [32mThunder.mp3[0m
   |  |- [32mLastDayOnEarth.mp3[0m
   |  |- [32mChampion.mp3[0m
   |  |- [32mWorldAtOurFeet.mp3[0m
   |  |- [32mHowFarIllGo.mp3[0m
   |  `- [32mSunny.mp3[0m
   `- [31mmusic[0m
      `- [32m1999.mp3[0m
