In [None]:
from enum import Enum
from pathlib import Path

from pydantic import BaseModel
from wcmatch import glob

Assumptions (easy to change)
- I assume that we store rules rather than direct per-file permissions (because of create, we need to store rules anyway).
When we create a file, we need to match patterns like `**/*.txt` against paths that do not exist yet, so it would be challenging to store permissions only for predefined paths in the database. We therefore also need to store the rules themselves, which contain abstract path patterns.


- I assume all rules are of the same format
- if a permfile lower in the tree defines a lower permission, the higher permission is not overwritten; instead, the result is the union of both
- in the future we can use disallow if you want to restrict permissions from a permfile higher in the tree (out of scope for this prototype)

rule types:

```
admin: x@openmined.org
create: *


- rule
  - glob:*
  - user:x@openmined.org
  - type: allow
```

TODO:
- decide whether we want to allow patterns like `../../*`

In [None]:
class PermissionType(Enum):
    """The kinds of permission a rule can grant on a path."""

    # Permission to create a file.
    CREATE = 1
    # Permission to read a file.
    READ = 2
    # Permission to write a file (ComputedPermission also grants READ with it).
    WRITE = 3
    # Full control (ComputedPermission grants every other permission with it).
    ADMIN = 4


class PermissionRule(BaseModel):
    """One permission rule taken from a permission file.

    The rule grants ``permtype`` to ``user`` for paths matching
    ``path_pattern``; the pattern is interpreted relative to ``dir_path``.
    """

    # Directory in which the permission file defining this rule lives.
    dir_path: Path
    # Glob pattern of the paths the rule covers (e.g. **/*.txt); may contain
    # the "{useremail}" placeholder.
    path_pattern: str
    # Email of the user the rule is for, or "*" for every user.
    user: str
    # Which permission is granted: read / write / create / admin.
    permtype: PermissionType

    @property
    def has_email_template(self) -> bool:
        """Whether ``path_pattern`` contains the ``{useremail}`` placeholder."""
        placeholder = "{useremail}"
        return placeholder in self.path_pattern

    def resolve_path_pattern(self, email: str) -> str:
        """Return ``path_pattern`` with every ``{useremail}`` replaced by *email*."""
        resolved = self.path_pattern.replace("{useremail}", email)
        return resolved

Let's assume this directory structure:

```
owner@datasite.org
├── folder1
│   ├── syftperm.yml 
│   ├── folder2
│   │   ├── syftperm.yml 
```

Let's say `owner@datasite.org/folder1/.syftperm` looks like this:

```
- rule
  - glob:*/*
  - user:user@example.org
  - permission: write
  - type: allow # we dont use this right now

- rule
  - glob: {useremail}/*
  - user: *
  - permission: create
  - type: allow # we dont use this right now

```


Let's assume there are more permission files; from them we can extract some rules:

In [None]:
# Example rules, listed from the datasite root down to the deepest folder.
rule_table = [
    # Datasite root: ADMIN for every user on paths matching "*.txt".
    PermissionRule(
        dir_path="owner@datasite.org",
        path_pattern="*.txt",
        user="*",
        permtype=PermissionType.ADMIN,
    ),
    # folder1: WRITE for user@example.org on paths matching "*/*".
    PermissionRule(
        dir_path="owner@datasite.org/folder1",
        path_pattern="*/*",
        user="user@example.org",
        permtype=PermissionType.WRITE,
    ),
    # folder1 again (one level can hold several rules): every user gets
    # CREATE inside the folder named after their own email.
    PermissionRule(
        dir_path="owner@datasite.org/folder1",
        path_pattern="{useremail}/*",
        user="*",
        permtype=PermissionType.CREATE,
    ),
    # folder1/folder2: READ for every user on paths matching "*.txt".
    PermissionRule(
        dir_path="owner@datasite.org/folder1/folder2",
        path_pattern="*.txt",
        user="*",
        permtype=PermissionType.READ,
    ),
]

In [None]:
def get_rules_for_path(path: Path) -> "list[PermissionRule]":
    """Return the rules defined on the branch of the tree that leads to *path*.

    Only rules whose ``dir_path`` is a strict ancestor of *path* are returned;
    rules from other branches (or from deeper folders) can never apply to it.
    The result is ordered from the root of the tree down to the folder closest
    to *path*, so that rules lower in the tree are applied after rules higher
    up. Rules that live in the same directory keep their order in
    ``rule_table``.

    Args:
        path: The file path whose governing rules are wanted.

    Returns:
        A new list of matching rules (``rule_table`` itself is not modified).
    """
    target = Path(path)
    on_branch = [rule for rule in rule_table if Path(rule.dir_path) in target.parents]
    # sorted() is stable, so rules of one directory stay in table order.
    return sorted(on_branch, key=lambda rule: len(Path(rule.dir_path).parts))

In [None]:
# util
def issubpath(path1: Path, path2: Path) -> bool:
    """Return True when *path1* is a strict ancestor of *path2*.

    Note the argument order: the candidate parent comes first. A path is not
    considered an ancestor of itself.
    """
    return any(ancestor == path1 for ancestor in path2.parents)

In [None]:
class ComputedPermission(BaseModel):
    """Permissions accumulated for one user on one file path.

    Every permission starts out as denied. Rules are fed in through
    :meth:`apply`; each rule that matches both the user and the path switches
    on the permissions it grants. Rules only ever add permissions here,
    nothing is revoked.
    """

    # Email of the user the permissions are computed for.
    user: str
    # Path of the file the permissions are computed for.
    file_path: Path

    # Permission -> granted? Everything is denied until a rule grants it.
    perms: dict[PermissionType, bool] = {
        PermissionType.READ: False,
        PermissionType.CREATE: False,
        PermissionType.WRITE: False,
        PermissionType.ADMIN: False,
    }

    def has_permission(self, permtype: PermissionType) -> bool:
        """Return whether *permtype* has been granted so far."""
        return self.perms[permtype]

    def user_matches(self, rule: PermissionRule) -> bool:
        """Return whether *rule* targets this user.

        A rule matches when its user is the wildcard ``"*"`` or is equal to
        the user these permissions are computed for.
        """
        return rule.user in ("*", self.user)

    def rule_applies_to_path(self, rule: PermissionRule) -> bool:
        """Return whether *rule*'s path pattern covers ``file_path``.

        Patterns are written relative to the directory of the permission file
        (``rule.dir_path``), so the file path is made relative to that
        directory before it is matched against the pattern.
        """
        # A rule only governs paths below the directory of its permission file.
        if not issubpath(rule.dir_path, self.file_path):
            return False

        if rule.has_email_template:
            # we fill in a/b/{useremail}/*.txt -> a/b/user@email.org/*.txt
            # NOTE(review): the email is inserted into the glob pattern
            # verbatim; an email containing glob metacharacters would be
            # interpreted as a pattern - confirm emails are validated upstream.
            resolved_path_pattern = rule.resolve_path_pattern(self.user)
        else:
            resolved_path_pattern = rule.path_pattern

        relative_file_path = self.file_path.relative_to(rule.dir_path)
        # wcmatch only lets "**" span directory levels when GLOBSTAR is set;
        # without the flag "**" behaves like "*" and patterns such as
        # "**/*.txt" would not match recursively.
        return glob.globmatch(relative_file_path, resolved_path_pattern, flags=glob.GLOBSTAR)

    def _add_permission(self, permtype: PermissionType) -> None:
        """Grant *permtype* together with the permissions it implies."""
        # WRITE implies READ; ADMIN implies everything.
        implied = {
            PermissionType.READ: (PermissionType.READ,),
            PermissionType.CREATE: (PermissionType.CREATE,),
            PermissionType.WRITE: (PermissionType.WRITE, PermissionType.READ),
            PermissionType.ADMIN: (
                PermissionType.WRITE,
                PermissionType.READ,
                PermissionType.CREATE,
                PermissionType.ADMIN,
            ),
        }

        for perm in implied.get(permtype, ()):
            self.perms[perm] = True

    def apply(self, rule: PermissionRule) -> None:
        """Grant the rule's permission if it matches both the user and the path."""
        if self.user_matches(rule) and self.rule_applies_to_path(rule):
            self._add_permission(rule.permtype)

In [None]:
def can_create(file_path: Path, email: str) -> bool:
    """Return whether the user *email* may create a file at *file_path*.

    All rules returned by ``get_rules_for_path`` are applied, in order, to a
    fresh ``ComputedPermission``; the answer is whether CREATE ends up granted.

    Args:
        file_path: Path of the file to be created (it need not exist yet).
        email: Email of the user requesting the creation.
    """
    # this has to be retrieved in a certain order that respects:
    # 1. permissions lower in the tree to be applied on top of permissions higher in the tree
    # 2. permissions later in the file to be applied on top of permissions earlier in the file
    # 3. later this also has to take into account terminal permissions

    # Builtin generic instead of typing.List, which this file never imports.
    rules: list[PermissionRule] = get_rules_for_path(file_path)

    permission = ComputedPermission(user=email, file_path=file_path)

    for rule in rules:
        permission.apply(rule)

    return permission.has_permission(PermissionType.CREATE)

In [None]:
# def upload(filename, email):
file_path = Path("owner@datasite.org/folder1/user@example.org/file.txt")
email = "user@example.org"
can_create(file_path, email)

In [None]:
glob.globmatch

Supports Unix shell-style globbing patterns and is highly configurable:

- **Asterisk (*)**: Matches zero or more characters within a single directory level. For example:
    - *.txt will match all files with a .txt extension.
    - dir/* will match all files in dir, but not in its subdirectories.
- **Double Asterisk (**)**: Matches zero or more directories, making it recursive (in `wcmatch` this only takes effect when the `GLOBSTAR` flag is passed). This is commonly seen in .gitignore files. For example:
    - dir/** will match all files in dir and its subdirectories at any level.
    - **/*.log will match any .log file in the repository, at any depth.
- **Question Mark (?)**: Matches exactly one character. For example: (SKIP?)
    - file?.txt will match file1.txt or fileA.txt, but not file12.txt.
- **Square Brackets ([abc])**: Matches any single character within the brackets, similar to regex. For example: (SKIP?)
    - file[1-3].txt will match file1.txt, file2.txt, and file3.txt.
- **Exclamation Mark (!)**: Used in .gitignore to negate patterns. For example: (V2?)
    - !important.txt will force Git to include important.txt even if a broader pattern (like *.txt) excludes it.
