From 854b970eb4d188c1373e1b77b02669281022e136 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 2 Sep 2022 12:28:36 +0100 Subject: [PATCH 1/3] added CMS Storage Catalog --- src/xrdsum/cli.py | 2 +- src/xrdsum/storage_catalog.py | 17 -------- src/xrdsum/storage_catalog/__init__.py | 38 +++++++++++++++++ src/xrdsum/storage_catalog/_base.py | 12 ++++++ src/xrdsum/storage_catalog/cms.py | 58 +++++++++++++++++++++++++ tests/storage.xml | 59 ++++++++++++++++++++++++++ tests/test_cms_storage_catalog.py | 42 ++++++++++++++++++ tests/test_storage_catalog.py | 25 +++++++++++ 8 files changed, 235 insertions(+), 18 deletions(-) delete mode 100644 src/xrdsum/storage_catalog.py create mode 100644 src/xrdsum/storage_catalog/__init__.py create mode 100644 src/xrdsum/storage_catalog/_base.py create mode 100644 src/xrdsum/storage_catalog/cms.py create mode 100644 tests/storage.xml create mode 100644 tests/test_cms_storage_catalog.py create mode 100644 tests/test_storage_catalog.py diff --git a/src/xrdsum/cli.py b/src/xrdsum/cli.py index cd3235f..8b35e5a 100644 --- a/src/xrdsum/cli.py +++ b/src/xrdsum/cli.py @@ -51,7 +51,7 @@ def get( """, ), storage_catalog: str = typer.Option( - default="/etc/xrootd/storage.xml", + default="cms|/etc/xrootd/storage.xml?direct", help="Path to the CMS storage catalog", ), file_system: str = typer.Option( diff --git a/src/xrdsum/storage_catalog.py b/src/xrdsum/storage_catalog.py deleted file mode 100644 index 3484cff..0000000 --- a/src/xrdsum/storage_catalog.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Implementations of storage catalogs.""" - - -from __future__ import annotations - - -def __resolve_cms_path(file_path: str, storage_catalog: str) -> str: - """Resolve the CMS path of a file.""" - raise NotImplementedError() - - -def resolve_file_path(file_path: str, storage_catalog: str) -> str: - """Resolve the file path given a file catalog.""" - if not storage_catalog: - return file_path - - return file_path diff --git 
a/src/xrdsum/storage_catalog/__init__.py b/src/xrdsum/storage_catalog/__init__.py new file mode 100644 index 0000000..6efc34b --- /dev/null +++ b/src/xrdsum/storage_catalog/__init__.py @@ -0,0 +1,38 @@ +"""xrdsum.storage_catalog package""" +from __future__ import annotations + +from ._base import StorageCatalog +from .cms import CMSStorageCatalog + +AVAILABLE_STORAGE_CATALOGS: dict[str, type[StorageCatalog]] = { + "cms": CMSStorageCatalog, +} + + +def resolve_file_path(file_path: str, storage_catalog: str) -> str: + """Resolve the file path given a file catalog.""" + if not storage_catalog: + return file_path + + # split experiment, storage catalog and protocol + # e.g. cms|/etc/xrootd/storage.xml?direct + experiment, storage_catalog = storage_catalog.split("|", maxsplit=1) + if experiment not in AVAILABLE_STORAGE_CATALOGS: + raise ValueError( + f"Unknown storage catalog {experiment}. Available: {AVAILABLE_STORAGE_CATALOGS.keys()}" + ) + + protocol = None + if "?" in storage_catalog: + storage_catalog, protocol = storage_catalog.split("?", maxsplit=1) + + catalog = AVAILABLE_STORAGE_CATALOGS[experiment](storage_catalog, protocol=protocol) + return catalog.lfn2pfn(file_path) + + +__all__ = [ + "CMSStorageCatalog", + "StorageCatalog", + "AVAILABLE_STORAGE_CATALOGS", + "resolve_file_path", +] diff --git a/src/xrdsum/storage_catalog/_base.py b/src/xrdsum/storage_catalog/_base.py new file mode 100644 index 0000000..f70544e --- /dev/null +++ b/src/xrdsum/storage_catalog/_base.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import Protocol + + +class StorageCatalog(Protocol): + def __init__(self, config: str | None = None, protocol: str | None = None) -> None: + raise NotImplementedError() + + def lfn2pfn(self, lfn: str) -> str: + """Converts logical filename to physical filename""" + raise NotImplementedError() diff --git a/src/xrdsum/storage_catalog/cms.py b/src/xrdsum/storage_catalog/cms.py new file mode 100644 index 0000000..d0762f0 --- /dev/null +++ 
b/src/xrdsum/storage_catalog/cms.py @@ -0,0 +1,58 @@ +# implementation of the Compact Muon Solenoid (CMS) file catalog lookup +from __future__ import annotations + +import re +from dataclasses import field + +from ._base import StorageCatalog + + +class CMSStorageCatalog(StorageCatalog): + """CMS file catalog lookup""" + + config: str = "storage.xml" + protocol: str = "direct" + transformation_rules: list[tuple[re.Pattern[str], str]] = field( + default_factory=list + ) + + def __init__(self, config: str | None = None, protocol: str | None = None) -> None: + if config is not None: + self.config = config + if protocol is not None: + self.protocol = protocol + self.transformation_rules = self.__read_config() + + def __read_config(self) -> list[tuple[re.Pattern[str], str]]: + """Reads the CMS storage catalog""" + from xml.dom.minidom import parse + + cms_rules: list[tuple[re.Pattern[str], str]] = [] + with parse(self.config) as dom: + rules = dom.getElementsByTagName("lfn-to-pfn") + for rule in rules: + if rule.getAttribute("protocol") != self.protocol: + continue + pattern = re.compile(rule.getAttribute("path-match")) + result = rule.getAttribute("result") + cms_rules.append((pattern, result)) + return cms_rules + + def is_cms_path(self, path: str) -> bool: + """Checks if the path is a CMS path""" + cms_regex = re.compile(r"^/*(store.*)") + match = cms_regex.match(path) + if match: + return True + return False + + def lfn2pfn(self, lfn: str) -> str: + """Converts logical filename to physical filename""" + for rule in self.transformation_rules: + pattern, result = rule + match = pattern.match(lfn) + if match is None: + continue + result = result.replace("$", "\\") + return match.expand(result) + return lfn diff --git a/tests/storage.xml b/tests/storage.xml new file mode 100644 index 0000000..ccbdbe4 --- /dev/null +++ b/tests/storage.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/tests/test_cms_storage_catalog.py b/tests/test_cms_storage_catalog.py new file mode 100644 index 0000000..d419def --- /dev/null +++ b/tests/test_cms_storage_catalog.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import pytest + +from xrdsum import storage_catalog +from xrdsum.storage_catalog import AVAILABLE_STORAGE_CATALOGS + + +@pytest.fixture +def cms_config(): + return "tests/storage.xml", "direct" + + +@pytest.mark.parametrize( + "path, result", + [ + ("/store/user/johndoe/test.txt", True), + ("/xrootd/cms/store/user/johndoe/test.txt", False), + ("/xrootd/othervo/test.txt", False), + ], +) +def test_is_cms_path(cms_config, path, result): + cms = storage_catalog.CMSStorageCatalog(*cms_config) + assert cms.is_cms_path(path) == result + + +@pytest.mark.parametrize( + "lfn, pfn", + [ + ("/store/user/johndoe/test.txt", "/xrootd/cms/store/user/johndoe/test.txt"), + ( + "/xrootd/cms/store/user/johndoe/test.txt", + "/xrootd/cms/store/user/johndoe/test.txt", + ), + ("/xrootd/othervo/test.txt", "/xrootd/othervo/test.txt"), + ], +) +def test_cms_catalog(cms_config, lfn, pfn): + assert "cms" in AVAILABLE_STORAGE_CATALOGS + storage_catalog, protocol = cms_config + catalog = AVAILABLE_STORAGE_CATALOGS["cms"](storage_catalog, protocol) + assert catalog.lfn2pfn(lfn) == pfn diff --git a/tests/test_storage_catalog.py b/tests/test_storage_catalog.py new file mode 100644 index 0000000..1c50887 --- /dev/null +++ b/tests/test_storage_catalog.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import pytest + +from xrdsum.storage_catalog import resolve_file_path + + +@pytest.fixture +def cli_param(): + return "cms|tests/storage.xml?direct" + + +@pytest.mark.parametrize( + "lfn, pfn", + [ + ("/store/user/johndoe/test.txt", "/xrootd/cms/store/user/johndoe/test.txt"), + ( + "/xrootd/cms/store/user/johndoe/test.txt", + "/xrootd/cms/store/user/johndoe/test.txt", + ), + ("/xrootd/othervo/test.txt", "/xrootd/othervo/test.txt"), + ], +) +def 
test_resolve_file_path(cli_param, lfn, pfn): + assert resolve_file_path(lfn, cli_param) == pfn From 90206255850f857830ca76000fc5a93e25f76542 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 2 Sep 2022 12:37:59 +0100 Subject: [PATCH 2/3] added missing docstrings for storage_catalog --- src/xrdsum/storage_catalog/_base.py | 9 +++++++++ src/xrdsum/storage_catalog/cms.py | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/xrdsum/storage_catalog/_base.py b/src/xrdsum/storage_catalog/_base.py index f70544e..d14fb7d 100644 --- a/src/xrdsum/storage_catalog/_base.py +++ b/src/xrdsum/storage_catalog/_base.py @@ -1,9 +1,18 @@ +""" +Base module for storage catalog classes. +""" from __future__ import annotations from typing import Protocol class StorageCatalog(Protocol): + """ + Base definition for storage catalog classes. + A storage catalog needs to implement the following methods: + - __init__, which takes a config file path and protocol as arguments + - lfn2pfn, which takes a logical filename and returns a physical filename""" + def __init__(self, config: str | None = None, protocol: str | None = None) -> None: raise NotImplementedError() diff --git a/src/xrdsum/storage_catalog/cms.py b/src/xrdsum/storage_catalog/cms.py index d0762f0..36be58a 100644 --- a/src/xrdsum/storage_catalog/cms.py +++ b/src/xrdsum/storage_catalog/cms.py @@ -1,8 +1,9 @@ -# implementation of the Compact Muon Solenoid (CMS) file catalog lookup +"""Implementation of the Compact Muon Solenoid (CMS) file catalog lookup (storage.xml) """ from __future__ import annotations import re from dataclasses import field +from xml.dom.minidom import parse as parse_xml from ._base import StorageCatalog @@ -25,10 +26,9 @@ def __init__(self, config: str | None = None, protocol: str | None = None) -> No def __read_config(self) -> list[tuple[re.Pattern[str], str]]: """Reads the CMS storage catalog""" - from xml.dom.minidom import parse cms_rules: list[tuple[re.Pattern[str], str]] = [] - 
with parse(self.config) as dom: + with parse_xml(self.config) as dom: rules = dom.getElementsByTagName("lfn-to-pfn") for rule in rules: if rule.getAttribute("protocol") != self.protocol: From 8c566bf200d908171d26ae08f3c4a50f300ed352 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 2 Sep 2022 12:47:22 +0100 Subject: [PATCH 3/3] change minimal Python version from 3.7 to 3.8 --- .github/workflows/ci.yml | 2 +- pyproject.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b3bc23..537169d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.10"] + python-version: ["3.8", "3.10"] runs-on: [ubuntu-latest, macos-latest, windows-latest] include: diff --git a/pyproject.toml b/pyproject.toml index 902859b..94e9f17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ license = { file = "LICENSE" } description = "Package for retrieving and calculating checksums for XRootD" readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.8" classifiers = [ "License :: OSI Approved :: BSD License", @@ -108,7 +108,7 @@ known_third_party = ["typer", "click"] [tool.pylint] -master.py-version = "3.7" +master.py-version = "3.8" master.ignore-paths= ["src/xrdsum/_version.py"] reports.output-format = "colorized" similarities.ignore-imports = "yes"