Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.7", "3.10"]
python-version: ["3.8", "3.10"]
runs-on: [ubuntu-latest, macos-latest, windows-latest]

include:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ license = { file = "LICENSE" }
description = "Package for retrieving and calculating checksums for XRootD"
readme = "README.md"

requires-python = ">=3.7"
requires-python = ">=3.8"

classifiers = [
"License :: OSI Approved :: BSD License",
Expand Down Expand Up @@ -108,7 +108,7 @@ known_third_party = ["typer", "click"]


[tool.pylint]
master.py-version = "3.7"
master.py-version = "3.8"
master.ignore-paths= ["src/xrdsum/_version.py"]
reports.output-format = "colorized"
similarities.ignore-imports = "yes"
Expand Down
2 changes: 1 addition & 1 deletion src/xrdsum/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get(
""",
),
storage_catalog: str = typer.Option(
default="/etc/xrootd/storage.xml",
default="cms|/etc/xrootd/storage.xml?direct",
help="Path to the CMS storage catalog",
),
file_system: str = typer.Option(
Expand Down
17 changes: 0 additions & 17 deletions src/xrdsum/storage_catalog.py

This file was deleted.

38 changes: 38 additions & 0 deletions src/xrdsum/storage_catalog/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""xrdsum.storage_catalog package"""
from __future__ import annotations

from ._base import StorageCatalog
from .cms import CMSStorageCatalog

# Registry of storage catalog implementations, keyed by the experiment name
# that prefixes a catalog specification (the part before "|").
AVAILABLE_STORAGE_CATALOGS: dict[str, type[StorageCatalog]] = {
    "cms": CMSStorageCatalog,
}


def resolve_file_path(file_path: str, storage_catalog: str) -> str:
    """Resolve a logical file path to a physical one via a storage catalog.

    ``storage_catalog`` is a specification of the form
    ``<experiment>|<path>[?<protocol>]``,
    e.g. ``cms|/etc/xrootd/storage.xml?direct``.

    Args:
        file_path: Logical file name to resolve.
        storage_catalog: Catalog specification. When empty, ``file_path`` is
            returned unchanged.

    Returns:
        Whatever the selected catalog's ``lfn2pfn`` returns for ``file_path``.

    Raises:
        ValueError: If the specification has no ``|`` separator, or if the
            experiment is not a key of ``AVAILABLE_STORAGE_CATALOGS``.
    """
    if not storage_catalog:
        return file_path

    # Split off the experiment. A bare path (no "|") used to fail with an
    # opaque tuple-unpacking error; report the expected format instead.
    experiment, separator, catalog_spec = storage_catalog.partition("|")
    if not separator:
        raise ValueError(
            f"Invalid storage catalog {storage_catalog!r}. "
            "Expected '<experiment>|<path>[?<protocol>]', "
            "e.g. 'cms|/etc/xrootd/storage.xml?direct'"
        )
    if experiment not in AVAILABLE_STORAGE_CATALOGS:
        available = ", ".join(sorted(AVAILABLE_STORAGE_CATALOGS))
        raise ValueError(
            f"Unknown storage catalog {experiment}. Available: {available}"
        )

    # The first "?" separates the catalog path from the optional protocol.
    # Without a "?", protocol stays None so the catalog applies its default.
    config_path, has_protocol, protocol_name = catalog_spec.partition("?")
    protocol = protocol_name if has_protocol else None

    catalog = AVAILABLE_STORAGE_CATALOGS[experiment](config_path, protocol=protocol)
    return catalog.lfn2pfn(file_path)


# Names exported by the package.
__all__ = [
    "CMSStorageCatalog",
    "StorageCatalog",
    "AVAILABLE_STORAGE_CATALOGS",
    "resolve_file_path",
]
21 changes: 21 additions & 0 deletions src/xrdsum/storage_catalog/_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
Base module for storage catalog classes.
"""
from __future__ import annotations

from typing import Protocol


class StorageCatalog(Protocol):
    """
    Interface that every storage catalog implementation has to provide.

    An implementation supplies two methods:
      - __init__, accepting a config file path and a protocol name
      - lfn2pfn, accepting a logical filename and returning a physical filename
    """

    def __init__(self, config: str | None = None, protocol: str | None = None) -> None:
        raise NotImplementedError

    def lfn2pfn(self, lfn: str) -> str:
        """Return the physical filename that belongs to the logical filename."""
        raise NotImplementedError
58 changes: 58 additions & 0 deletions src/xrdsum/storage_catalog/cms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Implementation of the Compact Muon Solenoid (CMS) file catalog lookup (storage.xml) """
from __future__ import annotations

import re
from dataclasses import field
from xml.dom.minidom import parse as parse_xml

from ._base import StorageCatalog


class CMSStorageCatalog(StorageCatalog):
    """CMS file catalog lookup driven by the ``lfn-to-pfn`` rules of a storage.xml.

    Attributes:
        config: Path of the storage catalog XML file.
        protocol: Only rules whose ``protocol`` attribute equals this value
            are loaded.
        transformation_rules: ``(compiled path-match, result template)`` pairs
            in document order; filled in by ``__init__``.
    """

    config: str = "storage.xml"
    protocol: str = "direct"
    # Annotation only. This class is not a dataclass, so a ``field(...)``
    # default would merely leave a ``dataclasses.Field`` object on the class.
    transformation_rules: list[tuple[re.Pattern[str], str]]

    # Compiled once rather than on every ``is_cms_path`` call.
    _CMS_PATH_REGEX = re.compile(r"^/*(store.*)")

    def __init__(self, config: str | None = None, protocol: str | None = None) -> None:
        """Load the rules for ``protocol`` from the catalog file ``config``.

        Args:
            config: Path to the storage.xml; ``None`` keeps the class default.
            protocol: Protocol to select rules for; ``None`` keeps the class
                default.
        """
        if config is not None:
            self.config = config
        if protocol is not None:
            self.protocol = protocol
        self.transformation_rules = self.__read_config()

    def __read_config(self) -> list[tuple[re.Pattern[str], str]]:
        """Read the ``lfn-to-pfn`` rules for ``self.protocol`` from ``self.config``."""
        # NOTE(review): only the "protocol", "path-match" and "result"
        # attributes are read; other rule attributes (e.g. "chain") are not
        # consulted -- confirm that is intended.
        with parse_xml(self.config) as dom:
            return [
                (
                    re.compile(rule.getAttribute("path-match")),
                    rule.getAttribute("result"),
                )
                for rule in dom.getElementsByTagName("lfn-to-pfn")
                if rule.getAttribute("protocol") == self.protocol
            ]

    def is_cms_path(self, path: str) -> bool:
        """Return True if ``path`` starts with ``store`` after any leading slashes."""
        return self._CMS_PATH_REGEX.match(path) is not None

    def lfn2pfn(self, lfn: str) -> str:
        """Convert a logical filename to a physical filename.

        Rules are tried in order and the first whose pattern matches the start
        of ``lfn`` wins. If no rule matches, ``lfn`` is returned unchanged.
        """
        for pattern, result in self.transformation_rules:
            match = pattern.match(lfn)
            if match is None:
                continue
            # The catalog writes group references as "$1"; ``Match.expand``
            # expects "\1".
            return match.expand(result.replace("$", "\\"))
        return lfn
59 changes: 59 additions & 0 deletions tests/storage.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<!-- storage.xml for T2_UK_SGrid_Bristol

Storage element: xrootd.phy.bris.ac.uk
Storage software: XROOTD 5.3.2

All CMS data are stored using the /store convention
Therefore need to map:
/+store/(.*)
and pass through for everything else

Special case: LoadTest. Needs to come first for every type of mapping.
-->
<storage-mapping>
<!-- Mappings are applied in order, so the first to match wins. As store is the most generic, make sure it's last -->

<!-- Bypass all correct paths -->
<lfn-to-pfn protocol="direct" path-match="/+xrootd/(.*)" result="/xrootd/$1" />
<pfn-to-lfn protocol="direct" path-match="/+xrootd/(.*)" result="/xrootd/$1" />

<lfn-to-pfn protocol="direct" path-match="/+cksums//xrootd/(.*)" result="/cksums/xrootd/$1" />
<pfn-to-lfn protocol="direct" path-match="/+cksums//xrootd/(.*)" result="/cksums/xrootd/$1" />
<!-- End bypass -->

<!-- Specific for LoadTest07 -->
<lfn-to-pfn protocol="direct" path-match=".*/LoadTest07_Bristol_(.*)_.*_.*" result="/xrootd/cms/store/PhEDEx_LoadTest07/LoadTest07_Debug_T2_UK_SGrid_Bristol/LoadTest07_SouthGrid_Bristol_$1" />
<lfn-to-pfn protocol="direct" path-match=".*/LoadTest07_SouthGrid_Bristol_(.*)_.*_.*" result="/xrootd/cms/store/PhEDEx_LoadTest07/LoadTest07_Debug_T2_UK_SGrid_Bristol/LoadTest07_SouthGrid_Bristol_$1" />
<!-- End LoadTest07 -->

<!-- Load test -->
<lfn-to-pfn protocol="direct" path-match="/+LoadTest/(.*)" result="/xrootd/cms/store/phedex_loadtest/$1" />
<pfn-to-lfn protocol="direct" path-match="/+xrootd/cms/store/phedex_loadtest/(.*)" result="/LoadTest/$1" />
<!-- End load test -->

<!-- Production store -->
<lfn-to-pfn protocol="direct" path-match="/+store/test/xrootd/T2_UK_SGrid_Bristol/store/(.*)" result="/xrootd/cms/store/$1" />
<lfn-to-pfn protocol="direct" path-match="/+store/(.*)" result="/xrootd/cms/store/$1" />
<pfn-to-lfn protocol="direct" path-match="/+xrootd/cms/store/(.*)" result="/store/$1" />
<!-- End production store -->

<!-- Local file access -->
<lfn-to-pfn protocol="file" chain="direct" path-match="(.*)" result="/hdfs/$1" />
<pfn-to-lfn protocol="file" chain="direct" path-match="/hdfs/(.*)" result="$1" />
<!-- End local file access -->

<!-- Begin xroot -->
<lfn-to-pfn protocol="xroot" chain="direct" path-match="(.*)" result="root://xrootd.phy.bris.ac.uk:1094$1" />
<pfn-to-lfn protocol="xroot" chain="direct" path-match=".*\:1094(.*)" result="$1" />
<pfn-to-lfn protocol="xroot" chain="direct" path-match=".*\:1194(.*)" result="$1" />
<!-- End xroot -->

<!-- Begin davs -->
<lfn-to-pfn protocol="davs" chain="direct" path-match="(.*)" result="davs://xrootd.phy.bris.ac.uk:1094$1" />
<!-- End davs -->

<!-- Begin xroot fallback-->
<lfn-to-pfn protocol="xrootdfallback" destination-match=".*" path-match="/+store/(.*)" result="root://cms-xrootd.gridpp.ac.uk:1094//store/$1" />
<!-- End xroot fallback-->

</storage-mapping>
42 changes: 42 additions & 0 deletions tests/test_cms_storage_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from __future__ import annotations

import pytest

from xrdsum import storage_catalog
from xrdsum.storage_catalog import AVAILABLE_STORAGE_CATALOGS


@pytest.fixture
def cms_config():
    """Provide the (config path, protocol) pair used to build the CMS catalog."""
    config_path = "tests/storage.xml"
    protocol = "direct"
    return config_path, protocol


@pytest.mark.parametrize(
    "path, result",
    [
        ("/store/user/johndoe/test.txt", True),
        ("/xrootd/cms/store/user/johndoe/test.txt", False),
        ("/xrootd/othervo/test.txt", False),
    ],
)
def test_is_cms_path(cms_config, path, result):
    """Check ``is_cms_path`` against the expected classification of each path."""
    config_path, protocol = cms_config
    catalog = storage_catalog.CMSStorageCatalog(config_path, protocol)
    outcome = catalog.is_cms_path(path)
    assert outcome == result


@pytest.mark.parametrize(
    "lfn, pfn",
    [
        ("/store/user/johndoe/test.txt", "/xrootd/cms/store/user/johndoe/test.txt"),
        (
            "/xrootd/cms/store/user/johndoe/test.txt",
            "/xrootd/cms/store/user/johndoe/test.txt",
        ),
        ("/xrootd/othervo/test.txt", "/xrootd/othervo/test.txt"),
    ],
)
def test_cms_catalog(cms_config, lfn, pfn):
    """Check that the registered "cms" catalog maps each lfn to the expected pfn."""
    assert "cms" in AVAILABLE_STORAGE_CATALOGS
    catalog_class = AVAILABLE_STORAGE_CATALOGS["cms"]
    # Pass config path and protocol positionally, straight from the fixture.
    catalog = catalog_class(*cms_config)
    assert catalog.lfn2pfn(lfn) == pfn
25 changes: 25 additions & 0 deletions tests/test_storage_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from __future__ import annotations

import pytest

from xrdsum.storage_catalog import resolve_file_path


@pytest.fixture
def cli_param():
    """Provide a storage catalog specification in ``experiment|path?protocol`` form."""
    spec = "cms|tests/storage.xml?direct"
    return spec


@pytest.mark.parametrize(
    "lfn, pfn",
    [
        ("/store/user/johndoe/test.txt", "/xrootd/cms/store/user/johndoe/test.txt"),
        (
            "/xrootd/cms/store/user/johndoe/test.txt",
            "/xrootd/cms/store/user/johndoe/test.txt",
        ),
        ("/xrootd/othervo/test.txt", "/xrootd/othervo/test.txt"),
    ],
)
def test_resolve_file_path(cli_param, lfn, pfn):
    """Check that ``resolve_file_path`` maps each lfn to the expected pfn."""
    resolved = resolve_file_path(lfn, cli_param)
    assert resolved == pfn