Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New basic rewrite rule for http module, mostly for internal use #135

Merged
merged 2 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/PRs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ jobs:
SPEASY_AMDA_MAX_CHUNK_SIZE_DAYS: "25"
SPEASY_LONG_TESTS: ""
SPEASY_INVENTORY_TESTS: ""
SPEASY_CORE_HTTP_REWRITE_RULES: '{"https://thisserver_does_not_exists.lpp.polytechnique.fr/pub/":"http://sciqlop.lpp.polytechnique.fr/cdaweb-data/pub/"}'
run: |
sudo apt update && sudo apt install -y texlive pandoc
pip install pytest pytest-cov sphinx pandoc
Expand All @@ -61,10 +62,10 @@ jobs:
twine check dist/*
- name: Upload coverage to Codecov
if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest'
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: unittests
name: codecov-umbrella
fail_ci_if_error: true
fail_ci_if_error: false
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ jobs:
SPEASY_AMDA_MAX_CHUNK_SIZE_DAYS: "25"
SPEASY_LONG_TESTS: ""
SPEASY_INVENTORY_TESTS: ""
SPEASY_CORE_HTTP_REWRITE_RULES: '{"https://thisserver_does_not_exists.lpp.polytechnique.fr/pub/":"http://sciqlop.lpp.polytechnique.fr/cdaweb-data/pub/"}'
run: |
sudo apt update && sudo apt install -y texlive pandoc
pip install pytest pytest-cov sphinx pandoc
Expand All @@ -65,7 +66,7 @@ jobs:
twine upload --repository testpypi --skip-existing dist/*
- name: Upload coverage to Codecov
if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest'
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
Expand Down
16 changes: 15 additions & 1 deletion speasy/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import configparser
import os
from typing import Any
import ast

import appdirs

Expand All @@ -22,6 +23,14 @@

_entries = {}

def _load_dict_from_repr(value: str) -> dict:
    """Build a dict from its Python-literal textual representation.

    Used as a config ``type_ctor``: the raw config/environment string is
    parsed with ``ast.literal_eval`` (never ``eval``), so only literals are
    accepted.

    Parameters
    ----------
    value: str
        Textual representation of a dict, e.g. ``'{"http://a/": "http://b/"}'``.
        An empty or whitespace-only value yields an empty dict. An already
        built dict is accepted and shallow-copied.

    Returns
    -------
    dict
        The parsed dictionary (empty when ``value`` is empty).

    Raises
    ------
    ValueError
        If ``value`` is not a valid literal or does not evaluate to a dict.
    """
    if isinstance(value, dict):
        return dict(value)
    text = value.strip() if value else ""
    if not text:
        return {}
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError) as err:
        # literal_eval reports malformed input with several exception types;
        # normalise them so callers only have to deal with ValueError.
        raise ValueError(f"Config value can't be converted to dict: {value}") from err
    if not isinstance(parsed, dict):
        raise ValueError(f"Config value can't be converted to dict: {value}")
    return parsed

def show():
"""Prints config entries and current values
Expand Down Expand Up @@ -148,7 +157,12 @@ def remove_entry(entry: ConfigEntry):
disabled_providers={"default": set(),
"description": """A comma separated list of providers you want to disable.
The main benefit of disabling providers is to speedup speasy loading.""",
"type_ctor": lambda x: set(x.split(','))}
"type_ctor": lambda x: set(x.split(','))},
http_rewrite_rules={"default": {},
"description": """A dictionary of rules to rewrite URLs before sending requests.
The keys are the URL to match and the values are the replacement URL.
Example: {"http://example.com": "http://localhost:8000"}""",
"type_ctor": _load_dict_from_repr},
)

proxy = ConfigSection("PROXY",
Expand Down
2 changes: 2 additions & 0 deletions speasy/core/any_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def _list_local_files(path: str) -> List[str]:

@CacheCall(cache_retention=timedelta(hours=12), is_pure=True)
def _list_remote_files(url: str) -> List[str]:
if not url.endswith('/'):
url += '/'
response = http.get(url)
if response.ok:
return _HREF_REGEX.findall(response.text)
Expand Down
6 changes: 5 additions & 1 deletion speasy/core/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from urllib3.util.retry import Retry

from speasy import __version__
from .url_utils import host_and_port
from .url_utils import host_and_port, apply_rewrite_rules

log = logging.getLogger(__name__)

Expand All @@ -28,6 +28,7 @@

_HREF_REGEX = re.compile(' href="([A-Za-z0-9.-_]+)">')


pool = PoolManager()


Expand Down Expand Up @@ -92,6 +93,7 @@ def __call__(self, url, headers: dict = None, params: dict = None, timeout: int
# self._adapter.timeout = timeout
headers = headers or {}
headers['User-Agent'] = USER_AGENT
url = apply_rewrite_rules(url)
return Response(self._verb(url=url, headers=headers, fields=params, timeout=timeout))


Expand All @@ -102,6 +104,7 @@ def __call__(self, url, headers: dict = None, params: dict = None, timeout: int
def urlopen(url, timeout: int = DEFAULT_TIMEOUT, headers: dict = None):
    """Issue a GET request on ``url`` through the module-level connection pool.

    The configured HTTP rewrite rules are applied to ``url`` before the
    request is sent, and the speasy ``User-Agent`` header is always set.

    Parameters
    ----------
    url:
        Target URL (before rewriting).
    timeout: int
        Timeout forwarded to the pool's ``urlopen``.
    headers: dict, optional
        Extra request headers. The given dict is left untouched.

    Returns
    -------
    Response
        The pool response wrapped in a ``Response`` object.
    """
    # Work on a copy: injecting the User-Agent must not leak into the
    # caller's dict, which may be reused for other requests.
    request_headers = dict(headers) if headers else {}
    request_headers['User-Agent'] = USER_AGENT
    rewritten_url = apply_rewrite_rules(url)
    return Response(pool.urlopen(method="GET", url=rewritten_url, headers=request_headers, timeout=timeout))


Expand Down Expand Up @@ -131,6 +134,7 @@ def is_server_up(url: Optional[str] = None, host: Optional[str] = None, port: Op
If neither url nor host and port are provided
"""
if url is not None:
url = apply_rewrite_rules(url)
host, port = host_and_port(url)
elif host is None or port is None:
raise ValueError("Either url or host and port must be provided")
Expand Down
10 changes: 10 additions & 0 deletions speasy/core/url_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from typing import Dict, Tuple
from urllib.parse import urlparse, urlencode
from speasy.config import core as core_config

_REWRITE_RULES_ = core_config.http_rewrite_rules.get()


def quote(*args, **kwargs):
Expand Down Expand Up @@ -87,3 +90,10 @@ def host_and_port(url: str) -> Tuple[str, int]:
return parsed.hostname, 443

return parsed.hostname, 80


def apply_rewrite_rules(url: str) -> str:
    """Rewrite ``url`` according to the configured HTTP rewrite rules.

    The rules map a URL prefix to its replacement. The first rule (in
    iteration order) whose prefix starts ``url`` is applied: the prefix is
    swapped for the replacement and the remainder of the URL is kept as is.
    When no rule matches, ``url`` is returned unchanged.
    """
    matched_prefix = next(
        (prefix for prefix in _REWRITE_RULES_ if url.startswith(prefix)),
        None,
    )
    if matched_prefix is None:
        return url
    remainder = url[len(matched_prefix):]
    return _REWRITE_RULES_[matched_prefix] + remainder
13 changes: 13 additions & 0 deletions tests/test_file_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def test_simple_remote_bin_file(self):
self.assertIsNotNone(f)
self.assertEqual(b'\x7fELF', f.read(4))

def test_simple_remote_bin_file_with_rewrite_rules(self):
    """Open a CDF file through a URL that only resolves via rewrite rules.

    Skipped unless SPEASY_CORE_HTTP_REWRITE_RULES is set in the environment
    (the CI workflows define it to map the non-existent host to a real one).
    """
    if os.environ.get('SPEASY_CORE_HTTP_REWRITE_RULES') is None:
        self.skipTest("No rewrite rules defined")
    remote_cdf = "https://thisserver_does_not_exists.lpp.polytechnique.fr/pub/data/ace/mag/level_2_cdaweb/mfi_h0/2014/ac_h0_mfi_20141117_v06.cdf"
    f = any_loc_open(remote_cdf, mode='rb')
    self.assertIsNotNone(f)
    # The test expects this marker within the first 100 bytes of the file.
    header = f.read(100)
    self.assertIn(b'NSSDC Common Data Format', header)

def test_cached_remote_bin_file(self):
drop_item("https://hephaistos.lpp.polytechnique.fr/data/LFR/SW/LFR-FSW/3.0.0.0/fsw")
start = datetime.now()
Expand Down Expand Up @@ -75,6 +82,12 @@ def test_list_remote_files(self):
self.assertGreaterEqual(len(flist), 9)
self.assertIn('plasmaSpeaker1.webm', flist)

def test_list_remote_files_with_rewrite_rules(self):
    """List remote CDF files through a URL that only resolves via rewrite rules.

    Skipped unless SPEASY_CORE_HTTP_REWRITE_RULES is set in the environment
    (the CI workflows define it to map the non-existent host to a real one).
    """
    if os.environ.get('SPEASY_CORE_HTTP_REWRITE_RULES') is None:
        self.skipTest("No rewrite rules defined")
    remote_dir = 'https://thisserver_does_not_exists.lpp.polytechnique.fr/pub/data/ace/mag/level_2_cdaweb/mfi_h0/2014/'
    cdf_pattern = re.compile(r'[a-zA-Z0-9_]\.cdf')
    flist = list_files(url=remote_dir, file_regex=cdf_pattern)
    self.assertGreaterEqual(len(flist), 10)

@data(
f"{_HERE_}/resources/",
f"file://{_HERE_}/resources/"
Expand Down
Loading