diff --git a/pyproject.toml b/pyproject.toml
index 4c30b9e8e..4de0c8673 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
dependencies = [
- "pydantic",
+ "pydantic<2", # Pip hops between installing v2.7 or v1.10 depending on which of the additional dependencies are requested
"requests",
"rich",
"werkzeug",
diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index 45bf9a853..92561fa60 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -111,7 +111,7 @@ def sanitise(in_string: str) -> str:
return in_string.replace("\r\n", "").replace("\n", "")
-def santise_path(in_path: Path) -> Path:
+def sanitise_path(in_path: Path) -> Path:
return Path("/".join(secure_filename(p) for p in in_path.parts))
diff --git a/src/murfey/server/api.py b/src/murfey/server/api.py
index d02962041..2e49d619d 100644
--- a/src/murfey/server/api.py
+++ b/src/murfey/server/api.py
@@ -43,6 +43,7 @@
get_machine_config,
get_microscope,
get_tomo_preproc_params,
+ sanitise,
templates,
)
from murfey.server.config import from_file, settings
@@ -110,10 +111,6 @@
router = APIRouter()
-def sanitise(in_string: str) -> str:
- return in_string.replace("\r\n", "").replace("\n", "")
-
-
# This will be the homepage for a given microscope.
@router.get("/", response_class=HTMLResponse)
async def root(request: Request):
diff --git a/src/murfey/server/bootstrap.py b/src/murfey/server/bootstrap.py
index f78ad36a3..42eb318de 100644
--- a/src/murfey/server/bootstrap.py
+++ b/src/murfey/server/bootstrap.py
@@ -17,6 +17,7 @@
import logging
import random
import re
+from urllib.parse import quote
import packaging.version
import requests
@@ -41,10 +42,47 @@
log = logging.getLogger("murfey.server.bootstrap")
+def _validate_package_name(package: str) -> bool:
+    """
+    Return True if the package name consists solely of ASCII letters, digits, "_",
+    "-", or "." characters, and False otherwise (including for an empty string).
+    Upper-case letters are accepted, so names such as "PyYAML" pass validation.
+    """
+    # fullmatch instead of match + "$": "$" also matches just before a trailing
+    # newline, which would let "name\n" through. The class is spelled out rather
+    # than using re.IGNORECASE, which would also admit non-ASCII case variants.
+    return re.fullmatch(r"[A-Za-z0-9\-_.]+", package) is not None
+
+
+def _get_full_path_response(package: str) -> requests.Response:
+    """
+    Fetch the PyPI simple-index page for a package.
+
+    The name is checked with _validate_package_name, lower-cased, has each run of
+    "-", "_" and "." collapsed to a single "-" (PEP 503), and is URL-quoted before
+    the request is made. Raises ValueError for an invalid name, and HTTPException
+    carrying PyPI's status code for any response other than 200.
+    """
+
+    # Guard clause: refuse anything that fails name validation
+    if not _validate_package_name(package):
+        raise ValueError(f"{package} is not a valid package name")
+
+    normalised = re.sub(r"[-_.]+", "-", package.lower())
+    index_page = requests.get(f"https://pypi.org/simple/{quote(normalised)}")
+
+    if index_page.status_code != 200:
+        raise HTTPException(status_code=index_page.status_code)
+    return index_page
+
+
@pypi.get("/", response_class=Response)
def get_pypi_index():
- """Obtain list of all PyPI packages via the simple API (PEP 503)."""
+ """
+ Obtain list of all PyPI packages via the simple API (PEP 503).
+ """
+
index = requests.get("https://pypi.org/simple/")
+
return Response(
content=index.content,
media_type=index.headers.get("Content-Type"),
@@ -53,33 +91,52 @@ def get_pypi_index():
@pypi.get("/{package}/", response_class=Response)
-def get_pypi_package_downloads_list(package: str):
- """Obtain list of all package downloads from PyPI via the simple API (PEP 503),
- and rewrite all download URLs to point to this server,
- underneath the current directory."""
- full_path_response = requests.get(f"https://pypi.org/simple/{package}")
-
- def rewrite_pypi_url(match):
- url = match.group(4)
- return (
- b""
- + match.group(4)
- + b""
- )
+def get_pypi_package_downloads_list(package: str) -> Response:
+ """
+ Obtain list of all package downloads from PyPI via the simple API (PEP 503), and
+ rewrite all download URLs to point to this server, under the current directory.
+ """
+
+ def _rewrite_pypi_url(match):
+ """
+ Use regular expression matching to rewrite the URLs. Points them from
+ pythonhosted.org to current server, and removes the hash from the URL as well
+ """
+ # url = match.group(4) # Original
+ url = match.group(3)
+ return '" + match.group(3) + ""
+
+ # Validate package and URL
+ full_path_response = _get_full_path_response(package)
+
+ # Process lines related to PyPI packages in response
+ content: bytes = full_path_response.content # In bytes
+ content_text: str = content.decode("latin1") # Convert to strings
+ content_text_list = []
+ for line in content_text.splitlines():
+ # Look for lines with hyperlinks
+ if "]*)"([^>]*)>([^<]*)', # Regex search criteria
+ _rewrite_pypi_url, # Search criteria applied to this function
+ line,
+ )
+ content_text_list.append(line_new)
+
+ # Add entry for wheel metadata (PEP 658; see _expose_wheel_metadata)
+ if ".whl" in line_new:
+ line_metadata = line_new.replace(".whl", ".whl.metadata")
+ content_text_list.append(line_metadata)
+ else:
+ # Append other lines as normal
+ content_text_list.append(line)
+
+ content_text_new = str("\n".join(content_text_list)) # Regenerate HTML structure
+ content_new = content_text_new.encode("latin1") # Convert back to bytes
- content = re.sub(
- b']*)href="([^">]*)"([^>]*)>([^<]*)',
- rewrite_pypi_url,
- full_path_response.content,
- )
return Response(
- content=content,
+ content=content_new,
media_type=full_path_response.headers.get("Content-Type"),
status_code=full_path_response.status_code,
)
@@ -87,18 +144,62 @@ def rewrite_pypi_url(match):
@pypi.get("/{package}/{filename}", response_class=Response)
def get_pypi_file(package: str, filename: str):
- """Obtain and pass through a specific download for a PyPI package."""
- full_path_response = requests.get(f"https://pypi.org/simple/{package}")
+ """
+ Obtain and pass through a specific download for a PyPI package.
+ """
+
+ def _expose_wheel_metadata(response_bytes: bytes) -> bytes:
+ """
+ As of pip v22.3 (coinciding with PEP 658), pip expects to find an additional
+ ".whl.metadata" file based on the URL of the ".whl" file present on the PyPI Simple
+ Index. However, because it is not listed on the webpage itself, it is not copied
+ across to the proxy. This function adds that URL to the proxy explicitly.
+ """
+
+ # Analyse API response line-by-line
+ response_text: str = response_bytes.decode("latin1") # Convert to text
+ response_text_list = [] # Write line-by-line analysis to here
+
+ for line in response_text.splitlines():
+ # Process URLs
+ if r"]*?href="([^">]*)"[^>]*>' + filename_bytes + b"",
- full_path_response.content,
+ b']*)"[^>]*>' + filename_bytes + b"",
+ content,
)
if not selected_package_link:
raise HTTPException(status_code=404, detail="File not found for package")
original_url = selected_package_link.group(1)
original_file = requests.get(original_url)
+
return Response(
content=original_file.content,
media_type=original_file.headers.get("Content-Type"),
@@ -108,8 +209,10 @@ def get_pypi_file(package: str, filename: str):
@plugins.get("/{package}", response_class=FileResponse)
def get_plugin_wheel(package: str):
+
machine_config = get_machine_config()
wheel_path = machine_config.plugin_packages.get(package)
+
if wheel_path is None:
return None
return FileResponse(
@@ -124,6 +227,7 @@ def get_bootstrap_instructions(request: Request):
Return a website containing instructions for installing the Murfey client on a
machine with no internet access.
"""
+
return respond_with_template(
"bootstrap.html",
{
@@ -140,7 +244,10 @@ def get_pip_wheel():
This is only used during bootstrapping by the client to identify and then
download the actually newest appropriate version of pip.
"""
- return get_pypi_file(package="pip", filename="pip-21.3.1-py3-none-any.whl")
+ return get_pypi_file(
+ package="pip",
+ filename="pip-22.2.2-py3-none-any.whl", # Highest version that works before PEP 658 change
+ )
@bootstrap.get("/murfey.whl", response_class=Response)
@@ -153,6 +260,7 @@ def get_murfey_wheel():
"""
full_path_response = requests.get("https://pypi.org/simple/murfey")
wheels = {}
+
for wheel_file in re.findall(
b"]*>([^<]*).whl",
full_path_response.content,
@@ -174,7 +282,7 @@ def get_murfey_wheel():
@cygwin.get("/setup-x86_64.exe", response_class=Response)
def get_cygwin_setup():
"""
- Obtain and past though a Cygwin installer from an official source.
+ Obtain and pass through a Cygwin installer from an official source.
This is used during client bootstrapping and can download and install the
Cygwin distribution that then remains on the client machines.
"""
diff --git a/src/murfey/server/demo_api.py b/src/murfey/server/demo_api.py
index 82d023f3e..d2276ffc1 100644
--- a/src/murfey/server/demo_api.py
+++ b/src/murfey/server/demo_api.py
@@ -32,7 +32,7 @@
get_hostname,
get_microscope,
sanitise,
- santise_path,
+ sanitise_path,
)
from murfey.server import shutdown as _shutdown
from murfey.server import templates
@@ -968,7 +968,7 @@ def flush_tomography_processing(
async def request_tomography_preprocessing(
visit_name: str, client_id: int, proc_file: ProcessFile, db=murfey_db
):
- if not santise_path(Path(proc_file.path)).exists():
+ if not sanitise_path(Path(proc_file.path)).exists():
log.warning(
f"{sanitise(str(proc_file.path))} has not been transferred before preprocessing"
)
diff --git a/src/murfey/util/__init__.py b/src/murfey/util/__init__.py
index db6f78b9b..a4b739d59 100644
--- a/src/murfey/util/__init__.py
+++ b/src/murfey/util/__init__.py
@@ -21,6 +21,10 @@
logger = logging.getLogger("murfey.util")
+def sanitise(in_string: str) -> str:
+    """
+    Return the string with every CRLF pair, and then every remaining LF, removed.
+    A CR that is not followed by an LF is left untouched.
+    """
+    without_crlf = in_string.replace("\r\n", "")
+    return without_crlf.replace("\n", "")
+
+
@lru_cache(maxsize=1)
def get_machine_config(url: str, demo: bool = False) -> dict:
return requests.get(f"{url}/machine/").json()
diff --git a/src/murfey/util/lif.py b/src/murfey/util/lif.py
index 358200f28..5dda3343c 100644
--- a/src/murfey/util/lif.py
+++ b/src/murfey/util/lif.py
@@ -15,14 +15,12 @@
from readlif.reader import LifFile
from tifffile import imwrite
+from murfey.util import sanitise
+
# Create logger object to output messages with
logger = logging.getLogger("murfey.util.lif")
-def sanitise(in_string: str) -> str:
- return in_string.replace("\r\n", "").replace("\n", "")
-
-
def get_xml_metadata(
file: LifFile,
save_xml: Optional[Path] = None,