diff --git a/src/murfey/server/api/bootstrap.py b/src/murfey/server/api/bootstrap.py
index a88d0c243..da74d5bd7 100644
--- a/src/murfey/server/api/bootstrap.py
+++ b/src/murfey/server/api/bootstrap.py
@@ -190,7 +190,7 @@ def get_murfey_wheel():
murfey.bootstrap is compatible with all relevant versions of Python.
This also ignores yanked releases, which again should be fine.
"""
- full_path_response = http_session.get("https://pypi.org/simple/murfey")
+ full_path_response = http_session.get(f"{pypi_index_url.rstrip('/')}/murfey")
wheels = {}
for wheel_file in re.findall(
@@ -198,7 +198,7 @@ def get_murfey_wheel():
full_path_response.content,
):
try:
- filename = wheel_file.decode("latin-1") + ".whl"
+ filename = wheel_file.decode("utf-8") + ".whl"
version = packaging.version.parse(filename.split("-")[1])
wheels[version] = filename
except Exception:
@@ -261,7 +261,7 @@ def find_cygwin_mirror() -> str:
mirror_priorities = {}
for mirror in mirrors.content.split(b"\n"):
- mirror_line = mirror.decode("latin1").strip().split(";")
+ mirror_line = mirror.decode("utf-8").strip().split(";")
if not mirror_line or len(mirror_line) < 4:
continue
if not mirror_line[0].startswith("http"):
@@ -493,7 +493,7 @@ def get_msys2_main_index(
# Parse and rewrite package index content
content: bytes = response.content # Get content in bytes
- content_text: str = content.decode("latin1") # Convert to strings
+ content_text: str = content.decode("utf-8") # Convert to strings
content_text_list = []
for line in content_text.splitlines():
if line.startswith(" requests.Response:
"""
@@ -1082,20 +1085,20 @@ def _get_full_pypi_path_response(package: str) -> requests.Response:
package_clean = quote(re.sub(r"[-_.]+", "-", package.lower()), safe="/")
# Get HTTP response
- url = f"https://pypi.org/simple/{package_clean}"
+ url = f"{pypi_index_url.rstrip('/')}/{package_clean}"
response = http_session.get(url)
if response.status_code != 200:
raise HTTPException(status_code=response.status_code)
return response
-@pypi.get("/", response_class=Response)
+@pypi.get("/index/", response_class=Response)
def get_pypi_index():
"""
Obtain list of all PyPI packages via the simple API (PEP 503).
"""
- response = http_session.get("https://pypi.org/simple/")
+ response = http_session.get(pypi_index_url)
return Response(
content=response.content,
status_code=response.status_code,
@@ -1103,52 +1106,34 @@ def get_pypi_index():
)
-@pypi.get("/{package}/", response_class=Response)
+@pypi.get("/index/{package}/", response_class=Response)
def get_pypi_package_downloads_list(request: Request, package: str) -> Response:
"""
Obtain list of all package downloads from PyPI via the simple API (PEP 503), and
rewrite all download URLs to point to this server, under the current directory.
"""
- def _rewrite_pypi_url(match):
- """
- Use regular expression matching to rewrite the URLs. Points them from
- pythonhosted.org to current server, and removes the hash from the URL as well
- """
- # url = match.group(4) # Original
- url = match.group(3)
- return '" + match.group(3) + ""
-
logger.debug(f"Received request to access {str(request.url)!r}")
+ # Construct base URL to rewrite with
+ netloc = resolve_netloc(request)
+ scheme = request.headers.get("X-Forwarded-Proto", request.url.scheme)
+ router_path = request.url.path.removesuffix(f"/index/{package}/")
+ base_url = f"{scheme}://{netloc}{router_path}"
+
# Validate package and URL
full_path_response = _get_full_pypi_path_response(package)
# Process lines related to PyPI packages in response
content: bytes = full_path_response.content # In bytes
- content_text: str = content.decode("latin1") # Convert to strings
- content_text_list = []
- for line in content_text.splitlines():
- # Look for lines with hyperlinks
- if "]*)"([^>]*)>([^<]*)', # Regex search criteria
- _rewrite_pypi_url, # Function to apply search criteria to
- line,
- )
- content_text_list.append(line_new)
-
- # Add entry for wheel metadata (PEP 658; see _expose_wheel_metadata)
- if ".whl" in line_new:
- line_metadata = line_new.replace(".whl", ".whl.metadata")
- content_text_list.append(line_metadata)
- else:
- # Append other lines as normal
- content_text_list.append(line)
+ content_text: str = content.decode("utf-8") # Convert to strings
- content_text_new = str("\n".join(content_text_list)) # Regenerate HTML structure
- content_new = content_text_new.encode("latin1") # Convert back to bytes
+ # PyPI's simple index now directly points to https://pythonhosted.org
+ # It also uses newlines partway through the '' blocks now
+ # It's thus now better to use regex substitution on the page as a whole
+ content_text_new = re.sub(re.escape(python_repo_url), base_url, content_text)
+
+ content_new = content_text_new.encode("utf-8") # Convert back to bytes
return Response(
content=content_new,
@@ -1157,76 +1142,29 @@ def _rewrite_pypi_url(match):
)
-@pypi.get("/{package}/{filename}", response_class=StreamingResponse)
+@pypi.get("/packages/{a}/{b}/{c}/{filename}", response_class=StreamingResponse)
def get_pypi_file(
request: Request,
- package: str,
+ a: str,
+ b: str,
+ c: str,
filename: str,
):
"""
Obtain and pass through a specific download for a PyPI package.
"""
-
- def _expose_wheel_metadata(response_bytes: bytes) -> bytes:
- """
- As of pip v22.3 (coinciding with PEP 658), pip expects to find an additonal
- ".whl.metadata" file based on the URL of the ".whl" file present on the PyPI Simple
- Index. However, because it is not listed on the webpage itself, it is not copied
- across to the proxy. This function adds that URL to the proxy explicitly.
- """
-
- # Analyse API response line-by-line
- response_text: str = response_bytes.decode("latin1") # Convert to text
- response_text_list = [] # Write line-by-line analysis to here
-
- for line in response_text.splitlines():
- # Process URLs
- if r"]*)"[^>]*>' + filename_bytes + b"",
- content,
- )
- if not selected_package_link:
- raise HTTPException(status_code=404, detail="File not found for package")
- original_url = selected_package_link.group(1)
- response = http_session.get(original_url)
+ package_url = f"{python_repo_url}/packages/{a}/{b}/{c}/{filename}"
+ logger.debug(f"Forwarding package request to {package_url!r}")
+ response = http_session.get(package_url, stream=True)
# Construct headers to return with response
headers: dict[str, str] = {}
- if response.headers.get("Content-Length"):
- headers["Content-Lengh"] = response.headers["Content-Length"]
+ if response.status_code != 200:
+ raise HTTPException(status_code=response.status_code)
return StreamingResponse(
- content=response.iter_content(chunk_size=8192),
+ content=response.raw,
status_code=response.status_code,
headers=headers,
media_type=response.headers.get("Content-Type"),
diff --git a/src/murfey/util/route_manifest.yaml b/src/murfey/util/route_manifest.yaml
index d4bedb42e..e8ca73ef7 100644
--- a/src/murfey/util/route_manifest.yaml
+++ b/src/murfey/util/route_manifest.yaml
@@ -215,22 +215,26 @@ murfey.server.api.bootstrap.plugins:
methods:
- GET
murfey.server.api.bootstrap.pypi:
- - path: /pypi/
+ - path: /pypi/index/
function: get_pypi_index
path_params: []
methods:
- GET
- - path: /pypi/{package}/
+ - path: /pypi/index/{package}/
function: get_pypi_package_downloads_list
path_params:
- name: package
type: str
methods:
- GET
- - path: /pypi/{package}/{filename}
+ - path: /pypi/packages/{a}/{b}/{c}/{filename}
function: get_pypi_file
path_params:
- - name: package
+ - name: a
+ type: str
+ - name: b
+ type: str
+ - name: c
type: str
- name: filename
type: str