Skip to content

Commit

Permalink
fix: Improve proxy handling
Browse files Browse the repository at this point in the history
Related #927
  • Loading branch information
Rafiot committed Jul 24, 2024
1 parent 82bd5b6 commit c4a699f
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions playwrightcapture/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from logging import LoggerAdapter, Logger
from tempfile import NamedTemporaryFile
from typing import Any, TypedDict, Literal, TYPE_CHECKING, MutableMapping, Generator
from urllib.parse import urlparse, unquote, urljoin
from urllib.parse import urlparse, unquote, urljoin, urlsplit, urlunsplit
from zipfile import ZipFile

import aiohttp
Expand Down Expand Up @@ -164,7 +164,7 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
self.proxy: ProxySettings = {}
if proxy:
if isinstance(proxy, str):
self.proxy = {'server': proxy}
self.proxy = self.__prepare_proxy_playwright(proxy)
elif isinstance(proxy, dict):
self.proxy = {'server': proxy['server'], 'bypass': proxy.get('bypass', ''),
'username': proxy.get('username', ''),
Expand All @@ -187,6 +187,19 @@ def __init__(self, browser: BROWSER | None=None, device_name: str | None=None,
self._locale: str = ''
self._color_scheme: Literal['dark', 'light', 'no-preference', 'null'] | None = None

def __prepare_proxy_playwright(self, proxy: str) -> ProxySettings:
    """Turn a proxy URL string into Playwright ProxySettings.

    Playwright expects credentials in separate ``username``/``password``
    fields rather than embedded in the server URL, so when both are
    present in the URL they are extracted and the server URL is rebuilt
    without them.

    :param proxy: proxy URL, e.g. ``socks5://user:pass@host:1080``.
    :return: a ProxySettings dict suitable for Playwright.
    """
    parsed = urlsplit(proxy)
    if parsed.username and parsed.password:
        # Rebuild the netloc without credentials. Only append the port
        # when one was given: urlsplit().port is None otherwise, and
        # interpolating it blindly would yield "host:None".
        netloc = parsed.hostname if parsed.port is None else f'{parsed.hostname}:{parsed.port}'
        return {'username': parsed.username, 'password': parsed.password,
                'server': urlunsplit((parsed.scheme, netloc, parsed.path, parsed.query, parsed.fragment))}
    # No embedded credentials: pass the URL through untouched.
    return {'server': proxy}

def __prepare_proxy_aiohttp(self, proxy: ProxySettings) -> str:
    """Turn Playwright-style ProxySettings back into a single proxy URL
    for aiohttp/ProxyConnector, re-embedding credentials in the netloc.

    :param proxy: ProxySettings dict with at least a ``server`` key.
    :return: proxy URL string, with ``user:pass@`` prepended when
             credentials are set.
    """
    # Use truthiness, not key presence: the constructor stores
    # empty-string defaults for username/password, and 'username' in
    # proxy would then build an invalid "scheme://:@host" URL.
    if proxy.get('username') and proxy.get('password'):
        parsed = urlsplit(proxy['server'])
        netloc_with_creds = f'{proxy["username"]}:{proxy["password"]}@{parsed.netloc}'
        return urlunsplit((parsed.scheme, netloc_with_creds, parsed.path, parsed.query, parsed.fragment))
    return proxy['server']

async def __aenter__(self) -> Capture:
'''Launch the browser'''
self._temp_harfile = NamedTemporaryFile(delete=False)
Expand Down Expand Up @@ -1395,9 +1408,9 @@ async def get_favicons(self, rendered_url: str, rendered_content: str) -> set[by
Method inspired by https://github.com/ail-project/ail-framework/blob/master/bin/lib/crawlers.py
"""
connector = None
if self.proxy and self.proxy.get('server'):
if self.proxy:
# NOTE 2024-05-17: switch to async to fetch, the lib uses socks5h by default
connector = ProxyConnector.from_url(self.proxy['server'])
connector = ProxyConnector.from_url(self.__prepare_proxy_aiohttp(self.proxy))

extracted_favicons = self.__extract_favicons(rendered_content)
if not extracted_favicons:
Expand Down

0 comments on commit c4a699f

Please sign in to comment.