In [None]:
import requests


def test_proxy(proxies_list):
    test_url = "https://httpbin.org/ip"

    for proxy in proxies_list:
        proxies = {"http": f"http://{proxy}", "https": f"http://{proxy}"}  # Add "socks5://" for SOCKS proxies if needed
        try:
            response = requests.get(test_url, proxies=proxies, timeout=5, verify=False)  # Disable SSL verification
            print(response.headers)
            print(response.content)
            if response.status_code == 200:
                print(f"✅ Proxy {proxy} is working: {response.text}")
        except requests.RequestException as e:
            print(f"❌ Proxy {proxy} failed: {e}")

In [48]:
import asyncio
import logging
import random
import time
from typing import List, Optional
from urllib.parse import urlparse


from fp.fp import FreeProxy
from google_scholar_research_tool import ProxyManager

# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)


class NoProxiesAvailable(Exception):
    pass


class ProxyManager:
    def __init__(self, timeout=5, refresh_interval=300, blacklist_duration=600, num_proxies=20):
        self.logger = logging.getLogger(__name__)
        self.fp = FreeProxy()
        self.proxy_list = []
        self.blacklist = {}  # {proxy: timestamp}
        self.refresh_interval = refresh_interval
        self.blacklist_duration = blacklist_duration
        self.last_refresh = 0
        self.num_proxies = num_proxies
        self.timeout = timeout
        self.test_url = "https://httpbin.org/ip"
        self.semaphore = asyncio.Semaphore(num_proxies)

    async def _test_proxy(self, proxy: str) -> Optional[str]:
        """Test if a proxy is working using `requests` and return it if successful."""
        if proxy in self.blacklist and time.time() - self.blacklist[proxy] < self.blacklist_duration:
            return None  # Skip testing blacklisted proxies

        def test():
            """Synchronous function to test the proxy."""
            try:
                proxies = {"http": f"http://{proxy}", "https": f"https://{proxy}"}
                self.logger.info(f"Testing proxy: {proxy}")
                start_time = time.time()
                response = requests.get(self.test_url, proxies=proxies, timeout=self.timeout, verify=False)
                response.raise_for_status()  # Raise error for HTTP failures
                latency = time.time() - start_time
                if response.status_code == 200:
                    self.logger.info(f"Proxy {proxy} working with latency: {latency:.2f}s")
                    return proxy, latency
                self.logger.info(f"Proxy {proxy} failed with status code {response.status_code}")
                return None
            except (requests.RequestException, requests.Timeout) as e:
                self.logger.debug(f"Proxy {proxy} failed: {type(e).__name__}: {e}")
                return None

        result = await asyncio.to_thread(test)
        if result:
            return result[0]  # Return the proxy if it passed
        return None

    # Additional logging for raw proxies list
    async def get_working_proxies(self) -> List[str]:
        """Fetch, test, and return a list of working proxies."""
        current_time = time.time()
        if current_time - self.last_refresh < self.refresh_interval and self.proxy_list:
            return self.proxy_list  # Use cached list if still valid

        raw_proxies = self.fp.get_proxy_list(repeat=True)
        print(f"Fetched proxies: {raw_proxies}")  # Print raw proxies to check format
        if not raw_proxies:
            self.logger.warning("No proxies found from FreeProxy.")
            raise NoProxiesAvailable("No raw proxies found.")

        # Test proxies concurrently
        tasks = [self._test_proxy(proxy) for proxy in raw_proxies]
        results = await asyncio.gather(*tasks)

        # Filter out failed proxies and sort by latency
        working_proxies = sorted([res for res in results if res], key=lambda x: x[1])[: self.num_proxies]
        self.proxy_list = [p[0] for p in working_proxies]
        self.last_refresh = time.time()

        if not self.proxy_list:
            self.logger.warning("No working proxies found.")
            raise NoProxiesAvailable("No working proxies found.")

        return self.proxy_list

    async def refresh_proxies(self):
        """Force refresh the proxy list by fetching and testing new proxies."""
        await self.get_working_proxies()

    async def get_random_proxy(self) -> Optional[str]:
        """Return a random working proxy."""
        try:
            if not self.proxy_list:
                await self.refresh_proxies()
            return random.choice(self.proxy_list) if self.proxy_list else None
        except NoProxiesAvailable:
            return None

    def remove_proxy(self, proxy: str):
        """Remove a proxy and blacklist it."""
        if proxy in self.proxy_list:
            self.proxy_list.remove(proxy)
            self.blacklist[proxy] = time.time()


In [62]:
proxy_manager = ProxyManager(timeout=5)
# working_proxies = asyncio.run(proxy_manager.get_working_proxies())


def get_proxies():
    proxy = FreeProxy().get_proxy_list(repeat=True)
    return proxy


proxies_list = get_proxies()


# pprint(proxies_list)
async def test_proxy(proxies_list):
    # test_url = "https://httpbin.org/ip"

    for proxy in proxies_list[:30]:
        logger.debug(proxy)
        return await proxy_manager._test_proxy(proxy)


working_proxies = await test_proxy(proxies_list)
print(working_proxies)


2025-02-18 06:37:49,264 | DEBUG | connectionpool.py:1049 | _new_conn | Starting new HTTPS connection (1): free-proxy-list.net:443
2025-02-18 06:37:49,300 | DEBUG | connectionpool.py:544 | _make_request | https://free-proxy-list.net:443 "GET / HTTP/1.1" 200 None
2025-02-18 06:37:49,306 | DEBUG | 212137237.py:18 | test_proxy | 72.10.160.172:11723
2025-02-18 06:37:49,307 | DEBUG | connectionpool.py:1049 | _new_conn | Starting new HTTPS connection (1): httpbin.org:443


Testing proxy: 72.10.160.172:11723


2025-02-18 06:37:51,186 | DEBUG | 2093602308.py:58 | test | Proxy 72.10.160.172:11723 failed: ProxyError: HTTPSConnectionPool(host='httpbin.org', port=443): Max retries exceeded with url: /ip (Caused by ProxyError('Unable to connect to proxy', NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001D5D4757250>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it')))


None


In [None]:
working_proxies = await proxy_manager.get_working_proxies()
print(working_proxies)

In [None]:
random_proxy = await proxy_manager.get_random_proxy()
print(f"Random proxy: {random_proxy}")

In [None]:
async def test_proxy_manager():
    # Initialize ProxyManager
    proxy_manager = ProxyManager(timeout=5, num_proxies=5)

    # Test fetching working proxies
    try:
        print("Fetching working proxies...")
        working_proxies = await proxy_manager.get_working_proxies()
        print("Working proxies:", working_proxies)
    except NoProxiesAvailable as e:
        print("Error:", e)

    # Test getting a random working proxy
    random_proxy = proxy_manager.get_random_proxy()
    if random_proxy:
        print(f"Random proxy: {random_proxy}")
    else:
        print("No random proxy available.")

    # Test removing a proxy and blacklisting it
    if working_proxies:
        proxy_to_remove = working_proxies[0]
        print(f"Removing and blacklisting proxy: {proxy_to_remove}")
        proxy_manager.remove_proxy(proxy_to_remove)
        print("Updated proxy list:", proxy_manager.proxy_list)
        print("Blacklist:", proxy_manager.blacklist)


In [None]:
await test_proxy_manager()


In [3]:

from ipykernel.eventloops import register_integration
from proxybroker import Broker


# Only necessary inside Jupyter/IPython
@register_integration("asyncio")
def _(kernel):
    """Dummy function required for decorator to work correctly."""
    pass


async def show(proxies):
    while True:
        proxy = await proxies.get()
        if proxy is None:
            break
        print("Found proxy: %s" % proxy)


async def main():
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    try:
        tasks = asyncio.gather(broker.find(types=["HTTP", "HTTPS"], limit=50), show(proxies))
        await tasks
    except Exception as e:
        print(f"An error occurred within main: {e}")


await main()


2025-02-18 08:07:15,492 | DEBUG | resolver.py:115 | get_real_ext_ip | Real external IP: 102.119.83.55
2025-02-18 08:07:15,515 | DEBUG | checker.py:59 | check_judges | Start check judges
2025-02-18 08:07:15,515 | DEBUG | api.py:332 | _grab | Start grabbing proxies
2025-02-18 08:07:15,518 | DEBUG | providers.py:75 | get_proxies | Try to get proxies from www.proxylists.net
2025-02-18 08:07:15,518 | DEBUG | providers.py:75 | get_proxies | Try to get proxies from api.proxyscrape.com
2025-02-18 08:07:15,519 | DEBUG | providers.py:75 | get_proxies | Try to get proxies from ipaddress.com
2025-02-18 08:07:15,925 | DEBUG | providers.py:117 | _find_on_page | 503(503) proxies added(received) from https://api.proxyscrape.com/?request=getproxies&proxytype=http
2025-02-18 08:07:15,926 | DEBUG | providers.py:82 | get_proxies | 503 proxies received from api.proxyscrape.com: {('41.43.162.50', '8080', ('HTTP', 'CONNECT:80', 'HTTPS', 'CONNECT:25')), ('203.76.151.50', '49200', ('HTTP', 'CONNECT:80', 'HTTPS

Found proxy: <Proxy US 0.26s [HTTP: High] 47.252.50.153:3128>
Found proxy: <Proxy US 0.26s [HTTP: Transparent] 98.64.128.182:3128>


2025-02-18 08:07:18,334 | DEBUG | proxy.py:271 | log | 162.223.90.130:80 [HTTP]: Received: 723 bytes: b'HTTP/1.1 200'; Runtime: 0.36
2025-02-18 08:07:18,334 | DEBUG | proxy.py:271 | log | 162.223.90.130:80 [HTTP]: Get: success; Runtime: 0.00
2025-02-18 08:07:18,334 | DEBUG | checker.py:274 | _send_test_request | 162.223.90.130:80 [HTTP]: (http://httpbin.org/get?show_env) rv: 4557, response: b'HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nDate: Tue, 18 Feb 2025 04:06:50 GMT\r\nServer: gunicorn/19.9.0\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n200\r\n{\n  "args": {\n    "show_env": ""\n  }, \n  "headers": {\n    "Accept": "*/*", \n    "Accept-Encoding": "gzip, deflate", \n    "Cache-Control": "no-cache", \n    "Cookie": "cookie=ok", \n    "Host": "httpbin.org", \n    "Referer": "https://www.google.com/", \n    "User-Agent": "PxBroker/0.4.0/4557", \n    "X-Amzn-Trace-Id": "Root=1-67b4075a-4d67b60d2ec2d9fe7dc0385f", \n    "X-Forwarded-For": "162.223.90.130", \n    "X-

Found proxy: <Proxy US 0.32s [HTTP: High] 162.223.90.130:80>
Found proxy: <Proxy US 0.35s [HTTP: High] 198.49.68.80:80>
Found proxy: <Proxy BN 0.35s [HTTP: High] 23.247.136.248:80>


2025-02-18 08:07:18,564 | DEBUG | proxy.py:271 | log | 67.43.228.253:27321 [HTTPS]: Received: 19 bytes: b'HTTP/1.0 200'; Runtime: 1.19
2025-02-18 08:07:18,565 | DEBUG | proxy.py:271 | log | 67.43.228.253:27321 [HTTPS]: SSL: Initial connection; Runtime: 0.00
2025-02-18 08:07:18,565 | DEBUG | proxy.py:271 | log | 67.43.228.253:27321 [HTTPS]: SSL: ; Runtime: 0.00
2025-02-18 08:07:18,565 | DEBUG | proxy.py:271 | log | 67.43.228.253:27321 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:18,570 | DEBUG | proxy.py:271 | log | 67.43.228.250:9113 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:18,570 | DEBUG | proxy.py:271 | log | 67.43.228.250:9113 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:18,572 | DEBUG | proxy.py:271 | log | 133.18.234.13:80 [HTTP]: Received: 1115 bytes: b'HTTP/1.1 200'; Runtime: 0.56
2025-02-18 08:07:18,572 | DEBUG | proxy.py:271 | log | 133.18.234.13:80 [HTTP]: Get: success; Runtime: 0.00
2025-02-18 08:07:18,

Found proxy: <Proxy JP 0.37s [HTTP: High] 133.18.234.13:80>
Found proxy: <Proxy CN 0.40s [HTTP: High] 221.231.13.198:1080>
Found proxy: <Proxy FR 0.40s [HTTP: High] 158.255.77.169:80>
Found proxy: <Proxy CN 0.42s [HTTP: High] 183.215.23.242:9091>


2025-02-18 08:07:18,814 | DEBUG | proxy.py:271 | log | 47.100.254.82:80 [HTTP]: Connection: success; Runtime: 0.58
2025-02-18 08:07:18,815 | DEBUG | proxy.py:271 | log | 47.100.254.82:80 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/4661\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: azenv.net\r\nConnection: close\r\nContent-Length: 0\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:18,824 | DEBUG | proxy.py:271 | log | 58.243.224.244:8085 [HTTP]: Received: 1149 bytes: b'HTTP/1.1 200'; Runtime: 0.84
2025-02-18 08:07:18,825 | DEBUG | proxy.py:271 | log | 58.243.224.244:8085 [HTTP]: Get: success; Runtime: 0.00
2025-02-18 08:07:18,825 | DEBUG | checker.py:274 | _send_test_request | 58.243.224.244:8085 [HTTP]: (http://azenv.net/) rv: 4512, response: b'HTTP/1.1 200 OK\r\nServer: nginx/2.2.200603d\r\nDate: Tue, 18 Feb 2025 04:06:52 GMT\r\nContent-Type: 

Found proxy: <Proxy CN 0.43s [HTTP: High] 58.243.224.244:8085>
Found proxy: <Proxy CN 0.43s [HTTP: High] 119.3.113.150:9094>
Found proxy: <Proxy CN 0.47s [HTTP: High] 106.38.26.22:2080>
Found proxy: <Proxy CN 0.48s [HTTP: High] 110.72.59.58:8085>


2025-02-18 08:07:19,057 | DEBUG | proxy.py:271 | log | 91.108.130.18:3128 [HTTPS]: Connection: success; Runtime: 1.99
2025-02-18 08:07:19,057 | DEBUG | proxy.py:271 | log | 91.108.130.18:3128 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:19,079 | DEBUG | proxy.py:271 | log | 38.242.202.236:8080 [HTTP]: Received: 936 bytes: b'HTTP/1.1 200'; Runtime: 0.92
2025-02-18 08:07:19,080 | DEBUG | proxy.py:271 | log | 38.242.202.236:8080 [HTTP]: Get: success; Runtime: 0.00
2025-02-18 08:07:19,080 | DEBUG | checker.py:274 | _send_test_request | 38.242.202.236:8080 [HTTP]: (http://proxyjudge.us/azenv.php) rv: 2656, response: b'HTTP/1.1 200 OK\r\nServer: nginx/1.18.0 (Ubuntu)\r\nDate: Tue, 18 Feb 2025 04:06:50 GMT\r\nContent-Type: text/html; charset=UTF-8\r\nTransfer-Encoding: chunked\r\nConnection: close\r\nCache-Control: no-cache, no-store, must-revalidate\r\nPragma: no-cac

Found proxy: <Proxy CN 0.49s [HTTP: High] 111.34.79.216:10219>


2025-02-18 08:07:19,307 | DEBUG | proxy.py:271 | log | 62.210.15.199:80 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:19,308 | DEBUG | proxy.py:271 | log | 8.209.209.19:3389 [HTTPS]: Connection: success; Runtime: 2.26
2025-02-18 08:07:19,308 | DEBUG | proxy.py:271 | log | 8.209.209.19:3389 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:19,372 | DEBUG | proxy.py:271 | log | 14.29.116.148:727 [HTTPS]: Connection: success; Runtime: 2.32
2025-02-18 08:07:19,373 | DEBUG | proxy.py:271 | log | 14.29.116.148:727 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:19,375 | DEBUG | proxy.py:271 | log | 91.108.130.18:3128 [HTTPS]: 

Found proxy: <Proxy CN 0.58s [HTTP: High] 47.100.254.82:80>


2025-02-18 08:07:19,677 | DEBUG | proxy.py:271 | log | 47.250.11.111:51 [HTTPS]: Received: 63 bytes: b'HTTP/1.1 200'; Runtime: 2.25
2025-02-18 08:07:19,678 | DEBUG | proxy.py:271 | log | 47.250.11.111:51 [HTTPS]: SSL: Initial connection; Runtime: 0.00
2025-02-18 08:07:19,678 | DEBUG | proxy.py:271 | log | 47.250.11.111:51 [HTTPS]: SSL: ; Runtime: 0.00
2025-02-18 08:07:19,678 | DEBUG | proxy.py:271 | log | 47.250.11.111:51 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:19,682 | DEBUG | proxy.py:271 | log | 67.43.227.227:28159 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:19,682 | DEBUG | proxy.py:271 | log | 67.43.227.227:28159 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:19,710 | DEBUG | proxy.py:271 | log | 91.108.130.18:3128 [HTTP]: Connection: success; Runtime: 0.33
2025-02-18 08:07:19,710 | DEBUG | proxy.py:271 | log | 91.108.130.18:3128 [HTTP]: Request: b'GET http://httpbin.org/get?show_env HTTP/1.1\r\nUser-Agent: P

Found proxy: <Proxy CN 0.73s [HTTP: High] 14.204.30.115:8085>


2025-02-18 08:07:20,259 | DEBUG | proxy.py:271 | log | 203.76.151.50:49200 [HTTPS]: Connection: success; Runtime: 3.26
2025-02-18 08:07:20,260 | DEBUG | proxy.py:271 | log | 203.76.151.50:49200 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:20,298 | DEBUG | proxy.py:271 | log | 161.97.136.251:3128 [HTTP]: Connection: failed; Runtime: 1.60
2025-02-18 08:07:20,301 | DEBUG | proxy.py:271 | log | 157.20.244.189:1111 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:20,301 | DEBUG | proxy.py:271 | log | 157.20.244.189:1111 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:20,359 | DEBUG | proxy.py:271 | log | 183.240.196.53:38080 [HTTP]: Connection: success; Runtime: 0.28
2025-02-18 08:07:20,359 | DEBUG | proxy.py:271 | log | 183.240.196.53:38080 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0

Found proxy: <Proxy CN 0.39s [HTTP: High] 183.240.196.53:38080>


2025-02-18 08:07:21,323 | DEBUG | proxy.py:271 | log | 182.204.182.147:1080 [HTTP]: Connection: failed; Runtime: 2.20
2025-02-18 08:07:21,326 | DEBUG | proxy.py:271 | log | 4.155.2.13:9400 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:21,327 | DEBUG | proxy.py:271 | log | 4.155.2.13:9400 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:21,352 | DEBUG | proxy.py:271 | log | 47.91.65.23:3128 [HTTPS]: Connection: success; Runtime: 0.24
2025-02-18 08:07:21,353 | DEBUG | proxy.py:271 | log | 47.91.65.23:3128 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:21,359 | DEBUG | proxy.py:271 | log | 125.71.97.77:11105 [HTTPS]: Received: 0 bytes; Runtime: 0.25
2025-02-18 08:07:21,359 | DEBUG | proxy.py:271 | log | 125.71.97.77:11105 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:21,359 | DEBUG | proxy.py:271 | log |

Found proxy: <Proxy MY 1.33s [HTTP: Anonymous] 175.139.233.76:80>


2025-02-18 08:07:22,755 | DEBUG | proxy.py:271 | log | 218.13.39.150:9091 [HTTPS]: Received: 152 bytes: b'HTTP/1.1 400'; Runtime: 0.30
2025-02-18 08:07:22,756 | DEBUG | proxy.py:271 | log | 218.13.39.150:9091 [HTTPS]: Connect: failed. HTTP status: 400; Runtime: 0.00
2025-02-18 08:07:22,756 | DEBUG | proxy.py:271 | log | 218.13.39.150:9091 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:22,756 | DEBUG | proxy.py:271 | log | 218.13.39.150:9091 [INFO]: Selected judge: <Judge [HTTP] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:22,757 | DEBUG | proxy.py:271 | log | 218.13.39.150:9091 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:23,057 | DEBUG | proxy.py:271 | log | 72.10.160.90:4767 [HTTPS]: Received: 19 bytes: b'HTTP/1.0 200'; Runtime: 5.69
2025-02-18 08:07:23,057 | DEBUG | proxy.py:271 | log | 72.10.160.90:4767 [HTTPS]: SSL: Initial connection; Runtime: 0.00
2025-02-18 08:07:23,057 | DEBUG | proxy.py:271 | log | 72.10.160.90:4767 [HTTPS]: SSL: ; Runtime: 0.00
202

Found proxy: <Proxy CN 2.07s [HTTP: High] 106.119.165.35:8877>


2025-02-18 08:07:25,575 | DEBUG | proxy.py:271 | log | 194.182.178.90:3128 [HTTP]: Received: 131 bytes: b'HTTP/1.1 400'; Runtime: 0.54
2025-02-18 08:07:25,575 | DEBUG | proxy.py:271 | log | 194.182.178.90:3128 [HTTP]: Get: failed; Runtime: 0.00
2025-02-18 08:07:25,576 | DEBUG | checker.py:274 | _send_test_request | 194.182.178.90:3128 [HTTP]: (http://httpbin.org/get?show_env) rv: 9230, response: b'HTTP/1.1 400 Bad Request\r\nDate: Tue, 18 Feb 2025 04:06:57 GMT\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\nb\r\nBad Request\r\n0\r\n'
2025-02-18 08:07:25,576 | DEBUG | proxy.py:271 | log | 194.182.178.90:3128 [HTTP]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:25,579 | DEBUG | proxy.py:271 | log | 49.148.106.194:8080 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:25,580 | DEBUG | proxy.py:271 | log | 49.148.106.194:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:25,580 | DEBUG | proxy.py:271 | log | 117.81.238.69:8089

Found proxy: <Proxy CN 1.76s [HTTP: High] 59.53.80.122:10024>


2025-02-18 08:07:26,440 | DEBUG | proxy.py:271 | log | 216.229.112.25:8080 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:26,441 | DEBUG | proxy.py:271 | log | 216.229.112.25:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:26,471 | DEBUG | proxy.py:271 | log | 59.39.226.244:2324 [HTTPS]: Received: timeout; Runtime: 8.00
2025-02-18 08:07:26,472 | DEBUG | proxy.py:271 | log | 59.39.226.244:2324 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:26,472 | DEBUG | proxy.py:271 | log | 59.39.226.244:2324 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:26,498 | DEBUG | proxy.py:271 | log | 202.96.46.200:8010 [HTTPS]: Received: 219 bytes: b'HTTP/1.1 500'; Runtime: 1.11
2025-02-18 08:07:26,498 | DEBUG | proxy.py:271 | log | 202.96.46.200:8010 [HTTPS]: Connect: failed. HTTP status: 500; Runtime: 0.00
2025-02-18 08:07:26,498 | DEBUG | proxy.py:271 | log | 202.96.46.200:8010 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:26,499 | DEB

Found proxy: <Proxy CN 0.42s [HTTP: High] 183.234.215.11:8443>
Found proxy: <Proxy RU 0.69s [HTTP: Transparent] 185.105.102.189:80>


2025-02-18 08:07:26,845 | DEBUG | proxy.py:271 | log | 67.43.228.250:3187 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:26,846 | DEBUG | proxy.py:271 | log | 67.43.228.250:3187 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:26,869 | DEBUG | proxy.py:271 | log | 202.96.46.200:8010 [HTTP]: Connection: success; Runtime: 0.37
2025-02-18 08:07:26,869 | DEBUG | proxy.py:271 | log | 202.96.46.200:8010 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/9981\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: azenv.net\r\nConnection: close\r\nContent-Length: 0\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:26,891 | DEBUG | proxy.py:271 | log | 58.33.89.106:8444 [HTTPS]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:26,892 | DEBUG | proxy.py:271 | log | 58.33.89.106:8444 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:2

Found proxy: <Proxy CN 2.41s [HTTP: High] 125.71.97.77:11105>
Found proxy: <Proxy CN 0.86s [HTTP: High] 36.159.89.121:8000>


2025-02-18 08:07:28,695 | DEBUG | proxy.py:271 | log | 182.160.114.214:8080 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:28,696 | DEBUG | proxy.py:271 | log | 182.160.114.214:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:28,702 | DEBUG | proxy.py:271 | log | 59.153.99.171:8080 [HTTPS]: Received: 19 bytes: b'HTTP/1.1 200'; Runtime: 0.77
2025-02-18 08:07:28,703 | DEBUG | proxy.py:271 | log | 59.153.99.171:8080 [HTTPS]: SSL: Initial connection; Runtime: 0.00
2025-02-18 08:07:28,703 | DEBUG | proxy.py:271 | log | 59.153.99.171:8080 [HTTPS]: SSL: ; Runtime: 0.00
2025-02-18 08:07:28,703 | DEBUG | proxy.py:271 | log | 59.153.99.171:8080 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:28,706 | DEBUG | proxy.py:271 | log | 47.251.73.54:5060 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:28,706 | DEBUG | proxy.py:271 | log | 47.251.73.54:5060 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:28,761 | DE

Found proxy: <Proxy CN 1.94s [HTTP: High] 202.96.46.200:8010>


2025-02-18 08:07:31,101 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTPS]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:31,101 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:31,255 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTPS]: Received: timeout; Runtime: 8.01
2025-02-18 08:07:31,255 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:31,256 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:31,260 | DEBUG | proxy.py:271 | log | 218.1.197.148:2324 [HTTPS]: Connection: success; Runtime: 3.34
2025-02-18 08:07:31,261 | DEBUG | proxy.py:271 | log | 218.1.197.148:2324 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:31,283 | DEBUG | proxy.py:271 | log | 103.165.40.85

Found proxy: <Proxy CN 0.93s [HTTP: High] 117.36.76.86:8085>


2025-02-18 08:07:33,023 | DEBUG | proxy.py:271 | log | 23.138.88.96:999 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:33,023 | DEBUG | proxy.py:271 | log | 23.138.88.96:999 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:33,024 | DEBUG | proxy.py:271 | log | 189.22.234.40:80 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:33,024 | DEBUG | proxy.py:271 | log | 189.22.234.40:80 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:33,025 | DEBUG | proxy.py:271 | log | 114.91.25.59:1080 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:33,025 | DEBUG | proxy.py:271 | log | 114.91.25.59:1080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:33,026 | DEBUG | proxy.py:271 | log | 45.65.172.71:8080 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:33,026 | DEBUG | proxy.py:271 | log | 45.65.172.71:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:33,027 | DEBUG | proxy.py:271 | log | 2.138.49.5:3128 [

Found proxy: <Proxy CN 0.98s [HTTP: High] 218.13.39.150:9091>


2025-02-18 08:07:35,320 | DEBUG | proxy.py:271 | log | 62.210.15.199:80 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:35,321 | DEBUG | proxy.py:271 | log | 62.210.15.199:80 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:35,367 | DEBUG | proxy.py:271 | log | 45.63.8.76:80 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:35,367 | DEBUG | proxy.py:271 | log | 45.63.8.76:80 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:35,413 | DEBUG | proxy.py:271 | log | 201.91.248.67:20183 [HTTPS]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:35,414 | DEBUG | proxy.py:271 | log | 201.91.248.67:20183 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:35,414 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 [HTTPS]: Received: timeout; Runtime: 8.00
2025-02-18 08:07:35,414 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:35,415 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 

Found proxy: <Proxy CN 0.45s [HTTP: High] 101.66.199.171:8085>


2025-02-18 08:07:39,848 | DEBUG | proxy.py:271 | log | 197.232.85.163:8080 [HTTPS]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:39,849 | DEBUG | proxy.py:271 | log | 197.232.85.163:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:39,973 | DEBUG | proxy.py:271 | log | 117.81.238.69:8089 [HTTPS]: Received: timeout; Runtime: 8.01
2025-02-18 08:07:39,973 | DEBUG | proxy.py:271 | log | 117.81.238.69:8089 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:39,974 | DEBUG | proxy.py:271 | log | 117.81.238.69:8089 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:40,199 | DEBUG | proxy.py:271 | log | 67.43.228.250:33183 [HTTPS]: Connection: success; Runtime: 6.31
2025-02-18 08:07:40,200 | DEBUG | proxy.py:271 | log | 67.43.228.250:33183 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:40,545 | DEBUG | proxy.py:271 | log | 47.236.8.

Found proxy: <Proxy TZ 0.86s [HTTP: Anonymous] 41.59.90.171:80>


2025-02-18 08:07:45,859 | DEBUG | proxy.py:271 | log | 201.91.248.67:20183 [HTTP]: Connection: success; Runtime: 2.44
2025-02-18 08:07:45,860 | DEBUG | proxy.py:271 | log | 201.91.248.67:20183 [HTTP]: Request: b'GET http://httpbin.org/get?show_env HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/9418\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: httpbin.org\r\nConnection: close\r\nContent-Length: 0\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:45,934 | DEBUG | proxy.py:271 | log | 122.224.124.26:12080 [HTTPS]: Connection: success; Runtime: 0.30
2025-02-18 08:07:45,934 | DEBUG | proxy.py:271 | log | 122.224.124.26:12080 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:45,957 | DEBUG | proxy.py:271 | log | 116.228.73.230:650 [HTTPS]: Received: 145 bytes: b'HTTP/1.1 40

Found proxy: <Proxy US 1.70s [HTTP: High] 20.27.86.185:8080>


2025-02-18 08:07:46,497 | DEBUG | proxy.py:271 | log | 118.113.245.112:2324 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:46,497 | DEBUG | proxy.py:271 | log | 118.113.245.112:2324 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:46,512 | DEBUG | proxy.py:271 | log | 103.156.17.56:8181 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:46,513 | DEBUG | proxy.py:271 | log | 103.156.17.56:8181 [HTTPS]: Selected judge: <Judge [HTTP] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:46,513 | DEBUG | proxy.py:271 | log | 103.156.17.56:8181 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:46,548 | DEBUG | proxy.py:271 | log | 47.91.65.23:3128 [HTTP]: Connection: success; Runtime: 0.19
2025-02-18 08:07:46,549 | DEBUG | proxy.py:271 | log | 47.91.65.23:3128 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/1699\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=

Found proxy: <Proxy CN 0.26s [HTTP: High] 116.228.73.230:650>


2025-02-18 08:07:46,947 | DEBUG | proxy.py:271 | log | 4.155.2.13:9401 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:46,951 | DEBUG | proxy.py:271 | log | 159.65.245.255:80 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:46,951 | DEBUG | proxy.py:271 | log | 159.65.245.255:80 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:47,118 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTPS]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:47,119 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTPS]: Selected judge: <Judge [HTTP] proxyjudge.us>; Runtime: 0.00
2025-02-18 08:07:47,120 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:47,274 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTPS]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:47,274 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTPS]: Selected judge: <Judge [HTTP] httpbin.org>; Runtime:

Found proxy: <Proxy CN 0.43s [HTTP: High] 122.224.124.26:12080>


2025-02-18 08:07:47,561 | DEBUG | proxy.py:271 | log | 181.78.95.100:999 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/7496\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: azenv.net\r\nConnection: close\r\nContent-Length: 0\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:47,615 | DEBUG | proxy.py:271 | log | 51.254.197.101:3128 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:47,616 | DEBUG | proxy.py:271 | log | 51.254.197.101:3128 [HTTPS]: Selected judge: <Judge [HTTP] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:47,616 | DEBUG | proxy.py:271 | log | 51.254.197.101:3128 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:47,617 | DEBUG | proxy.py:271 | log | 36.37.224.125:8080 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:47,617 | DEBUG | proxy.py:271 | log | 36.37.224.125:8080 [HTTPS]: Initial connection; Runti

Found proxy: <Proxy SN 2.67s [HTTP: High] 154.65.39.7:80>


2025-02-18 08:07:49,297 | DEBUG | proxy.py:271 | log | 218.77.183.214:5224 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:49,297 | DEBUG | proxy.py:271 | log | 218.77.183.214:5224 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:49,323 | DEBUG | proxy.py:271 | log | 78.28.152.111:80 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:49,324 | DEBUG | proxy.py:271 | log | 78.28.152.111:80 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:49,337 | DEBUG | proxy.py:271 | log | 67.43.236.18:28835 [HTTP]: Connection: success; Runtime: 0.28
2025-02-18 08:07:49,338 | DEBUG | proxy.py:271 | log | 67.43.236.18:28835 [HTTP]: Request: b'GET http://proxyjudge.us/azenv.php HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/5673\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: proxyjudge.us\r\nConnection: close\r\nContent-Length: 0\r\n\

Found proxy: <Proxy AR 4.16s [HTTP: Transparent] 181.78.95.100:999>
Found proxy: <Proxy CA 1.68s [HTTP: High] 72.10.164.178:6253>


2025-02-18 08:07:49,616 | DEBUG | proxy.py:271 | log | 218.77.183.214:5224 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:49,726 | DEBUG | proxy.py:271 | log | 103.158.162.18:8080 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:49,729 | DEBUG | proxy.py:271 | log | 103.190.60.28:9090 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:49,729 | DEBUG | proxy.py:271 | log | 103.190.60.28:9090 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:49,772 | DEBUG | proxy.py:271 | log | 45.184.152.129:999 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:49,773 | DEBUG | proxy.py:271 | log | 45.184.152.129:999 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:49,774 | DEBUG | proxy.py:271 | log | 188.215.245.235:80 [HTTPS]: Received: timeout; Runtime: 8.00
2025-02-18 08:07:49,774 | DEBUG | proxy.py:

Found proxy: <Proxy CN 0.47s [HTTP: High] 218.77.183.214:5224>


2025-02-18 08:07:51,438 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 [HTTP]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:51,439 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:51,531 | DEBUG | proxy.py:271 | log | 195.175.29.38:9090 [HTTP]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:51,532 | DEBUG | proxy.py:271 | log | 195.175.29.38:9090 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:51,749 | DEBUG | proxy.py:271 | log | 154.65.39.8:80 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:51,750 | DEBUG | proxy.py:271 | log | 154.65.39.8:80 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:51,843 | DEBUG | proxy.py:271 | log | 45.119.55.157:3128 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:51,843 | DEBUG | proxy.py:271 | log | 45.119.55.157:3128 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:51,859 | DEBUG | proxy.py:271 | log | 103.156.221.106:8080 

Found proxy: <Proxy MX 1.75s [HTTP: Transparent] 200.94.96.174:999>


2025-02-18 08:07:52,857 | DEBUG | proxy.py:271 | log | 67.43.227.227:28159 [HTTP]: Connection: success; Runtime: 0.30
2025-02-18 08:07:52,857 | DEBUG | proxy.py:271 | log | 67.43.227.227:28159 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/7333\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: azenv.net\r\nConnection: close\r\nContent-Length: 0\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:52,930 | DEBUG | proxy.py:271 | log | 197.48.158.52:8080 [HTTPS]: Connection: success; Runtime: 0.36
2025-02-18 08:07:52,930 | DEBUG | proxy.py:271 | log | 197.48.158.52:8080 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:52,964 | DEBUG | proxy.py:271 | log | 188.132.222.40:8080 [HTTP]: Received: timeout; Runtime: 8.01
2025-02-18 08:07:52,9

Found proxy: <Proxy -- 2.12s [HTTP: Transparent] 103.177.235.207:83>


2025-02-18 08:07:53,618 | DEBUG | proxy.py:271 | log | 88.42.237.102:8080 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:53,618 | DEBUG | proxy.py:271 | log | 88.42.237.102:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:53,743 | DEBUG | proxy.py:271 | log | 103.152.112.120:80 [HTTPS]: Received: 152 bytes: b'HTTP/1.1 400'; Runtime: 0.35
2025-02-18 08:07:53,744 | DEBUG | proxy.py:271 | log | 103.152.112.120:80 [HTTPS]: Connect: failed. HTTP status: 400; Runtime: 0.00
2025-02-18 08:07:53,744 | DEBUG | proxy.py:271 | log | 103.152.112.120:80 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:53,744 | DEBUG | proxy.py:271 | log | 103.152.112.120:80 [INFO]: Selected judge: <Judge [HTTP] httpbin.org>; Runtime: 0.00
2025-02-18 08:07:53,745 | DEBUG | proxy.py:271 | log | 103.152.112.120:80 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:53,819 | DEBUG | proxy.py:271 | log | 218.1.197.148:2324 [HTTPS]: Received: timeout; Runtime: 8.00
2025-02-1

Found proxy: <Proxy BR 1.71s [HTTP: Transparent] 170.80.50.1:8080>


2025-02-18 08:07:54,488 | DEBUG | proxy.py:271 | log | 180.125.186.154:1080 [HTTPS]: Connection: failed; Runtime: 2.02
2025-02-18 08:07:54,489 | DEBUG | proxy.py:271 | log | 180.125.186.154:1080 [HTTPS]: Selected judge: <Judge [HTTP] proxyjudge.us>; Runtime: 0.00
2025-02-18 08:07:54,489 | DEBUG | proxy.py:271 | log | 180.125.186.154:1080 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:54,501 | DEBUG | proxy.py:271 | log | 118.113.245.112:2324 [HTTPS]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:54,502 | DEBUG | proxy.py:271 | log | 118.113.245.112:2324 [HTTPS]: Selected judge: <Judge [HTTP] proxyjudge.us>; Runtime: 0.00
2025-02-18 08:07:54,502 | DEBUG | proxy.py:271 | log | 118.113.245.112:2324 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:54,517 | DEBUG | proxy.py:271 | log | 103.156.17.56:8181 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:54,517 | DEBUG | proxy.py:271 | log | 103.156.17.56:8181 [HTTP]: Initial connection; Runtime: 0.00
20

Found proxy: <Proxy -- 0.43s [HTTP: High] 103.152.112.120:80>


2025-02-18 08:07:55,124 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:55,124 | DEBUG | proxy.py:271 | log | 58.147.186.31:3125 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:55,340 | DEBUG | proxy.py:271 | log | 41.43.162.50:8080 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:55,341 | DEBUG | proxy.py:271 | log | 41.43.162.50:8080 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:55,342 | DEBUG | proxy.py:271 | log | 47.91.65.23:3128 [HTTP]: Connection: success; Runtime: 0.78
2025-02-18 08:07:55,343 | DEBUG | proxy.py:271 | log | 47.91.65.23:3128 [HTTP]: Request: b'GET http://azenv.net/ HTTP/1.1\r\nUser-Agent: PxBroker/0.4.0/8919\r\nAccept: */*\r\nAccept-Encoding: gzip, deflate\r\nPragma: no-cache\r\nCache-control: no-cache\r\nCookie: cookie=ok\r\nReferer: https://www.google.com/\r\nHost: azenv.net\r\nConnection: close\r\nContent-Length: 0\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:55,376 | 

Found proxy: <Proxy AR 1.61s [HTTP: High] 190.103.177.131:80>


2025-02-18 08:07:59,031 | DEBUG | proxy.py:271 | log | 211.234.125.3:443 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:59,031 | DEBUG | proxy.py:271 | log | 211.234.125.3:443 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:59,062 | DEBUG | proxy.py:271 | log | 182.253.181.10:8080 [HTTP]: Received: 917 bytes: b'HTTP/1.1 200'; Runtime: 0.64
2025-02-18 08:07:59,062 | DEBUG | proxy.py:271 | log | 182.253.181.10:8080 [HTTP]: Get: success; Runtime: 0.00
2025-02-18 08:07:59,062 | DEBUG | checker.py:274 | _send_test_request | 182.253.181.10:8080 [HTTP]: (http://httpbin.org/get?show_env) rv: 5771, response: b'HTTP/1.1 200 OK\r\nDate: Tue, 18 Feb 2025 04:07:30 GMT\r\nContent-Type: application/json\r\nContent-Length: 711\r\nServer: gunicorn/19.9.0\r\nAccess-Control-Allow-Origin: *\r\nAccess-Control-Allow-Credentials: true\r\n\r\n{\n  "args": {\n    "show_env": ""\n  }, \n  "headers": {\n    "Accept": "*/*", \n    "Accept-Encoding": "gzip, deflate", \n    "Cache-Control": 

Found proxy: <Proxy ID 0.42s [HTTP: Transparent] 182.253.181.10:8080>


2025-02-18 08:07:59,335 | DEBUG | proxy.py:271 | log | 62.210.15.199:80 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:59,336 | DEBUG | proxy.py:271 | log | 62.210.15.199:80 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:59,337 | DEBUG | proxy.py:271 | log | 218.78.55.172:8089 [HTTPS]: Connection: success; Runtime: 2.23
2025-02-18 08:07:59,337 | DEBUG | proxy.py:271 | log | 218.78.55.172:8089 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:07:59,390 | DEBUG | proxy.py:271 | log | 45.63.8.76:80 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:59,391 | DEBUG | proxy.py:271 | log | 45.63.8.76:80 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:59,448 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:07:59,449 | DEBUG | proxy.py:271 | log | 67.43.236.18:21159 [HTTP]: In

Found proxy: <Proxy CA 4.29s [HTTP: High] 67.43.228.250:3187>


2025-02-18 08:07:59,766 | DEBUG | proxy.py:271 | log | 154.65.39.8:80 [HTTP]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:59,766 | DEBUG | proxy.py:271 | log | 154.65.39.8:80 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:59,844 | DEBUG | proxy.py:271 | log | 45.119.55.157:3128 [HTTP]: Connection: timeout; Runtime: 8.00
2025-02-18 08:07:59,845 | DEBUG | proxy.py:271 | log | 45.119.55.157:3128 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:07:59,877 | DEBUG | proxy.py:271 | log | 103.248.222.0:90 [HTTPS]: Connection: timeout; Runtime: 8.02
2025-02-18 08:07:59,878 | DEBUG | proxy.py:271 | log | 103.248.222.0:90 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:07:59,878 | DEBUG | proxy.py:271 | log | 72.10.160.90:33141 [HTTPS]: Received: timeout; Runtime: 8.01
2025-02-18 08:07:59,879 | DEBUG | proxy.py:271 | log | 72.10.160.90:33141 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:07:59,879 | DEBUG | proxy.py:271 | log | 72.10.160.90:33141 [HTT

Found proxy: <Proxy CN 4.01s [HTTP: High] 218.1.197.177:2324>


2025-02-18 08:08:01,384 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTP]: Received: timeout; Runtime: 8.01
2025-02-18 08:08:01,385 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTP]: Get: failed; Runtime: 0.00
2025-02-18 08:08:01,386 | DEBUG | checker.py:274 | _send_test_request | 47.129.37.172:3128 [HTTP]: (http://httpbin.org/get?show_env) rv: 4626, response: None
2025-02-18 08:08:01,386 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTP]: Connection: closed; Runtime: 0.00
2025-02-18 08:08:01,386 | DEBUG | proxy.py:271 | log | 47.129.37.172:3128 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:08:01,400 | DEBUG | proxy.py:271 | log | 2.180.31.98:8035 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:08:01,400 | DEBUG | proxy.py:271 | log | 2.180.31.98:8035 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:08:01,461 | DEBUG | proxy.py:271 | log | 222.67.8.128:1080 [HTTP]: Connection: timeout; Runtime: 8.01
2025-02-18 08:08:01,462 | DEBUG | proxy

Found proxy: <Proxy CA 1.14s [HTTP: High] 67.43.236.19:22299>
Found proxy: <Proxy IR 3.99s [HTTP: Transparent] 185.88.154.247:8585>


2025-02-18 08:08:05,374 | DEBUG | proxy.py:271 | log | 168.196.114.89:56000 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:08:05,375 | DEBUG | proxy.py:271 | log | 168.196.114.89:56000 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:08:05,376 | DEBUG | proxy.py:271 | log | 152.26.229.52:9443 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:08:05,376 | DEBUG | proxy.py:271 | log | 152.26.229.52:9443 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:08:05,377 | DEBUG | proxy.py:271 | log | 176.88.175.170:8080 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:08:05,377 | DEBUG | proxy.py:271 | log | 176.88.175.170:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:08:05,377 | DEBUG | proxy.py:271 | log | 103.188.173.153:8080 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:08:05,378 | DEBUG | proxy.py:271 | log | 103.188.173.153:8080 [

Found proxy: <Proxy DE 0.30s [HTTP: High] 81.169.213.169:8888>
Found proxy: <Proxy JP 0.26s [HTTP: High] 43.129.201.43:443>


2025-02-18 08:08:06,760 | DEBUG | proxy.py:271 | log | 67.43.228.253:15235 [HTTPS]: Received: 19 bytes: b'HTTP/1.0 200'; Runtime: 0.99
2025-02-18 08:08:06,761 | DEBUG | proxy.py:271 | log | 67.43.228.253:15235 [HTTPS]: SSL: Initial connection; Runtime: 0.00
2025-02-18 08:08:06,761 | DEBUG | proxy.py:271 | log | 67.43.228.253:15235 [HTTPS]: SSL: ; Runtime: 0.00
2025-02-18 08:08:06,761 | DEBUG | proxy.py:271 | log | 67.43.228.253:15235 [HTTPS]: Connection: closed; Runtime: 0.00
2025-02-18 08:08:06,764 | DEBUG | proxy.py:271 | log | 64.147.212.78:8080 [INFO]: Selected judge: <Judge [HTTPS] httpbin.org>; Runtime: 0.00
2025-02-18 08:08:06,765 | DEBUG | proxy.py:271 | log | 64.147.212.78:8080 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:08:06,766 | DEBUG | proxy.py:271 | log | 118.70.184.10:31300 [HTTPS]: Connection: failed; Runtime: 1.44
2025-02-18 08:08:06,766 | DEBUG | proxy.py:271 | log | 118.70.184.10:31300 [HTTPS]: Selected judge: <Judge [HTTP] proxyjudge.us>; Runtime: 0.00

Found proxy: <Proxy VN 0.42s [HTTP: High] 123.30.154.171:7777>


2025-02-18 08:08:07,057 | DEBUG | proxy.py:271 | log | 106.42.30.243:82 [HTTPS]: Request: b'CONNECT httpbin.org:443 HTTP/1.1\r\nHost: httpbin.org\r\nUser-Agent: PxBroker/0.4.0/\r\nConnection: keep-alive\r\n\r\n'; Runtime: 0.00
2025-02-18 08:08:07,075 | DEBUG | proxy.py:271 | log | 180.105.244.206:1080 [HTTPS]: Connection: failed; Runtime: 1.91
2025-02-18 08:08:07,075 | DEBUG | proxy.py:271 | log | 180.105.244.206:1080 [HTTPS]: Selected judge: <Judge [HTTP] httpbin.org>; Runtime: 0.00
2025-02-18 08:08:07,076 | DEBUG | proxy.py:271 | log | 180.105.244.206:1080 [HTTP]: Initial connection; Runtime: 0.00
2025-02-18 08:08:07,076 | DEBUG | proxy.py:271 | log | 51.254.132.238:90 [HTTPS]: Connection: timeout; Runtime: 8.01
2025-02-18 08:08:07,077 | DEBUG | proxy.py:271 | log | 51.254.132.238:90 [HTTPS]: Initial connection; Runtime: 0.00
2025-02-18 08:08:07,105 | DEBUG | proxy.py:271 | log | 23.247.137.142:80 [HTTP]: Received: 826 bytes: b'HTTP/1.1 200'; Runtime: 0.39
2025-02-18 08:08:07,105 | D

Found proxy: <Proxy BH 0.31s [HTTP: High] 23.247.137.142:80>


In [None]:
import logging
from typing import List, Optional

from ipykernel.eventloops import register_integration

# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)


class NoProxiesAvailable(Exception):
    pass


class ProxyManager:
    def __init__(self, timeout=5, refresh_interval=300, blacklist_duration=600, num_proxies=50):
        self.logger = logging.getLogger(__name__)
        self.proxy_list = []
        self.blacklist = {}  # {proxy: timestamp}
        self.refresh_interval = refresh_interval
        self.blacklist_duration = blacklist_duration
        self.last_refresh = 0
        self.num_proxies = num_proxies
        self.timeout = timeout
        self.test_url = "https://httpbin.org/ip"
        # No longer needed: self.semaphore = asyncio.Semaphore(num_proxies)
        self.broker = Broker()  # Create a Broker instance

    async def _test_proxy(self, proxy_str: str) -> Optional[str]:
        """Test if a proxy is working using `requests` and return it (without latency) if successful."""

        if proxy_str in self.blacklist and time.time() - self.blacklist[proxy_str] < self.blacklist_duration:
            return None

        def test():
            try:
                #  proxybroker returns proxy as string "IP:Port"
                proxies = {
                    "http": f"http://{proxy_str}",
                    "https": f"http://{proxy_str}",  # Try both http and https (adjust as needed)
                }
                self.logger.info(f"Testing proxy: {proxy_str}")
                response = requests.get(self.test_url, proxies=proxies, timeout=self.timeout, verify=False)
                response.raise_for_status()
                if response.status_code == 200:
                    self.logger.info(f"Proxy {proxy_str} working")
                    return proxy_str
                self.logger.info(f"Proxy {proxy_str} failed with status code {response.status_code}")
                return None
            except (requests.RequestException, requests.Timeout) as e:
                self.logger.debug(f"Proxy {proxy_str} failed: {type(e).__name__}: {e}")
                return None

        return await asyncio.to_thread(test)

    async def get_working_proxies(self) -> List[str]:
        """Fetch, test, and return a list of working proxies using ProxyBroker."""
        current_time = time.time()

        if current_time - self.last_refresh < self.refresh_interval and self.proxy_list:
            return self.proxy_list  # Use cached list if still valid

        proxies = asyncio.Queue()
        # Find proxies using ProxyBroker.  This is non-blocking.
        find_task = asyncio.create_task(self.broker.find(types=["HTTP", "HTTPS"], limit=self.num_proxies, data=proxies))

        self.proxy_list = []  # Reset proxy_list
        tested_count = 0
        try:
            while True:
                # Get proxies from the queue (asynchronously)
                proxy = await asyncio.wait_for(proxies.get(), timeout=self.refresh_interval)  # Set a reasonable timeout
                if proxy is None:  # Broker signals end with None
                    break
                # proxybroker return proxy object, convert it to ip string
                proxy_str = f"{proxy.host}:{proxy.port}"
                tested_proxy = await self._test_proxy(proxy_str)  # test the proxy

                if tested_proxy:
                    self.proxy_list.append(tested_proxy)  # Add the tested proxy
                tested_count += 1

                if len(self.proxy_list) >= self.num_proxies or tested_count >= self.num_proxies * 2:
                    break  # Stop if enough proxies are found, or tested 2x the target number.

        except asyncio.TimeoutError:
            self.logger.warning("Timeout while waiting for proxies from broker.")
        finally:
            find_task.cancel()  # Ensure the find_task is cancelled.
            try:
                await find_task
            except asyncio.CancelledError:
                pass

        self.last_refresh = time.time()

        if not self.proxy_list:
            self.logger.warning("No working proxies found.")
            raise NoProxiesAvailable("No working proxies found.")
        return self.proxy_list

    async def refresh_proxies(self):
        """Force refresh the proxy list."""
        await self.get_working_proxies()

    async def get_random_proxy(self) -> Optional[str]:
        """Return a random working proxy."""
        try:
            if not self.proxy_list:
                await self.refresh_proxies()
            return random.choice(self.proxy_list) if self.proxy_list else None
        except NoProxiesAvailable:
            return None

    def remove_proxy(self, proxy: str):
        """Remove a proxy and blacklist it."""
        if proxy in self.proxy_list:
            self.proxy_list.remove(proxy)
            self.blacklist[proxy] = time.time()


async def main():
    proxy_manager = ProxyManager(timeout=10, num_proxies=10)  # Adjust as needed
    try:
        working_proxies = await proxy_manager.get_working_proxies()
        print(f"Working proxies: {working_proxies}")

        if working_proxies:
            random_proxy = await proxy_manager.get_random_proxy()
            print(f"Random proxy: {random_proxy}")

            # Example usage (replace with your actual logic)
            if random_proxy:
                try:
                    proxies = {
                        "http": f"http://{random_proxy}",
                        "https": f"http://{random_proxy}",
                    }
                    response = requests.get("https://www.google.com", proxies=proxies, timeout=10)
                    response.raise_for_status()
                    print(f"Successfully fetched Google using proxy: {random_proxy}")
                except requests.RequestException as e:
                    print(f"Error using proxy {random_proxy}: {e}")
                    proxy_manager.remove_proxy(random_proxy)  # Remove the failing proxy

    except NoProxiesAvailable:
        print("No working proxies available.")
    except Exception as e:  # Catch other error
        print(f"An error occurred: {e}")


await main()

In [None]:



async def find_and_show_proxies(types, limit):
    """Finds proxies, prints them, and returns a list of found proxies."""
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    found_proxies = []  # List to store found proxies

    try:

        async def process_proxies():
            find_task = asyncio.create_task(broker.find(types=types, limit=limit))
            while True:
                try:
                    proxy = await asyncio.wait_for(proxies.get(), timeout=10.0)
                    if proxy is None:
                        break
                    print(f"Found proxy: {proxy}")
                    found_proxies.append(proxy)  # Add the proxy to the list
                except asyncio.TimeoutError:
                    print("Timeout waiting for proxy. Checking if find task is done...")
                    if find_task.done():
                        break
                    else:
                        continue
                except Exception as e:
                    print(f"Error processing proxy: {e}")
            await find_task

        await process_proxies()
        return found_proxies  # Return the list of proxies

    except Exception as e:
        print(f"An error occurred in find_and_show_proxies: {e}")
        return []  # Return an empty list in case of an error
    finally:
        broker.stop()
        while not proxies.empty():
            proxies.get_nowait()
        print("Broker stopped and queue cleared.")


proxies_list = []


async def main():
    try:
        proxy_list = await find_and_show_proxies(types=["HTTP", "HTTPS"], limit=10)
        print(f"\nTotal proxies found: {len(proxy_list)}")

        for proxy in proxy_list:
            proxies_list.append(f"{proxy.host}:{proxy.port}")
            print(proxy)

    except Exception as e:
        print(f"An error occurred within main: {e}")


await main()


In [10]:
proxies_list

['203.19.38.114:1080',
 '46.4.246.198:3128',
 '47.83.192.255:8888',
 '133.18.234.13:80',
 '221.231.13.198:1080',
 '183.215.23.242:9091',
 '119.3.113.151:9094',
 '106.38.26.22:2080',
 '47.100.254.82:80',
 '58.243.224.244:8085']

In [10]:
import logging

import aiohttp

# Set up logging (optional, but good practice)
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def find_and_show_proxies(types, limit, results_queue):
    """Finds proxies, prints them, and puts them in a queue, respecting the limit."""
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    found_count = 0  # Keep track of how many proxies we've found

    try:

        async def process_proxies():
            nonlocal found_count  # Allow modification of the outer scope variable
            find_task = asyncio.create_task(broker.find(types=types))  # No limit here

            while True:
                if found_count >= limit:  # Check the limit *before* getting a proxy
                    break

                try:
                    proxy = await asyncio.wait_for(proxies.get(), timeout=10.0)
                    if proxy is None:
                        break

                    print(f"Found proxy: {proxy}")
                    found_count += 1  # Increment the counter
                    await results_queue.put(f"{proxy.host}:{proxy.port}")

                except asyncio.TimeoutError:
                    logger.info("Timeout waiting for proxy. Checking if find task is done...")
                    if find_task.done():
                        break
                    else:
                        continue  # keep finding
                except Exception as e:
                    logger.error(f"Error processing proxy: {e}")

            await find_task

        await process_proxies()

    except Exception as e:
        logger.exception(f"An error occurred in find_and_show_proxies: {e}")
    finally:
        broker.stop()
        while not proxies.empty():
            proxies.get_nowait()
        logger.info("Broker stopped and queue cleared.")
        await results_queue.put(None)  # Signal end of results


async def collect_results(results_queue, proxies_list):
    """Collects results from the queue and appends them to the list."""
    while True:
        proxy_str = await results_queue.get()
        if proxy_str is None:
            break
        proxies_list.append(proxy_str)


async def test_proxy(proxy, working_proxies):
    """Tests a single proxy using aiohttp."""
    proxy_url = f"http://{proxy}"  # aiohttp expects the full URL
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get("https://www.google.com", proxy=proxy_url, timeout=10) as response:
                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
                logger.info(f"Successfully fetched Google using proxy: {proxy}")
                working_proxies.append(proxy)  # add to working proxy list
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        logger.error(f"Error using proxy {proxy}: {e}")


async def main():
    try:
        results_queue = asyncio.Queue()  # Queue for results
        proxies_list = []  # Shared list to store proxies
        working_proxies = []  # List to store working proxies

        # Create tasks for finding proxies and collecting results
        # find_task = asyncio.create_task(find_and_show_proxies(types=["HTTP", "HTTPS"], limit=5, results_queue=results_queue))
        find_task = asyncio.create_task(find_and_show_proxies(types=["HTTPS"], limit=5, results_queue=results_queue))
        collect_task = asyncio.create_task(collect_results(results_queue, proxies_list))

        # Wait for both tasks to complete
        await asyncio.gather(find_task, collect_task)

        print(f"\nTotal proxies found: {len(proxies_list)}")
        for proxy_str in proxies_list:
            print(proxy_str)

        # Test the proxies concurrently
        test_tasks = [test_proxy(proxy, working_proxies) for proxy in proxies_list]
        await asyncio.gather(*test_tasks)

        print(f"\nWorking proxies: {len(working_proxies)}")
        for proxy in working_proxies:
            print(proxy)  # print working proxies

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


await main()


2025-02-18 10:44:09,723 | INFO | 3803732578.py:41 | process_proxies | Timeout waiting for proxy. Checking if find task is done...
2025-02-18 10:44:09,724 | INFO | api.py:432 | _done | Done! Total found proxies: 253
2025-02-18 10:44:09,724 | INFO | api.py:423 | stop | Stop!
2025-02-18 10:44:09,724 | INFO | 3803732578.py:59 | find_and_show_proxies | Broker stopped and queue cleared.



Total proxies found: 0

Working proxies: 0


In [6]:
import logging


# Set up logging (optional, but good practice)
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def find_and_show_proxies(types, limit, results_queue):
    """Finds proxies, prints them, and puts them in a queue, respecting the limit."""
    proxies = asyncio.Queue()
    broker = Broker(proxies)
    found_count = 0  # Keep track of how many proxies we've found

    try:

        async def process_proxies():
            nonlocal found_count  # Allow modification of the outer scope variable
            find_task = asyncio.create_task(broker.find(types=types))  # No limit here

            while True:
                if found_count >= limit:  # Check the limit *before* getting a proxy
                    break

                try:
                    proxy = await asyncio.wait_for(proxies.get(), timeout=10.0)
                    if proxy is None:
                        break

                    print(f"Found proxy: {proxy}")
                    found_count += 1  # Increment the counter
                    await results_queue.put(f"{proxy.host}:{proxy.port}")

                except asyncio.TimeoutError:
                    logger.info("Timeout waiting for proxy. Checking if find task is done...")
                    if find_task.done():
                        break
                    else:
                        continue  # keep finding
                except Exception as e:
                    logger.error(f"Error processing proxy: {e}")

            await find_task

        await process_proxies()

    except Exception as e:
        logger.exception(f"An error occurred in find_and_show_proxies: {e}")
    finally:
        broker.stop()
        while not proxies.empty():
            proxies.get_nowait()
        logger.info("Broker stopped and queue cleared.")
        await results_queue.put(None)  # Signal end of results


async def collect_results(results_queue, proxies_list):
    """Collects results from the queue and appends them to the list."""
    while True:
        proxy_str = await results_queue.get()
        if proxy_str is None:
            break
        proxies_list.append(proxy_str)


async def test_proxy(proxy, working_proxies):
    """Tests a single proxy using aiohttp."""
    proxy_url = f"http://{proxy}"  # aiohttp expects the full URL
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get("https://www.google.com", proxy=proxy_url, timeout=10) as response:
                response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
                logger.info(f"Successfully fetched Google using proxy: {proxy}")
                working_proxies.append(proxy)  # add to working proxy list
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        logger.error(f"Error using proxy {proxy}: {e}")


async def check_google_connectivity(proxy):
    """Checks if a proxy can connect to google.com."""
    proxy_url = f"http://{proxy}"
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get("https://www.google.com", proxy=proxy_url, timeout=10) as response:
                response.raise_for_status()
                return True  # Connectivity successful
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return False  # Connectivity failed


async def main():
    try:
        results_queue = asyncio.Queue()  # Queue for results
        proxies_list = []  # Shared list to store proxies
        working_proxies = []  # list to store working proxies

        # Create tasks for finding proxies and collecting results
        find_task = asyncio.create_task(find_and_show_proxies(types=["HTTP", "HTTPS"], limit=5, results_queue=results_queue))
        collect_task = asyncio.create_task(collect_results(results_queue, proxies_list))

        # Wait for both tasks to complete
        await asyncio.gather(find_task, collect_task)

        print(f"\nTotal proxies found: {len(proxies_list)}")
        for proxy_str in proxies_list:
            print(proxy_str)

        # Test the proxies concurrently
        test_tasks = [test_proxy(proxy, working_proxies) for proxy in proxies_list]
        await asyncio.gather(*test_tasks)

        print(f"\nWorking proxies: {len(working_proxies)}")
        # final_working_proxies = []
        for proxy in working_proxies:
            print(proxy)  # print working proxies
            # if await check_google_connectivity(proxy):  # No need to check again
            #     final_working_proxies.append(proxy)

        # print(f"\nFinal working proxies (and can connect to Google): {len(final_working_proxies)}")
        # for proxy in final_working_proxies:
        #     print(proxy)

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


await main()


Found proxy: <Proxy US 0.34s [HTTP: Transparent] 47.83.192.255:8888>
Found proxy: <Proxy CN 0.49s [HTTP: High] 111.34.79.216:10219>
Found proxy: <Proxy CN 0.52s [HTTP: High] 117.36.76.86:8085>
Found proxy: <Proxy RU 0.53s [HTTP: Transparent] 185.105.102.189:80>


2025-02-18 10:37:42,421 | INFO | api.py:432 | _done | Done! Total found proxies: 218
2025-02-18 10:37:42,422 | INFO | api.py:423 | stop | Stop!
2025-02-18 10:37:42,422 | INFO | 2247258965.py:59 | find_and_show_proxies | Broker stopped and queue cleared.


Found proxy: <Proxy US 0.68s [HTTP: High] 138.68.60.8:3128>

Total proxies found: 5
47.83.192.255:8888
111.34.79.216:10219
117.36.76.86:8085
185.105.102.189:80
138.68.60.8:3128


2025-02-18 10:37:42,936 | ERROR | 2247258965.py:82 | test_proxy | Error using proxy 111.34.79.216:10219: [WinError 10054] An existing connection was forcibly closed by the remote host
2025-02-18 10:37:42,940 | ERROR | 2247258965.py:82 | test_proxy | Error using proxy 138.68.60.8:3128: 400, message='Bad Request', url='http://138.68.60.8:3128'
2025-02-18 10:37:43,017 | ERROR | 2247258965.py:82 | test_proxy | Error using proxy 47.83.192.255:8888: 400, message='Bad Request', url='http://47.83.192.255:8888'
2025-02-18 10:37:43,216 | ERROR | 2247258965.py:82 | test_proxy | Error using proxy 117.36.76.86:8085: [WinError 10054] An existing connection was forcibly closed by the remote host
2025-02-18 10:37:43,231 | ERROR | 2247258965.py:82 | test_proxy | Error using proxy 185.105.102.189:80: 400, message='Bad Request', url='http://185.105.102.189'



Working proxies: 0


In [None]:
import logging


# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def find_and_show_proxies(limit, results_queue):
    proxies = asyncio.Queue()
    judges = ["https://httpbin.org/get?show_env"]
    # broker = Broker(queue=proxies, verify_ssl=True, max_conn=200, max_tries=3, judges=judges)
    broker = Broker(queue=proxies, verify_ssl=True, max_conn=200, max_tries=3)
    found_count = 0

    try:

        async def process_proxies():
            nonlocal found_count
            find_task = asyncio.create_task(broker.find(types=["HTTP", "HTTPS", "CONNECT:443", "CONNECT:80"]))

            while True:
                # if found_count >= limit:
                #     break

                try:
                    proxy = await asyncio.wait_for(proxies.get(), timeout=10.0)
                    if proxy is None:
                        break

                    print(f"Found proxy: {proxy}")
                    found_count += 1
                    await results_queue.put(f"{proxy.host}:{proxy.port}")

                except asyncio.TimeoutError:
                    logger.info("Timeout waiting for proxy. Checking if find task is done...")
                    if find_task.done():
                        break
                    else:
                        continue
                except Exception as e:
                    logger.error(f"Error processing proxy: {e}")
            await find_task

        await process_proxies()

    except Exception as e:
        logger.exception(f"An error occurred in find_and_show_proxies: {e}")
    finally:
        broker.stop()
        while not proxies.empty():
            proxies.get_nowait()
        logger.info("Broker stopped and queue cleared.")
        await results_queue.put(None)


async def collect_results(results_queue, proxies_list):
    """Collects results from the queue and appends them to the list."""
    while True:
        proxy_str = await results_queue.get()
        if proxy_str is None:
            break
        proxies_list.append(proxy_str)


async def test_proxy(proxy, working_proxies):
    proxy_url = f"http://{proxy}"
    timeout = aiohttp.ClientTimeout(total=10)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request("CONNECT", "scholar.google.com/scholar:443", proxy=proxy_url) as conn_response:
                conn_response.raise_for_status()
                async with session.get("https://scholar.google.com/scholar") as get_response:
                    get_response.raise_for_status()
                    logger.info(f"Successfully fetched Google using proxy: {proxy}")
                    working_proxies.append(proxy)

    except aiohttp.ClientProxyConnectionError as e:
        logger.error(f"Proxy connection error for {proxy}: {e}")
    except aiohttp.ClientError as e:
        logger.error(f"Client error for {proxy}: {e}")
    except asyncio.TimeoutError:
        logger.error(f"Timeout error for {proxy}")
    except Exception as e:
        logger.exception(f"Unexpected error testing proxy {proxy}: {e}")


async def main():
    try:
        results_queue = asyncio.Queue()
        proxies_list = []
        working_proxies = []

        find_task = asyncio.create_task(find_and_show_proxies(limit=10, results_queue=results_queue))
        collect_task = asyncio.create_task(collect_results(results_queue, proxies_list))
        await asyncio.gather(find_task, collect_task)

        print(f"\nTotal proxies found: {len(proxies_list)}")
        for proxy_str in proxies_list:
            print(proxy_str)

        test_tasks = [test_proxy(proxy, working_proxies) for proxy in proxies_list]
        await asyncio.gather(*test_tasks)

        print(f"\nWorking proxies: {len(working_proxies)}")
        for proxy in working_proxies:
            print(proxy)

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


await main()


Found proxy: <Proxy CN 0.35s [HTTP: High] 101.71.143.237:8092>
Found proxy: <Proxy CA 0.35s [HTTP: Transparent] 142.93.202.130:3128>
Found proxy: <Proxy CN 0.43s [HTTP: High] 218.77.183.214:5224>
Found proxy: <Proxy CN 0.45s [HTTP: Transparent] 27.37.98.14:8888>
Found proxy: <Proxy RU 0.50s [HTTP: Transparent] 185.105.102.189:80>
Found proxy: <Proxy CN 0.58s [HTTP: High] 117.36.76.86:8085>
Found proxy: <Proxy US 0.63s [HTTP: High] 198.49.68.80:80>
Found proxy: <Proxy IR 0.76s [HTTP: High] 87.248.129.32:80>
Found proxy: <Proxy DK 1.21s [HTTP: High] 194.182.163.117:3128>
Found proxy: <Proxy JP 0.55s [HTTP: Transparent, CONNECT:80] 43.133.136.208:8800>
Found proxy: <Proxy ID 1.88s [CONNECT:80] 182.255.0.62:3128>
Found proxy: <Proxy VN 1.03s [HTTP: High, CONNECT:80] 116.108.1.126:10024>
Found proxy: <Proxy CA 2.85s [HTTP: Transparent] 158.69.122.49:3129>
Found proxy: <Proxy CN 0.55s [HTTP: High] 113.108.13.120:8083>
Found proxy: <Proxy -- 1.41s [CONNECT:80] 45.87.68.3:15321>
Found proxy: <

2025-02-18 11:41:25,510 | INFO | 249969982.py:42 | process_proxies | Timeout waiting for proxy. Checking if find task is done...
2025-02-18 11:41:25,511 | INFO | api.py:432 | _done | Done! Total found proxies: 1195
2025-02-18 11:41:25,511 | INFO | api.py:423 | stop | Stop!
2025-02-18 11:41:25,512 | INFO | 249969982.py:59 | find_and_show_proxies | Broker stopped and queue cleared.
2025-02-18 11:41:25,518 | ERROR | 249969982.py:88 | test_proxy | Client error for 101.71.143.237:8092: www.google.com:443
2025-02-18 11:41:25,519 | ERROR | 249969982.py:88 | test_proxy | Client error for 142.93.202.130:3128: www.google.com:443
2025-02-18 11:41:25,519 | ERROR | 249969982.py:88 | test_proxy | Client error for 218.77.183.214:5224: www.google.com:443
2025-02-18 11:41:25,519 | ERROR | 249969982.py:88 | test_proxy | Client error for 27.37.98.14:8888: www.google.com:443
2025-02-18 11:41:25,520 | ERROR | 249969982.py:88 | test_proxy | Client error for 185.105.102.189:80: www.google.com:443
2025-02-18 


Total proxies found: 157
101.71.143.237:8092
142.93.202.130:3128
218.77.183.214:5224
27.37.98.14:8888
185.105.102.189:80
117.36.76.86:8085
198.49.68.80:80
87.248.129.32:80
194.182.163.117:3128
43.133.136.208:8800
182.255.0.62:3128
116.108.1.126:10024
158.69.122.49:3129
113.108.13.120:8083
45.87.68.3:15321
106.75.146.240:10810
87.248.129.26:80
186.167.80.235:8090
160.20.144.10:3128
49.49.58.98:8080
14.145.188.60:1070
181.198.120.212:999
103.245.36.15:8090
103.24.214.230:8080
106.38.26.22:2080
103.155.196.110:8080
59.50.95.61:8092
61.149.134.61:8000
125.99.106.250:3128
187.85.82.222:55676
47.243.114.192:8180
45.133.107.10:81
51.75.86.68:3128
219.65.73.81:80
122.52.213.79:62102
37.204.42.83:8081
51.254.78.223:80
103.133.26.75:8181
164.163.134.226:999
72.10.160.170:23375
200.69.92.98:999
103.152.112.120:80
162.223.90.130:80
119.3.113.150:9094
111.34.79.216:10219
218.1.197.52:2324
47.100.254.82:80
180.180.90.175:8080
65.108.203.35:28080
119.3.113.151:9094
119.3.113.152:9094
67.43.236.18:32

In [None]:
import logging


# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def fetch_proxies(url):
    """Fetches and parses proxies from the given URL."""
    proxies = []
    timeout = aiohttp.ClientTimeout(total=30)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                text = await response.text()
                # Split the content by lines to get individual proxies
                proxies = text.splitlines()
                proxies = [proxy.strip() for proxy in proxies if proxy.strip()]

    except aiohttp.ClientError as e:
        logger.error(f"Client error fetching proxies: {e}")
    except Exception as e:
        logger.exception(f"Error fetching proxies from {url}: {e}")
    return proxies


async def test_proxy(proxy, working_proxies):
    """Tests a single proxy."""
    proxy_url = f"https://{proxy}"
    timeout = aiohttp.ClientTimeout(total=10)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request("CONNECT", "scholar.google.com/scholar:443", proxy=proxy_url) as conn_response:
                conn_response.raise_for_status()
                async with session.get("https://scholar.google.com/scholar", proxy=proxy_url) as get_response:
                    get_response.raise_for_status()
                    logger.info(f"Successfully fetched Google using proxy: {proxy}")
                    working_proxies.append(proxy)

    except aiohttp.ClientProxyConnectionError as e:
        logger.error(f"Proxy connection error for {proxy}: {e}")
    except aiohttp.ClientError as e:
        logger.error(f"Client error for {proxy}: {e}")
    except asyncio.TimeoutError:
        logger.error(f"Timeout error for {proxy}")
    except Exception as e:
        logger.exception(f"Unexpected error testing proxy {proxy}: {e}")


async def main():
    """Main function to fetch, test, and print proxies."""
    try:
        proxy_url = "https://advanced.name/freeproxy/67b5d6e7c5c96?type=https"
        proxies_list = await fetch_proxies(proxy_url)
        working_proxies = []

        if proxies_list:
            print(f"\nTotal proxies fetched: {len(proxies_list)}")
            # for proxy_str in proxies_list:  # No need to print
            #     print(proxy_str)

            test_tasks = [test_proxy(proxy, working_proxies) for proxy in proxies_list]
            await asyncio.gather(*test_tasks)

            print(f"\nWorking proxies: {len(working_proxies)}")
            for proxy in working_proxies:
                print(proxy)
        else:
            print("No proxies found.")

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


# if __name__ == "__main__":
#     asyncio.run(main())
await main()

2025-02-19 17:14:27,746 | ERROR | 1817436218.py:50 | test_proxy | Client error for 110.93.225.73:8080: scholar.google.com/scholar:443
2025-02-19 17:14:27,747 | ERROR | 1817436218.py:50 | test_proxy | Client error for 116.206.242.110:3888: scholar.google.com/scholar:443
2025-02-19 17:14:27,747 | ERROR | 1817436218.py:50 | test_proxy | Client error for 218.255.90.106:8002: scholar.google.com/scholar:443
2025-02-19 17:14:27,747 | ERROR | 1817436218.py:50 | test_proxy | Client error for 103.82.26.77:1996: scholar.google.com/scholar:443
2025-02-19 17:14:27,747 | ERROR | 1817436218.py:50 | test_proxy | Client error for 4.149.210.210:3128: scholar.google.com/scholar:443
2025-02-19 17:14:27,748 | ERROR | 1817436218.py:50 | test_proxy | Client error for 27.147.137.90:6969: scholar.google.com/scholar:443
2025-02-19 17:14:27,748 | ERROR | 1817436218.py:50 | test_proxy | Client error for 47.90.205.231:33333: scholar.google.com/scholar:443
2025-02-19 17:14:27,748 | ERROR | 1817436218.py:50 | test_p


Total proxies fetched: 105

Working proxies: 0


In [None]:
import logging


# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def fetch_proxies(url):
    """Fetches and parses proxies from the given URL."""
    proxies = []
    timeout = aiohttp.ClientTimeout(total=30)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                text = await response.text()
                # Split the content by lines to get individual proxies
                proxies = text.splitlines()
                proxies = [proxy.strip() for proxy in proxies if proxy.strip()]

    except aiohttp.ClientError as e:
        logger.error(f"Client error fetching proxies: {e}")
    except Exception as e:
        logger.exception(f"Error fetching proxies from {url}: {e}")
    return proxies


async def simple_test_proxy(proxy):
    """Performs a simple GET request through the proxy to a test site."""
    proxy_url = f"https://{proxy}"
    timeout = aiohttp.ClientTimeout(total=5)  # Shorter timeout for simple test
    test_url = "https://httpbin.org/ip"  # Simple test site

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(test_url, proxy=proxy_url) as response:
                response.raise_for_status()
                data = await response.json()
                logger.info(f"Proxy {proxy} test successful. Response: {data}")
                return True

    except Exception as e:
        logger.error(f"Proxy {proxy} test failed: {e}")
        return False


async def test_proxy(proxy, working_proxies):
    """Tests a single proxy."""
    proxy_url = f"https://{proxy}"
    timeout = aiohttp.ClientTimeout(total=10)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request("CONNECT", "scholar.google.com/scholar:443", proxy=proxy_url) as conn_response:
                conn_response.raise_for_status()
                async with session.get("https://scholar.google.com/scholar", proxy=proxy_url) as get_response:
                    get_response.raise_for_status()
                    logger.info(f"Successfully fetched Google using proxy: {proxy}")
                    working_proxies.append(proxy)

    except aiohttp.ClientProxyConnectionError as e:
        logger.error(f"Proxy connection error for {proxy}: {e}")
    except aiohttp.ClientError as e:
        logger.error(f"Client error for {proxy}: {e}")
    except asyncio.TimeoutError:
        logger.error(f"Timeout error for {proxy}")
    except Exception as e:
        logger.exception(f"Unexpected error testing proxy {proxy}: {e}")


async def main():
    """Main function to fetch, test, and print proxies."""
    try:
        proxy_url = "https://advanced.name/freeproxy/67b5d6e7c5c96?type=https"
        proxies_list = await fetch_proxies(proxy_url)
        working_proxies = []

        if proxies_list:
            print(f"\nTotal proxies fetched: {len(proxies_list)}")

            # Perform simple tests first
            print("\nPerforming simple tests...")
            simple_test_results = await asyncio.gather(*[simple_test_proxy(proxy) for proxy in proxies_list])

            # Filter proxies that passed the simple test
            simply_tested_proxies = [proxy for proxy, passed in zip(proxies_list, simple_test_results) if passed]

            if simply_tested_proxies:
                print(f"\n{len(simply_tested_proxies)} proxies passed initial test")

                print(f"\nTesting {len(simply_tested_proxies)} proxies against scholar.google.com")
                test_tasks = [test_proxy(proxy, working_proxies) for proxy in simply_tested_proxies]
                await asyncio.gather(*test_tasks)
            else:
                print("No proxies passed initial test.")

            print(f"\nWorking proxies (scholar.google.com): {len(working_proxies)}")
            for proxy in working_proxies:
                print(proxy)
        else:
            print("No proxies found.")

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


# if __name__ == "__main__":
#     asyncio.run(main()) #for .py files
await main()


In [None]:
import logging


# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def fetch_proxies(url):
    """Fetches and parses proxies from the given URL."""
    proxies = []
    timeout = aiohttp.ClientTimeout(total=30)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                text = await response.text()
                proxies = text.splitlines()
                proxies = [proxy.strip() for proxy in proxies if proxy.strip()]

    except aiohttp.ClientError as e:
        logger.error(f"Client error fetching proxies: {e}")
    except Exception as e:
        logger.exception(f"Error fetching proxies from {url}: {e}")
    return proxies


async def simple_test_proxy(proxy):
    """Performs a simple GET request through the proxy to a test site."""
    proxy_url = f"http://{proxy}"  # Use http for initial test
    timeout = aiohttp.ClientTimeout(total=5)
    test_url = "http://httpbin.org/ip"

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(test_url, proxy=proxy_url) as response:
                response.raise_for_status()
                data = await response.json()
                logger.info(f"Proxy {proxy} test successful. Response: {data}")
                return True

    except Exception as e:
        logger.debug(f"Proxy {proxy} test failed: {e}")  # Debug level for failed simple tests
        return False


async def test_proxy(proxy, working_proxies):
    """Tests an HTTPS proxy using CONNECT."""
    proxy_url = f"http://{proxy}"  # Still use http:// for proxy_url
    timeout = aiohttp.ClientTimeout(total=10)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request("CONNECT", "scholar.google.com:443", proxy=proxy_url) as conn_response:
                conn_response.raise_for_status()
                # Now that the tunnel is established, use https for the GET request
                async with session.get("https://scholar.google.com/scholar", proxy=proxy_url) as get_response:
                    get_response.raise_for_status()
                    logger.info(f"Successfully fetched Google using proxy: {proxy}")
                    working_proxies.append(proxy)

    except aiohttp.ClientProxyConnectionError as e:
        logger.error(f"Proxy connection error for {proxy}: {e}")
    except aiohttp.ClientError as e:
        logger.error(f"Client error for {proxy}: {e}")
    except asyncio.TimeoutError:
        logger.error(f"Timeout error for {proxy}")
    except Exception as e:
        logger.exception(f"Unexpected error testing proxy {proxy}: {e}")


async def main():
    """Main function to fetch, test, and print proxies."""
    try:
        proxy_url = "https://advanced.name/freeproxy/67b5d6e7c5c96?type=https"
        proxies_list = await fetch_proxies(proxy_url)
        working_proxies = []

        if proxies_list:
            print(f"\nTotal proxies fetched: {len(proxies_list)}")

            # Perform simple tests first (using http)
            print("\nPerforming simple tests (HTTP)...")
            simple_test_results = await asyncio.gather(*[simple_test_proxy(proxy) for proxy in proxies_list])

            # Filter proxies that passed the simple test
            simply_tested_proxies = [proxy for proxy, passed in zip(proxies_list, simple_test_results) if passed]

            if simply_tested_proxies:
                print(f"\n{len(simply_tested_proxies)} proxies passed initial HTTP test")

                # Test against scholar.google.com (HTTPS)
                print(f"\nTesting {len(simply_tested_proxies)} proxies against scholar.google.com (HTTPS)")
                test_tasks = [test_proxy(proxy, working_proxies) for proxy in simply_tested_proxies]
                await asyncio.gather(*test_tasks)
            else:
                print("No proxies passed initial test.")

            print(f"\nWorking HTTPS proxies (scholar.google.com): {len(working_proxies)}")
            for proxy in working_proxies:
                print(proxy)
        else:
            print("No proxies found.")

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


await main()


2025-02-19 17:21:32,251 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 13.246.209.48:1080 test successful. Response: {'origin': '13.246.37.12'}
2025-02-19 17:21:32,256 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 13.246.184.110:3128 test successful. Response: {'origin': '13.246.37.12'}
2025-02-19 17:21:32,282 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 54.179.44.51:3128 test successful. Response: {'origin': '47.129.126.231'}
2025-02-19 17:21:32,285 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 13.213.114.238:3128 test successful. Response: {'origin': '47.129.126.231'}
2025-02-19 17:21:32,286 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 54.179.39.14:3128 test successful. Response: {'origin': '47.129.126.231'}



Total proxies fetched: 105

Performing simple tests (HTTP)...


2025-02-19 17:21:32,413 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 54.248.238.110:80 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:21:32,413 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 35.72.118.126:80 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:21:32,416 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 35.76.62.196:80 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:21:32,419 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 46.51.249.135:3128 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:21:32,424 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 43.200.77.128:3128 test successful. Response: {'origin': '43.202.54.7'}
2025-02-19 17:21:32,425 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 3.37.125.76:3128 test successful. Response: {'origin': '43.202.54.7'}
2025-02-19 17:21:32,426 | INFO | 3224505320.py:43 | simple_test_proxy | Proxy 13.208.56.180:80 test 


82 proxies passed initial HTTP test

Testing 82 proxies against scholar.google.com (HTTPS)

Working HTTPS proxies (scholar.google.com): 0


In [None]:
# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


async def fetch_proxies(url):
    """Fetches and parses proxies from the given URL."""
    proxies = []
    timeout = aiohttp.ClientTimeout(total=30)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                response.raise_for_status()
                text = await response.text()
                proxies = text.splitlines()
                proxies = [proxy.strip() for proxy in proxies if proxy.strip()]

    except aiohttp.ClientError as e:
        logger.error(f"Client error fetching proxies: {e}")
    except Exception as e:
        logger.exception(f"Error fetching proxies from {url}: {e}")
    return proxies


async def simple_test_proxy(proxy):
    """Performs a simple GET request through the proxy to a test site."""
    proxy_url = f"http://{proxy}"  # Use http for initial test
    timeout = aiohttp.ClientTimeout(total=5)
    test_url = "http://httpbin.org/ip"

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(test_url, proxy=proxy_url) as response:
                response.raise_for_status()
                data = await response.json()
                logger.info(f"Proxy {proxy} test successful. Response: {data}")
                return True

    except Exception as e:
        logger.debug(f"Proxy {proxy} test failed: {e}")  # Debug level for failed simple tests
        return False


async def test_proxy(proxy, working_proxies):
    """Tests an HTTPS proxy using CONNECT (Corrected Version)."""
    proxy_url = f"http://{proxy}"  # Use http:// for the proxy URL
    timeout = aiohttp.ClientTimeout(total=10)
    connect_url = "https://scholar.google.com/"  # URL for the final GET request

    # --- CRUCIAL CHANGE: Parse the URL to get host and port ---
    parsed_url = urlparse(connect_url)
    connect_host = parsed_url.hostname
    connect_port = parsed_url.port if parsed_url.port else 443  # Default to 443 for HTTPS
    # --- END CRUCIAL CHANGE ---

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            # 1. Establish the CONNECT tunnel
            try:
                async with session.request(
                    "CONNECT",
                    f"http://{connect_host}:{connect_port}",  # Correct CONNECT URL
                    proxy=proxy_url,
                    headers={"Host": connect_host},  # aiohttp requires the Host header
                ) as conn_response:
                    conn_response.raise_for_status()  # Check for 200 OK
                    logger.info(f"CONNECT tunnel established via {proxy}")

                    # 2. Now make the actual GET request *through* the tunnel
                    async with session.get(
                        connect_url,  # Now use the full URL
                        ssl=True,  # ssl=True is crucial here
                        headers={"Host": connect_host},
                    ) as get_response:
                        get_response.raise_for_status()
                        logger.info(f"Successfully fetched {connect_url} using proxy: {proxy}")
                        # logger.info(f"Response (first 500 chars):\n{text_data[:500]}")
                        working_proxies.append(proxy)
            except aiohttp.ClientProxyConnectionError as e:
                logger.error(f"Proxy connection error: {e}")
                if e.status:
                    logger.error(f"  Proxy status: {e.status}")
                if e.message:
                    logger.error(f"  Proxy message: {e.message}")
            except aiohttp.ClientResponseError as e:  # Catch HTTP errors after the tunnel
                logger.error(f"HTTP error after CONNECT: {e.status} - {e.message}")
            except Exception:  # Other exceptions during CONNECT
                logger.exception("Error during CONNECT:", exc_info=True)

    except Exception as e:  # General exception for session creation
        logger.exception(f"Error testing proxy {proxy}:", exc_info=True)
        # The detailed exception handling you had is good, keep it:
        try:  # Try to get even lower-level details
            if isinstance(e, aiohttp.ClientProxyConnectionError):
                logger.error(f"  Proxy connection error details: {e.args}")
            if isinstance(e, aiohttp.ClientConnectorError):
                logger.error(f"   OS Error details {e.os_error}")
        except:
            pass


async def main():
    """Main function to fetch, test, and print proxies."""
    try:
        proxy_url = "https://advanced.name/freeproxy/67b5d6e7c5c96?type=https"
        proxies_list = await fetch_proxies(proxy_url)
        working_proxies = []

        if proxies_list:
            print(f"\nTotal proxies fetched: {len(proxies_list)}")

            # Perform simple tests first (using http)
            print("\nPerforming simple tests (HTTP)...")
            simple_test_results = await asyncio.gather(*[simple_test_proxy(proxy) for proxy in proxies_list])

            # Filter proxies that passed the simple test
            simply_tested_proxies = [proxy for proxy, passed in zip(proxies_list, simple_test_results) if passed]

            if simply_tested_proxies:
                print(f"\n{len(simply_tested_proxies)} proxies passed initial HTTP test")

                # Test against scholar.google.com (HTTPS)
                print(f"\nTesting {len(simply_tested_proxies)} proxies against scholar.google.com (HTTPS)")
                test_tasks = [test_proxy(proxy, working_proxies) for proxy in simply_tested_proxies]
                await asyncio.gather(*test_tasks)
            else:
                print("No proxies passed initial test.")

            print(f"\nWorking HTTPS proxies (scholar.google.com): {len(working_proxies)}")
            for proxy in working_proxies:
                print(proxy)
        else:
            print("No proxies found.")

    except Exception as e:
        logger.exception(f"An error occurred within main: {e}")


await main()


2025-02-19 17:47:24,210 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 13.246.184.110:3128 test successful. Response: {'origin': '13.246.37.12'}
2025-02-19 17:47:24,212 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 13.246.209.48:1080 test successful. Response: {'origin': '13.246.37.12'}
2025-02-19 17:47:24,251 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 54.179.39.14:3128 test successful. Response: {'origin': '47.129.126.231'}
2025-02-19 17:47:24,252 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 54.179.44.51:3128 test successful. Response: {'origin': '47.129.126.231'}
2025-02-19 17:47:24,252 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 13.213.114.238:3128 test successful. Response: {'origin': '47.129.126.231'}



Total proxies fetched: 111

Performing simple tests (HTTP)...


2025-02-19 17:47:24,378 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 35.72.118.126:80 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:47:24,379 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 54.248.238.110:80 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:47:24,380 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 3.37.125.76:3128 test successful. Response: {'origin': '43.202.54.7'}
2025-02-19 17:47:24,390 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 35.79.120.242:3128 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:47:24,391 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 52.196.1.182:80 test successful. Response: {'origin': '35.77.223.109'}
2025-02-19 17:47:24,392 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 43.202.154.212:80 test successful. Response: {'origin': '43.202.54.7'}
2025-02-19 17:47:24,392 | INFO | 585663811.py:44 | simple_test_proxy | Proxy 13.208.56.180:80 test successf


83 proxies passed initial HTTP test

Testing 83 proxies against scholar.google.com (HTTPS)


2025-02-19 17:47:29,583 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 125.26.211.159:8080
2025-02-19 17:47:29,600 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 35.76.62.196:80
2025-02-19 17:47:29,604 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 35.72.118.126:80
2025-02-19 17:47:29,605 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 52.196.1.182:80
2025-02-19 17:47:29,609 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 35.79.120.242:3128
2025-02-19 17:47:29,611 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 43.200.77.128:3128
2025-02-19 17:47:29,614 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 46.51.249.135:3128
2025-02-19 17:47:29,614 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 54.248.238.110:80
2025-02-19 17:47:29,620 | INFO | 585663811.py:75 | test_proxy | CONNECT tunnel established via 4


Working HTTPS proxies (scholar.google.com): 80
13.246.184.110:3128
18.185.169.150:3128
54.179.44.51:3128
3.126.147.182:80
13.48.109.48:3128
3.122.84.99:3128
3.124.133.93:3128
54.179.39.14:3128
51.20.19.159:3128
54.248.238.110:80
13.38.153.36:80
43.200.77.128:3128
3.127.62.252:80
51.17.58.162:3128
3.90.100.12:80
3.123.150.192:80
13.37.89.201:80
16.16.239.39:3128
15.236.106.236:3128
52.16.232.164:3128
63.32.1.88:3128
44.218.183.55:80
52.196.1.182:80
13.36.104.85:80
43.201.121.81:80
3.78.92.159:3128
3.141.217.225:80
3.99.167.1:3128
46.51.249.135:3128
3.21.101.158:3128
52.65.193.254:3128
3.129.184.210:80
51.20.50.149:3128
35.72.118.126:80
125.26.211.159:8080
18.228.149.161:80
3.97.176.251:3128
51.16.199.206:3128
3.71.239.218:3128
204.236.137.68:80
13.36.87.105:3128
3.12.144.146:3128
13.213.114.238:3128
52.73.224.54:3128
52.63.129.110:3128
51.16.179.113:1080
54.152.3.36:80
35.76.62.196:80
3.127.121.101:80
18.223.25.15:80
3.130.65.162:3128
35.79.120.242:3128
13.37.73.214:80
43.202.154.212:8

In [38]:


async def test_proxy(proxy):
    """Tests an HTTPS proxy using CONNECT (Corrected Version)."""
    proxy_url = f"http://{proxy}"  # Use http:// for the proxy URL
    timeout = aiohttp.ClientTimeout(total=10)
    connect_url = "https://scholar.google.com/"  # URL for the final GET request

    # --- CRUCIAL CHANGE: Parse the URL to get host and port ---
    parsed_url = urlparse(connect_url)
    connect_host = parsed_url.hostname
    connect_port = parsed_url.port if parsed_url.port else 443  # Default to 443 for HTTPS
    # --- END CRUCIAL CHANGE ---

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            # 1. Establish the CONNECT tunnel
            try:
                async with session.request(
                    "CONNECT",
                    f"http://{connect_host}:{connect_port}",  # Correct CONNECT URL
                    proxy=proxy_url,
                    headers={"Host": connect_host},  # aiohttp requires the Host header
                ) as conn_response:
                    conn_response.raise_for_status()  # Check for 200 OK
                    logger.info(f"CONNECT tunnel established via {proxy}")

                    # 2. Now make the actual GET request *through* the tunnel
                    async with session.get(
                        connect_url,  # Now use the full URL
                        ssl=True,  # ssl=True is crucial here
                        headers={"Host": connect_host},
                    ) as get_response:
                        get_response.raise_for_status()
                        # data = await get_response.json()
                        text_data = await get_response.text()
                        # logger.info(f"Successfully fetched {connect_url} using proxy: {proxy}, Response {data}")
                        logger.info(f"Response (first 500 chars):\n{text_data[:500]}")
            except aiohttp.ClientProxyConnectionError as e:
                logger.error(f"Proxy connection error: {e}")
                if e.status:
                    logger.error(f"  Proxy status: {e.status}")
                if e.message:
                    logger.error(f"  Proxy message: {e.message}")
            except aiohttp.ClientResponseError as e:  # Catch HTTP errors after the tunnel
                logger.error(f"HTTP error after CONNECT: {e.status} - {e.message}")
            except Exception:  # Other exceptions during CONNECT
                logger.exception("Error during CONNECT:", exc_info=True)

    except Exception as e:  # General exception for session creation
        logger.exception(f"Error testing proxy {proxy}:", exc_info=True)
        # The detailed exception handling you had is good, keep it:
        try:  # Try to get even lower-level details
            if isinstance(e, aiohttp.ClientProxyConnectionError):
                logger.error(f"  Proxy connection error details: {e.args}")
            if isinstance(e, aiohttp.ClientConnectorError):
                logger.error(f"   OS Error details {e.os_error}")
        except:
            pass


In [39]:
await test_proxy("13.36.113.81:3128")

2025-02-19 17:52:39,125 | INFO | 3473874145.py:27 | test_proxy | CONNECT tunnel established via 13.36.113.81:3128
2025-02-19 17:52:40,316 | INFO | 3473874145.py:39 | test_proxy | Response (first 500 chars):
<!doctype html><html><head><title>Google Scholar</title><meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"><meta http-equiv="X-UA-Compatible" content="IE=Edge"><meta name="referrer" content="always"><meta name="viewport" content="width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=2"><meta name="format-detection" content="telephone=no"><link rel="shortcut icon" href="/favicon.ico"><meta name="google-site-verification" content="Y8J1_s45IeTudoUMT5t7AB5kel7unVEK-Wjx


In [None]:
# scholar_scraper/scholar_scraper/proxy_manager.py
import logging



class NoProxiesAvailable(Exception):
    pass


# Set up logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s", level=logging.DEBUG
)
logger = logging.getLogger(__name__)


class ProxyManager:
    def __init__(self, timeout=2):
        # We let FreeProxy handle filtering.
        self.fp = FreeProxy(timeout=timeout)
        self.proxy_list = []
        self.logger = logging.getLogger(__name__)
        self.test_url = "https://scholar.google.com/"  # Test directly with Scholar
        self.timeout = timeout  # store for later use
        self.num_proxies = 30
        # No semaphore needed with aiohttp, connection pooling is handled.

    async def _test_proxy(self, proxy):
        """Tests an HTTPS proxy using CONNECT."""
        proxy_url = f"http://{proxy}"  # Use http:// for the proxy URL
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        connect_url = self.test_url

        parsed_url = urlparse(connect_url)
        connect_host = parsed_url.hostname
        connect_port = parsed_url.port if parsed_url.port else 443

        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                try:
                    async with session.request(
                        "CONNECT",
                        f"http://{connect_host}:{connect_port}",
                        proxy=proxy_url,
                        headers={"Host": connect_host},
                    ) as conn_response:
                        conn_response.raise_for_status()
                        logger.debug(f"CONNECT tunnel established via {proxy}")

                        async with session.get(
                            connect_url,
                            ssl=True,
                            headers={"Host": connect_host},
                        ) as get_response:
                            get_response.raise_for_status()
                            logger.info(f"Successfully fetched {connect_url} using proxy: {proxy}")
                            return proxy  # Return the proxy if successful

                except aiohttp.ClientProxyConnectionError as e:
                    logger.debug(f"Proxy connection error: {e}")
                except aiohttp.ClientResponseError as e:
                    logger.debug(f"HTTP error after CONNECT: {e.status} - {e.message}")
                except Exception as e:
                    logger.debug(f"Error during CONNECT: {type(e).__name__}: {e}")

        except Exception as e:
            logger.debug(f"Error testing proxy {proxy}: {type(e).__name__}: {e}")
        return None  # Explicitly return None if it failed

    async def get_working_proxies(self):
        """Gets a list of working proxies, testing for CONNECT to scholar.google.com."""
        raw_proxies = self.fp.get_proxy_list(repeat=False)
        # print(raw_proxies) # Removed print statement for large lists
        if not raw_proxies:
            raise NoProxiesAvailable("No raw proxies found from free-proxy.")

        working_proxies = []
        tasks = [self._test_proxy(proxy) for proxy in raw_proxies]
        results = await asyncio.gather(*tasks)  # Run all tests concurrently

        for proxy in results:
            if proxy:
                working_proxies.append(proxy)
                if len(working_proxies) >= self.num_proxies:
                    break  # Stop as soon as we have the desired amount

        self.proxy_list = working_proxies
        self.logger.info(f"Found {len(self.proxy_list)} working proxies.")

        if not self.proxy_list:
            raise NoProxiesAvailable("No working proxies found after testing")
        return self.proxy_list

    def get_random_proxy(self):
        """Returns a random proxy from the list of working proxies."""
        if self.proxy_list:
            return random.choice(self.proxy_list)
        return None


In [42]:
proxy_manager = ProxyManager(timeout=5)
working_proxies = await proxy_manager.get_working_proxies()
print(f"Working proxies: {working_proxies}")

random_proxy = proxy_manager.get_random_proxy()
print(f"Randomly selected proxy: {random_proxy}")


2025-02-19 18:00:49,429 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using proxy: 35.79.120.242:3128
2025-02-19 18:00:49,451 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using proxy: 13.213.114.238:3128
2025-02-19 18:00:49,452 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using proxy: 3.97.176.251:3128
2025-02-19 18:00:49,453 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using proxy: 54.228.164.102:3128
2025-02-19 18:00:49,456 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using proxy: 52.16.232.164:3128
2025-02-19 18:00:49,462 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using proxy: 13.55.210.141:3128
2025-02-19 18:00:49,463 | INFO | 993685365.py:58 | _test_proxy | Successfully fetched https://scholar.google.com/ using pro

Working proxies: ['44.219.175.186:80', '52.73.224.54:3128', '54.248.238.110:80', '13.59.156.167:3128', '3.141.217.225:80', '54.152.3.36:80', '3.212.148.199:3128', '51.20.19.159:3128', '54.179.44.51:3128', '3.97.167.115:3128', '3.97.176.251:3128', '13.213.114.238:3128', '52.65.193.254:3128', '15.156.24.206:3128', '13.246.184.110:3128', '51.16.199.206:3128', '3.130.65.162:3128', '204.236.176.61:3128', '52.67.10.183:80', '54.233.119.172:3128', '43.201.121.81:80', '43.200.77.128:3128', '3.21.101.158:3128', '3.139.242.184:80', '46.51.249.135:3128', '3.127.121.101:80', '3.127.62.252:80', '35.76.62.196:80', '43.202.154.212:80', '35.72.118.126:80']
Randomly selected proxy: 3.130.65.162:3128


In [1]:
import logging
from typing import List, Optional



class NoProxiesAvailable(Exception):
    pass


logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s", level=logging.DEBUG
)
logger = logging.getLogger(__name__)


class ProxyManager:
    def __init__(self, timeout=5, refresh_interval=300, blacklist_duration=600, num_proxies=20):
        self.logger = logging.getLogger(__name__)
        self.fp = FreeProxy()
        self.proxy_list = []
        self.blacklist = {}  # {proxy: timestamp}
        self.refresh_interval = refresh_interval
        self.blacklist_duration = blacklist_duration
        self.last_refresh = 0
        self.num_proxies = num_proxies
        self.timeout = timeout
        self.test_url = "https://scholar.google.com/"  # Test with Google Scholar

    async def _test_proxy(self, proxy: str) -> Optional[str]:
        """Test if a proxy is working using aiohttp and CONNECT."""
        if proxy in self.blacklist and time.time() - self.blacklist[proxy] < self.blacklist_duration:
            return None

        proxy_url = f"http://{proxy}"
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        connect_url = self.test_url
        parsed_url = urlparse(connect_url)
        connect_host = parsed_url.hostname
        connect_port = parsed_url.port if parsed_url.port else 443

        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                try:
                    async with session.request(
                        "CONNECT",
                        f"http://{connect_host}:{connect_port}",
                        proxy=proxy_url,
                        headers={"Host": connect_host},
                    ) as conn_response:
                        conn_response.raise_for_status()
                        self.logger.debug(f"CONNECT tunnel established via {proxy}")

                        async with session.get(
                            connect_url,
                            ssl=True,
                            headers={"Host": connect_host},
                        ) as get_response:
                            get_response.raise_for_status()
                            self.logger.info(f"Successfully fetched {connect_url} using proxy: {proxy}")
                            return proxy  # Return just the proxy (no latency)

                except aiohttp.ClientProxyConnectionError as e:
                    self.logger.debug(f"Proxy connection error: {e}")
                except aiohttp.ClientResponseError as e:
                    self.logger.debug(f"HTTP error after CONNECT: {e.status} - {e.message}")
                except Exception as e:
                    self.logger.debug(f"Error during CONNECT: {type(e).__name__}: {e}")
        except Exception as e:
            self.logger.debug(f"Error testing proxy {proxy}: {type(e).__name__}: {e}")

        return None

    async def get_working_proxies(self) -> List[str]:
        """Fetch, test, and return a list of working proxies."""
        current_time = time.time()
        if current_time - self.last_refresh < self.refresh_interval and self.proxy_list:
            return self.proxy_list

        raw_proxies = self.fp.get_proxy_list(repeat=True)
        self.logger.debug(f"Fetched proxies: {raw_proxies}")
        if not raw_proxies:
            self.logger.warning("No proxies found from FreeProxy.")
            raise NoProxiesAvailable("No raw proxies found.")

        tasks = [self._test_proxy(proxy) for proxy in raw_proxies]
        results = await asyncio.gather(*tasks)

        working_proxies = [proxy for proxy in results if proxy]  # Filter out None values
        self.proxy_list = working_proxies[: self.num_proxies]  # Limit to the first num_proxies
        self.last_refresh = time.time()

        if not self.proxy_list:
            self.logger.warning("No working proxies found.")
            raise NoProxiesAvailable("No working proxies found.")

        return self.proxy_list

    async def refresh_proxies(self):
        """Force refresh the proxy list."""
        await self.get_working_proxies()

    def get_random_proxy(self) -> Optional[str]:
        """Return a random working proxy."""
        try:
            if not self.proxy_list:
                asyncio.run(self.refresh_proxies())  # Blocks the calling method.
            return random.choice(self.proxy_list) if self.proxy_list else None
        except NoProxiesAvailable:
            return None

    def remove_proxy(self, proxy: str):
        """Remove a proxy and blacklist it."""
        if proxy in self.proxy_list:
            self.proxy_list.remove(proxy)
            self.blacklist[proxy] = time.time()
            self.logger.info(f"Removed proxy {proxy} and added to blacklist.")


async def test_proxy_manager():
    """Tests the ProxyManager."""
    try:
        proxy_manager = ProxyManager(timeout=10)  # Increased timeout
        working_proxies = await proxy_manager.get_working_proxies()
        print(f"Working proxies: {working_proxies}")

        random_proxy = proxy_manager.get_random_proxy()
        print(f"Randomly selected proxy: {random_proxy}")

        if random_proxy:
            proxy_manager.remove_proxy(random_proxy)
            print(f"Removed proxy: {random_proxy}")
            print(f"Blacklist: {proxy_manager.blacklist}")
        else:
            print("No proxy to remove.")

    except NoProxiesAvailable as e:
        print(f"Error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


# if __name__ == "__main__":
#     asyncio.run(test_proxy_manager())
await test_proxy_manager()

2025-02-19 20:25:48,942 | DEBUG | connectionpool.py:1049 | _new_conn | Starting new HTTPS connection (1): free-proxy-list.net:443
2025-02-19 20:25:49,671 | DEBUG | connectionpool.py:544 | _make_request | https://free-proxy-list.net:443 "GET / HTTP/1.1" 200 None
2025-02-19 20:25:49,955 | DEBUG | 1183542168.py:86 | get_working_proxies | Fetched proxies: ['200.174.198.86:8888', '5.161.103.41:88', '147.182.180.242:80', '47.250.159.65:8443', '47.91.109.17:8108', '85.215.64.49:80', '34.81.72.31:80', '23.247.137.142:80', '188.253.112.218:80', '165.232.129.150:80', '103.152.112.120:80', '23.247.136.245:80', '23.247.136.254:80', '103.152.112.157:80', '180.210.89.215:3128', '81.169.213.169:8888', '207.2.120.16:80', '195.114.209.50:80', '103.149.201.160:30008', '31.47.58.37:80', '51.68.175.56:1080', '202.154.18.138:8080', '86.98.17.170:8080', '70.36.118.124:30003', '31.58.58.213:10004', '172.233.78.254:7890', '182.52.83.228:34599', '159.65.245.255:80', '5.45.126.128:8080', '143.42.66.91:80', '207

Working proxies: ['200.174.198.86:8888', '47.250.159.65:8443', '47.91.109.17:8108', '103.149.201.160:30008', '51.68.175.56:1080', '202.154.18.138:8080', '86.98.17.170:8080', '70.36.118.124:30003', '31.58.58.213:10004', '172.233.78.254:7890', '182.52.83.228:34599', '222.252.194.204:8080', '8.219.97.248:80', '47.251.122.81:8888', '13.38.153.36:80', '13.36.87.105:3128', '44.218.183.55:80', '44.195.247.145:80', '37.187.25.85:80', '65.1.244.232:80']
Randomly selected proxy: 44.195.247.145:80
Removed proxy: 44.195.247.145:80
Blacklist: {'44.195.247.145:80': 1739982360.3630235}


In [45]:
import logging
from typing import List, Optional

import httpx


class NoProxiesAvailable(Exception):
    pass


logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s", level=logging.DEBUG
)
logger = logging.getLogger(__name__)


class ProxyManager:
    def __init__(self, timeout=5, refresh_interval=300, blacklist_duration=600, num_proxies=20):
        self.logger = logging.getLogger(__name__)
        self.fp = FreeProxy()
        self.proxy_list = []
        self.blacklist = {}  # {proxy: timestamp}
        self.refresh_interval = refresh_interval
        self.blacklist_duration = blacklist_duration
        self.last_refresh = 0
        self.num_proxies = num_proxies
        self.timeout = timeout
        self.test_url = "https://scholar.google.com/"  # Test with Google Scholar
        self.semaphore = asyncio.Semaphore(num_proxies)

    async def _test_proxy(self, proxy: str) -> Optional[str]:
        """Test if a proxy is working using httpx and CONNECT."""
        if proxy in self.blacklist and time.time() - self.blacklist[proxy] < self.blacklist_duration:
            return None

        proxy_url = f"http://{proxy}"
        # httpx requires mounts for different protocols when using a proxy
        proxy_mounts = {
            "http://": httpx.AsyncHTTPTransport(proxy=proxy_url),
            "https://": httpx.AsyncHTTPTransport(proxy=proxy_url),
        }
        connect_url = self.test_url
        parsed_url = urlparse(connect_url)
        connect_host = parsed_url.hostname
        connect_port = parsed_url.port if parsed_url.port else 443

        try:
            async with self.semaphore:  # Control concurrency
                async with httpx.AsyncClient(mounts=proxy_mounts, timeout=self.timeout) as client:
                    try:
                        # CONNECT request (tunnel establishment)
                        # httpx doesn't have a direct "CONNECT" method, but we use a normal request to
                        # trigger the tunnel.  It *must* be to an http:// URL for the CONNECT phase.
                        conn_response = await client.get(f"http://{connect_host}:{connect_port}")
                        conn_response.raise_for_status()  # Check for 200 OK
                        self.logger.debug(f"CONNECT tunnel established via {proxy}")

                        # Actual GET request through the tunnel
                        get_response = await client.get(connect_url)
                        get_response.raise_for_status()
                        self.logger.info(f"Successfully fetched {connect_url} using proxy: {proxy}")
                        return proxy

                    except httpx.RequestError as e:
                        self.logger.debug(f"Proxy connection error: {e}")
                    except httpx.HTTPStatusError as e:
                        self.logger.debug(f"HTTP error after CONNECT: {e.response.status_code} - {e}")
                    except Exception as e:
                        self.logger.debug(f"Error during CONNECT: {type(e).__name__}: {e}")

        except Exception as e:
            self.logger.debug(f"Error testing proxy {proxy}: {type(e).__name__}: {e}")
        return None

    async def get_working_proxies(self) -> List[str]:
        """Fetch, test, and return a list of working proxies."""
        current_time = time.time()
        if current_time - self.last_refresh < self.refresh_interval and self.proxy_list:
            return self.proxy_list

        raw_proxies = self.fp.get_proxy_list(repeat=True)
        self.logger.debug(f"Fetched proxies: {raw_proxies}")
        if not raw_proxies:
            self.logger.warning("No proxies found from FreeProxy.")
            raise NoProxiesAvailable("No raw proxies found.")

        tasks = [self._test_proxy(proxy) for proxy in raw_proxies]
        results = await asyncio.gather(*tasks)

        working_proxies = [proxy for proxy in results if proxy]
        self.proxy_list = working_proxies[: self.num_proxies]
        self.last_refresh = time.time()

        if not self.proxy_list:
            self.logger.warning("No working proxies found.")
            raise NoProxiesAvailable("No working proxies found.")

        return self.proxy_list

    async def refresh_proxies(self):
        """Force refresh the proxy list."""
        await self.get_working_proxies()

    def get_random_proxy(self) -> Optional[str]:
        """Return a random working proxy."""
        try:
            if not self.proxy_list:
                asyncio.run(self.refresh_proxies())
            return random.choice(self.proxy_list) if self.proxy_list else None
        except NoProxiesAvailable:
            return None

    def remove_proxy(self, proxy: str):
        """Remove a proxy and blacklist it."""
        if proxy in self.proxy_list:
            self.proxy_list.remove(proxy)
            self.blacklist[proxy] = time.time()
            self.logger.info(f"Removed proxy {proxy} and added to blacklist.")


In [46]:
proxy_manager = ProxyManager(timeout=10)  # Increased timeout
working_proxies = await proxy_manager.get_working_proxies()
print(f"Working proxies: {working_proxies}")

random_proxy = proxy_manager.get_random_proxy()
print(f"Randomly selected proxy: {random_proxy}")

if random_proxy:
    proxy_manager.remove_proxy(random_proxy)
    print(f"Removed proxy: {random_proxy}")
    print(f"Blacklist: {proxy_manager.blacklist}")
else:
    print("No proxy to remove.")

2025-02-19 18:19:59,788 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://scholar.google.com:443 "HTTP/1.1 502 Bad Gateway"
2025-02-19 18:19:59,974 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://scholar.google.com:443 "HTTP/1.1 400 Bad Request"
2025-02-19 18:19:59,975 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://scholar.google.com:443 "HTTP/1.1 502 DNS lookup error"
2025-02-19 18:20:00,163 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://scholar.google.com:443 "HTTP/1.1 502 Bad Gateway"
2025-02-19 18:20:00,164 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://scholar.google.com:443 "HTTP/1.1 502 Bad Gateway"
2025-02-19 18:20:00,164 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://scholar.google.com:443 "HTTP/1.1 502 Bad Gateway"
2025-02-19 18:20:01,290 | INFO | _client.py:1740 | _send_single_request | HTTP Request: GET http://sc

NoProxiesAvailable: No working proxies found.