|
| 1 | +from pathlib import Path |
| 2 | +import logging |
| 3 | +import requests |
| 4 | +from requests.exceptions import ProxyError |
| 5 | +import pandas as pd |
| 6 | +from json.decoder import JSONDecodeError |
| 7 | + |
| 8 | +logging.basicConfig(level=logging.INFO) |
| 9 | + |
| 10 | + |
def add_proxies_to_file(csv_path: str, proxies: list):
    '''Add one or multiple proxies to the CSV file, updating known ones.

    Each entry of ``proxies`` is a dict with the keys ``proxy_type``,
    ``proxy_address`` and ``proxy_status``. A proxy whose type and address
    are already present in the file only gets its status overwritten; any
    other proxy is appended as a new row. The file is created if it does
    not exist yet.

    ``csv_path`` may be a string or a ``pathlib.Path``.
    '''

    # Callers pass both plain strings and Path objects; normalise once so
    # that ``.exists()`` works in either case.
    csv_path = Path(csv_path)

    if not csv_path.exists():
        proxies_file: pd.DataFrame = pd.DataFrame(columns=['proxy_type', 'proxy_address', 'proxy_status'])
        logging.info('New CSV file will be created')
    else:
        proxies_file = pd.read_csv(csv_path)
        logging.info('Existing CSV file has been loaded')

    for proxy in proxies:
        # Rows describing the same proxy (same type and address). On an
        # empty frame this is simply an empty mask, so no special case is
        # needed for the first proxy.
        same_proxy = (
            (proxies_file['proxy_type'] == proxy['proxy_type'])
            & (proxies_file['proxy_address'] == proxy['proxy_address'])
        )

        if same_proxy.any():
            # Proxy is already in the file: refresh its status only.
            proxies_file.loc[same_proxy, 'proxy_status'] = proxy['proxy_status']
        else:
            # Proxy is not yet in the file. DataFrame.append was removed in
            # pandas 2.0, so the new row is attached with pd.concat.
            proxies_file = pd.concat([proxies_file, pd.DataFrame([proxy])], ignore_index=True)

    proxies_file = proxies_file.drop_duplicates()
    proxies_file.to_csv(csv_path, index=False)
    logging.info('CSV file has been written')
| 37 | + |
| 38 | + |
def test_proxy(proxy_type: str, proxy_address: str, iptest: str):
    '''Test a proxy (type, address) against the given iptest address.

    The iptest endpoint is requested through the proxy and is expected to
    answer with a JSON object containing an ``ip`` key. The proxy counts as
    functional when that value equals the proxy address or its host part.

    Returns a dict with the keys ``proxy_type``, ``proxy_address`` and
    ``proxy_status``. The status is one of 'Proxy functional',
    'Proxy not functional', 'Invalid response', 'Proxy error' or
    'Request failed'.
    '''

    logging.info(f'Testing proxy: {proxy_address}')

    # Honour the caller-supplied endpoint; keep the previously hard-coded
    # one as fallback when no endpoint is given.
    target_url = iptest or 'http://iptest.ingokleiber.de'

    # The endpoint reports a bare IP, while the proxy address usually also
    # carries credentials and/or a port ("user:pw@1.2.3.4:8080"). Reduce it
    # to the host part for the comparison; anything with more than one
    # colon (e.g. IPv6) is left as is.
    proxy_host = proxy_address.rsplit('@', 1)[-1]
    if proxy_host.count(':') == 1:
        proxy_host = proxy_host.split(':', 1)[0]

    proxy_status: str = ''

    try:
        # Without a timeout a dead proxy would block the run indefinitely.
        r = requests.get(target_url, proxies={proxy_type: proxy_address}, timeout=10)
    except ProxyError:
        proxy_status = 'Proxy error'
    except requests.exceptions.RequestException as error:
        # Timeouts, refused connections etc. must not abort a whole batch.
        logging.warning(f'Request through proxy "{proxy_address}" failed: {error}')
        proxy_status = 'Request failed'
    else:
        try:
            json_response = r.json()
        except ValueError:
            # Every JSON decode error raised by requests is a ValueError,
            # regardless of the JSON backend in use.
            proxy_status = 'Invalid response'
        else:
            reported_ip = json_response.get('ip') if isinstance(json_response, dict) else None

            if reported_ip is not None and reported_ip in (proxy_address, proxy_host):
                proxy_status = 'Proxy functional'
            else:
                logging.warning(f'Proxy "{proxy_address}" returned {json_response}')
                proxy_status = 'Proxy not functional'

    logging.info(f'Proxy {proxy_address}: {proxy_status}')
    return {'proxy_type': proxy_type, 'proxy_address': proxy_address, 'proxy_status': proxy_status}
| 64 | + |
| 65 | + |
def test_single_proxy(proxy: str, iptest: str, csv_path: str):
    '''Test an individual proxy and add the result to the CSV file.

    ``proxy`` must have the form ``<type>://<address>``, for example
    ``http://1.2.3.4:8080``.

    Raises ValueError if ``proxy`` does not have that form.
    '''
    # Split on the first '://' only, so that an address which itself
    # contains '://' does not break the unpacking.
    proxy_type, separator, proxy_address = proxy.strip().partition('://')

    if not (separator and proxy_type and proxy_address):
        raise ValueError(f'Proxy must have the form "<type>://<address>", got {proxy!r}')

    result: dict = test_proxy(proxy_type, proxy_address, iptest)

    add_proxies_to_file(Path(csv_path), [result])
| 72 | + |
| 73 | + |
def test_file(iptest: str, csv_path: str):
    '''(Re)test every proxy in a given CSV file and write back the results.

    The CSV file is expected to provide the columns ``proxy_type`` and
    ``proxy_address``.

    Raises FileNotFoundError if the CSV file does not exist.
    '''

    csv_file = Path(csv_path)

    if not csv_file.exists():
        # Name the missing file so the error is actionable.
        raise FileNotFoundError(f'Proxy CSV file not found: {csv_file}')

    proxies_file: pd.DataFrame = pd.read_csv(csv_file)

    # Only the two identifying columns are needed, so iterate over them
    # directly instead of materialising every row.
    proxies: list = [
        test_proxy(proxy_type, proxy_address, iptest)
        for proxy_type, proxy_address in zip(proxies_file['proxy_type'], proxies_file['proxy_address'])
    ]

    add_proxies_to_file(csv_file, proxies)
| 90 | + |
| 91 | + |
def add_from_text_file(iptest: str, text_path: str, csv_path: str):
    '''Test and add a list of proxies from a text file (one proxy per line).

    Blank lines and surrounding whitespace are ignored.

    Raises FileNotFoundError if the text file does not exist.
    '''
    text_file = Path(text_path)

    if not text_file.exists():
        # Name the missing file so the error is actionable.
        raise FileNotFoundError(f'Proxy list file not found: {text_file}')

    for line in text_file.read_text().splitlines():
        proxy = line.strip()

        if not proxy:
            # Empty lines (e.g. a trailing newline block) are not proxies.
            continue

        # We will treat each proxy as a single proxy and leverage the existing function
        test_single_proxy(proxy, iptest, csv_path)
0 commit comments