|
| 1 | +import logging |
| 2 | +from json.decoder import JSONDecodeError |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | +import requests |
| 7 | +from requests.exceptions import ProxyError |
| 8 | + |
| 9 | +logging.basicConfig(level=logging.INFO) |
| 10 | + |
| 11 | + |
def add_proxies_to_file(csv_path: str, proxies: list):
    '''Add one or multiple proxies to the CSV file, updating known ones.

    The CSV file has the columns ``proxy_type``, ``proxy_address`` and
    ``proxy_status``. A proxy whose type and address are already present
    only has its status overwritten; any other proxy is appended as a new
    row. The file is created if it does not exist yet.

    Args:
        csv_path: Location of the CSV file, as a string or a ``Path``.
        proxies: Dicts with the keys ``proxy_type``, ``proxy_address`` and
            ``proxy_status``.
    '''
    # Accept plain strings as the annotation promises; Path(Path) is a no-op.
    csv_path = Path(csv_path)

    if csv_path.exists():
        pr_file: pd.DataFrame = pd.read_csv(csv_path)
        logging.info('Existing CSV file has been loaded')
    else:
        pr_file = pd.DataFrame(
            columns=['proxy_type', 'proxy_address', 'proxy_status'])
        logging.info('New CSV file will be created')

    for proxy in proxies:
        if not pr_file.empty:
            already_known = (
                (pr_file['proxy_type'] == proxy['proxy_type']) &
                (pr_file['proxy_address'] == proxy['proxy_address']))
            if already_known.any():
                # Proxy is already in the file: refresh its status only.
                pr_file.loc[already_known, 'proxy_status'] = \
                    proxy['proxy_status']
                continue

        # Proxy is not yet in the file. DataFrame.append was removed in
        # pandas 2.0, so the new row is concatenated instead.
        pr_file = pd.concat([pr_file, pd.DataFrame([proxy])],
                            ignore_index=True)

    pr_file = pr_file.drop_duplicates()
    pr_file.to_csv(csv_path, index=False)
    logging.info('CSV file has been written')
| 41 | + |
| 42 | + |
def test_proxy(proxy_type: str, proxy_address: str, iptest: str,
               timeout: float = 10.0):
    '''Test a proxy (type, address) against a given iptest address.

    The iptest service is requested through the proxy and is expected to
    answer with a JSON object containing an ``ip`` key. The proxy counts as
    functional when that IP occurs in ``proxy_address``.

    Args:
        proxy_type: Proxy scheme; ``'https'`` makes the check go to
            ``https://{iptest}``, anything else to ``http://{iptest}``.
        proxy_address: Address of the proxy, passed to ``requests`` as is.
        iptest: Host (and optional path) of the IP echo service.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        A dict with the keys ``proxy_type``, ``proxy_address`` and
        ``proxy_status``. The status is one of ``'Proxy functional'``,
        ``'Proxy not functional'``, ``'Invalid response'`` or
        ``'Proxy error'``.
    '''
    logging.info(f'Testing proxy: {proxy_address}')

    scheme = 'https' if proxy_type == 'https' else 'http'
    proxy_status: str = ''

    try:
        # Without a timeout a dead proxy would block the run indefinitely.
        response = requests.get(f'{scheme}://{iptest}',
                                proxies={proxy_type: proxy_address},
                                timeout=timeout)
    except requests.exceptions.RequestException as error:
        # ProxyError, timeouts and connection failures all mean the proxy
        # could not be used; record that instead of aborting a batch run.
        logging.warning(f'Proxy "{proxy_address}" failed: {error}')
        proxy_status = 'Proxy error'
    else:
        try:
            json_response = response.json()
        except ValueError:
            # Every JSON decode error raised by requests (json or
            # simplejson backed) is a ValueError subclass.
            proxy_status = 'Invalid response'
        else:
            reported_ip = (json_response.get('ip')
                           if isinstance(json_response, dict) else None)
            if not isinstance(reported_ip, str) or not reported_ip:
                # Valid JSON, but not the expected {"ip": "..."} payload.
                logging.warning(f'Proxy "{proxy_address}" '
                                f'returned {json_response}')
                proxy_status = 'Invalid response'
            elif reported_ip in proxy_address:
                # NOTE: this is a substring match, kept from the original.
                proxy_status = 'Proxy functional'
            else:
                logging.warning(f'Proxy "{proxy_address}" '
                                f'returned {json_response}')
                proxy_status = 'Proxy not functional'

    logging.info(f'Proxy {proxy_address}: {proxy_status}')
    return {'proxy_type': proxy_type,
            'proxy_address': proxy_address,
            'proxy_status': proxy_status}
| 76 | + |
| 77 | + |
def test_single_proxy(proxy: str, iptest: str, csv_path: str):
    '''Test an individual proxy and add the result to the CSV file.

    Args:
        proxy: Proxy in the form ``type://address``.
        iptest: Address of the IP echo service handed to ``test_proxy``.
        csv_path: Location of the CSV file that stores the results.

    Raises:
        ValueError: If ``proxy`` is not of the form ``type://address``.
    '''
    # partition splits on the first '://' only and never fails to unpack,
    # so malformed input can be reported with a meaningful message.
    proxy_type, separator, proxy_address = proxy.strip().partition('://')
    if not (separator and proxy_type and proxy_address):
        raise ValueError(
            f'Expected a proxy of the form "type://address", got {proxy!r}')

    result: dict = test_proxy(proxy_type, proxy_address, iptest)

    add_proxies_to_file(Path(csv_path), [result])
| 84 | + |
| 85 | + |
def test_csv_file(iptest: str, csv_path: str):
    '''(Re)test every proxy in a given CSV file and write the results back.

    Args:
        iptest: Address of the IP echo service handed to ``test_proxy``.
        csv_path: Location of an existing CSV file with the columns
            ``proxy_type`` and ``proxy_address``.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
    '''
    csv_file = Path(csv_path)

    if not csv_file.exists():
        raise FileNotFoundError(f'Proxy CSV file not found: {csv_file}')

    pr_file: pd.DataFrame = pd.read_csv(csv_file)

    # Test all proxies first, then write the file once.
    results: list = [
        test_proxy(row.proxy_type, row.proxy_address, iptest)
        for row in pr_file.itertuples(index=False)
    ]

    add_proxies_to_file(csv_file, results)
| 104 | + |
| 105 | + |
def add_from_text_file(iptest: str, text_path: str, csv_path: str):
    '''Test and add a list of proxies from a text file (one per line).

    Blank lines are skipped and surrounding whitespace is ignored.

    Args:
        iptest: Address of the IP echo service used for testing.
        text_path: Location of the text file listing the proxies.
        csv_path: Location of the CSV file that stores the results.

    Raises:
        FileNotFoundError: If the text file does not exist.
    '''
    text_file = Path(text_path)

    if not text_file.exists():
        raise FileNotFoundError(f'Proxy list file not found: {text_file}')

    for line in text_file.read_text().splitlines():
        proxy = line.strip()
        if not proxy:
            # Blank lines (e.g. a trailing newline) carry no proxy.
            continue

        # Treat each entry as a single proxy and reuse the existing function.
        test_single_proxy(proxy, iptest, csv_path)
0 commit comments