# Playground for Proxies

In [None]:
import json
from io import StringIO

import countryflag
import pandas as pd
import requests

#### https://proxyscrape.com/free-proxy-list

In [None]:
# Endpoint of the ProxyScrape v3 free-proxy-list API; queried by both
# get_proxyscrape_list() and get_proxyscrape_dict().
PROXYSCRAPE_URL = 'https://api.proxyscrape.com/v3/free-proxy-list/get'

In [None]:
def write_dict_to_json_file(data: dict, filename: str):
    """Serialize ``data`` as JSON with a 4-space indent into ``filename``.

    The target is opened in write mode as UTF-8, so existing content is
    replaced.
    """
    with open(filename, 'w', encoding='utf-8') as out_file:
        json.dump(data, fp=out_file, indent=4)

In [None]:
def write_list_to_txt_file(data: list, filename: str):
    """Write the strings in ``data`` to ``filename``, one entry per line.

    Entries are joined with a newline character (no trailing newline is
    added) and the target is opened in write mode as UTF-8.
    """
    with open(filename, 'w', encoding='utf-8') as out_file:
        out_file.write('\n'.join(data))

In [None]:
def get_proxyscrape_list(country: str = 'FR', protocol: str = 'socks4') -> tuple:
    """Fetch ProxyScrape proxies as a list of strings and save them to disk.

    Requests the plain-text proxy list for ``country`` and ``protocol``,
    splits it into one entry per line and writes the entries to
    ``proxyscrape_<country>.txt`` (country lower-cased).

    Args:
        country: Value sent as the API's ``country`` query parameter.
        protocol: Value sent as the API's ``protocol`` query parameter.

    Returns:
        ``(True, proxies)`` with the list of proxy strings on success, or
        ``(False, message)`` with the stringified exception on failure.
    """
    params = {
        'request': 'displayproxies',
        'proxy_format': 'protocolipport',
        'format': 'text',
        'protocol': protocol,
        'timeout': 3000,
        'anonymity': 'all',
        'country': country,
    }
    try:
        response = requests.get(url=PROXYSCRAPE_URL, params=params, timeout=3)
        response.raise_for_status()
        # splitlines() copes with any line-ending style, and skipping blank
        # lines makes an empty body yield [] instead of [''].
        proxies = [
            line.strip()
            for line in response.text.splitlines()
            if line.strip()
        ]
        write_list_to_txt_file(proxies, f'proxyscrape_{country.lower()}.txt')
        return True, proxies
    except Exception as e:
        # Deliberate catch-all: callers branch on the (ok, payload) tuple
        # instead of handling exceptions themselves.
        return False, str(e)

In [None]:
def get_proxyscrape_dict(country: str, protocol: str) -> tuple:
    """Fetch the ProxyScrape proxy list as parsed JSON.

    Args:
        country: Value sent as the API's ``country`` query parameter.
        protocol: Value sent as the API's ``protocol`` query parameter.

    Returns:
        ``(True, payload)`` with the decoded JSON body on success, or
        ``(False, message)`` with the stringified exception on failure.
    """
    query = dict(
        request='displayproxies',
        proxy_format='protocolipport',
        format='json',
        protocol=protocol,
        timeout=3000,
        anonymity='all',
        country=country,
    )
    try:
        reply = requests.get(url=PROXYSCRAPE_URL, params=query, timeout=3)
        reply.raise_for_status()
        payload = reply.json()
    except Exception as err:
        return False, str(err)
    # The payload is handed back as-is; nothing is written to disk here.
    return True, payload

In [None]:
# Fetch every socks4 proxy, store the flattened records as JSON and collect
# the country codes that occur.
# `countries` is bound up front because a later cell reads it even when the
# fetch fails or returns no rows.
countries = []
success, proxies = get_proxyscrape_dict(country='all', protocol='socks4')
if success:
    # `or []` covers a payload whose 'proxies' entry is missing or null.
    df = pd.json_normalize(proxies.get('proxies') or []).astype(str)
    df.to_json('proxyscrape_all.json', indent=4, orient='records')
    if not df.empty:
        countries = sorted(df['ip_data.countryCode'].unique().tolist())
        print(countries)
        print(df[['ip_data.countryCode', 'proxy']].head(10))
else:
    # On failure `proxies` holds the error message, not proxy data.
    print(proxies)

In [None]:
# "ip:port" string for every row. Built column-wise rather than with a
# row-wise apply(): the positional x[0]/x[1] lookups on a label-indexed row
# are deprecated in pandas.
(df['ip'].astype(str) + ':' + df['port'].astype(str)).tolist()

In [None]:
def get_flag(country: str):
    """Look up the flag for a single country via ``countryflag.getflag``.

    ``getflag`` is called with a one-element list holding ``country``; its
    result is returned unchanged.
    """
    single_country = [country]
    return countryflag.getflag(single_country)

In [None]:
# Pair every country code with the result of get_flag() for it, then print
# one pair per line.
countries_and_flags = []
for code in countries:
    countries_and_flags.append(f'{code} {get_flag(code)}')

for country_and_flag in countries_and_flags:
    print(country_and_flag)

#### https://www.socks-proxy.net/

In [None]:
# Browser-like request headers (Firefox 123 on Windows 10) sent with the
# socks-proxy.net page request.
headers = {
    # Client identity and content negotiation.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) '
        'Gecko/20100101 Firefox/123.0'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,*/*;q=0.8'
    ),
    'Accept-Language': 'de',
    # Navigation context.
    'Referer': 'https://www.socks-proxy.net/',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    # Fetch-metadata headers.
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-User': '?1',
    # Privacy signals.
    'DNT': '1',
    'Sec-GPC': '1',
}

In [None]:
# Download the socks-proxy.net page and let pandas parse its HTML tables.
url = "https://www.socks-proxy.net/"
# The timeout keeps the cell from hanging on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail on an HTTP error here instead of handing an error page to read_html().
response.raise_for_status()
tables = pd.read_html(StringIO(response.text))

In [None]:
# Display the first table that read_html() parsed from the page.
tables[0]

In [None]:
# Keep only the rows of the first table whose country code ('Code') is 'FR'
# and whose 'Version' is 'Socks4'; every cell is compared as a string.
proxy_table = tables[0].astype(str)
wanted_country = proxy_table['Code'] == 'FR'
wanted_version = proxy_table['Version'] == 'Socks4'
df = proxy_table.loc[wanted_country & wanted_version]
df.head(10)

In [None]:
# Persist the filtered table as a JSON array of row objects (4-space indent).
df.to_json(path_or_buf='socks_proxy.json', orient='records', indent=4)

#### https://mtpro.xyz/api-overview

In [None]:
# Fetch the mtpro.xyz socks proxy list, store it as JSON and collect the
# countries that occur.
url = "https://mtpro.xyz/api/"
params = {
    'type': 'socks'
}
# The timeout keeps the cell from hanging on an unresponsive server.
response = requests.get(url, params=params, timeout=10)
# Fail on an HTTP error here instead of trying to decode an error page.
response.raise_for_status()
response = response.json()
df = pd.DataFrame(response).astype(str)
df.to_json('mtpro.json', indent=4, orient='records')
# An empty result has no 'country' column; fall back to an empty list
# instead of raising KeyError.
countries = sorted(df['country'].unique().tolist()) if 'country' in df else []
print(countries)
df.head(10)

In [None]:
# Build "ip:port" strings column-wise rather than with a row-wise apply():
# the positional x[0]/x[1] lookups on a label-indexed row are deprecated in
# pandas.
proxies = (df['ip'].astype(str) + ':' + df['port'].astype(str)).tolist()
for p in proxies:
    print(p)