In [None]:
import logging
import os
import sys
import time

import pandas as pd
import requests
from ratelimit import limits, sleep_and_retry

# Logging

In [None]:
# Reuse a `logger` that is already bound in this namespace; otherwise fall
# back to the root logger. Only a missing name is treated as "not configured",
# so interrupts and unrelated errors are no longer swallowed.
try:
    logger
except NameError:
    logger = logging.getLogger("")

# Proxies

In [None]:
def get_datapath(path):
    """Resolve `path` inside the `data` directory that sits next to the notebooks.

    Walks upward from the current working directory until it reaches a
    directory named "notebooks" or "data", then returns
    `<parent of that directory>/data/<path>`.

    Args:
        path: Path relative to the `data` directory.

    Returns:
        The joined path as a string. The file itself is not required to exist.

    Raises:
        FileNotFoundError: If no ancestor of the working directory (including
            the working directory itself) is named "notebooks" or "data".
    """
    current = os.getcwd()
    while os.path.basename(current) not in ("notebooks", "data"):
        parent = os.path.dirname(current)
        if parent == current:
            # Reached the filesystem root without finding an anchor directory;
            # stop instead of looping forever.
            raise FileNotFoundError(
                f"No 'notebooks' or 'data' directory found above {os.getcwd()}"
            )
        current = parent
    project_root = os.path.dirname(current)
    return os.path.join(project_root, "data", path)

In [None]:
# Build the proxy pool from data/proxies/proxies.txt when that file exists.
# Each non-blank line is "host:port:username:password" and is turned into an
# authenticated "http://username:password@host:port" proxy URL.
PROXYLIST = []
_proxy_file = get_datapath("proxies/proxies.txt")
if os.path.exists(_proxy_file):
    with open(_proxy_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on tuple unpacking.
                continue
            # maxsplit=3 keeps any ":" characters inside the password intact.
            proxyurl, port, username, password = line.split(":", 3)
            PROXYLIST.append(f"http://{username}:{password}@{proxyurl}:{port}")
# Position in PROXYLIST of the proxy most recently selected by get_proxy.
PROXYLIST_INDEX = 0

In [None]:
# Optional overrides: PROXY_PARTITION and PROXY_NUMBER are not defined in this
# file, so they are presumably set by whatever runs this notebook (TODO
# confirm). A missing name leaves PROXYLIST untouched; an invalid value is
# still ignored, but is now logged instead of being silently swallowed.
try:
    PROXY_PARTITION
except NameError:
    pass  # no partitioning requested
else:
    try:
        # "<index>,<count>": keep proxies whose position modulo <count> is <index>.
        _partition_fields = PROXY_PARTITION.split(",")
        _partition_index = int(_partition_fields[0])
        _partition_count = int(_partition_fields[1])
        PROXYLIST = [
            x
            for i, x in enumerate(PROXYLIST)
            if i % _partition_count == _partition_index
        ]
    except Exception as e:
        logger.warning(f"Ignoring invalid PROXY_PARTITION {PROXY_PARTITION!r}: {e}")
try:
    PROXY_NUMBER
except NameError:
    pass  # no single-proxy selection requested
else:
    try:
        # Pin the pool to a single proxy chosen by position.
        PROXYLIST = [PROXYLIST[PROXY_NUMBER]]
    except Exception as e:
        logger.warning(f"Ignoring invalid PROXY_NUMBER {PROXY_NUMBER!r}: {e}")

In [None]:
def get_proxy(source):
    """Pick the proxy mapping to use for the next request.

    Returns None when PROXYLIST is empty. For the "mal" source the proxy at
    index 0 is always used (and PROXYLIST_INDEX is asserted to still be 0);
    for any other source PROXYLIST_INDEX is advanced round-robin first.

    Args:
        source: Name of the API being called.

    Returns:
        A dict with "http" and "https" keys both set to the chosen proxy URL,
        or None if no proxies are configured.
    """
    global PROXYLIST_INDEX
    if not PROXYLIST:
        return None
    if source != "mal":
        # rotate to the next proxy in the pool, wrapping around at the end
        PROXYLIST_INDEX = (PROXYLIST_INDEX + 1) % len(PROXYLIST)
    else:
        # switching between proxies can cause the auth token to expire
        assert PROXYLIST_INDEX == 0
    chosen = PROXYLIST[PROXYLIST_INDEX]
    return {"http": chosen, "https": chosen}

# API endpoint

In [None]:
# Tune API_PERIOD (passed as the rate limiter's `period` further down) using
# optional settings. API_PERIOD, API_PERIOD_MULT and MIN_API_PERIOD are not
# defined in this file, so they are presumably supplied by whatever runs this
# notebook (TODO confirm). Only a missing name is ignored; an invalid
# multiplier is now reported instead of being silently discarded.
try:
    API_PERIOD_MULT
    if API_PERIOD_MULT < 1:
        raise ValueError(f"API_PERIOD_MULT must be >= 1, got {API_PERIOD_MULT}")
    API_PERIOD = API_PERIOD * API_PERIOD_MULT
except NameError:
    pass
try:
    # Divide the period by the number of proxies in the pool (at least 1).
    API_PERIOD = API_PERIOD / max(len(PROXYLIST), 1)
except NameError:
    pass
try:
    MIN_API_PERIOD
    # Never go below the configured floor.
    API_PERIOD = max(API_PERIOD, MIN_API_PERIOD)
except NameError:
    pass

In [None]:
@sleep_and_retry
@limits(calls=1, period=API_PERIOD)
def call_api_internal(
    url, request_type, source, retry_timeout=1, extra_error_codes=None, **kwargs
):
    """Send one HTTP request through the rate limiter, retrying on failure.

    The request goes through the proxy chosen by `get_proxy(source)`. If the
    request raises, or the response has a transient status code while
    `retry_timeout` is still below 3600, the function logs a warning, sleeps,
    doubles the timeout (capped at 3600) and calls itself again. The retry
    goes through the same decorators as the first attempt.

    Args:
        url: Target URL.
        request_type: "GET" or "POST".
        source: API name forwarded to `get_proxy`.
        retry_timeout: Seconds to sleep before the next retry. A numeric
            `Retry-After` response header overrides it.
        extra_error_codes: Additional status codes to treat as transient, on
            top of 500, 502, 504, 429, 409 and 530.
        **kwargs: Forwarded to `requests.get` / `requests.post`. `timeout`
            defaults to 5 when not supplied.

    Returns:
        The `requests` response object. Once `retry_timeout` has reached 3600
        a response with a transient status code is returned as-is.

    Raises:
        ValueError: If `request_type` is neither "GET" nor "POST".
        SystemExit: If the server answers 401.
    """
    if request_type == "POST":
        request_call = requests.post
    elif request_type == "GET":
        request_call = requests.get
    else:
        raise ValueError(f"Invalid request type {request_type}")

    extra_error_codes = list(extra_error_codes or [])
    transient_codes = [500, 502, 504, 429, 409, 530] + extra_error_codes
    # Let a caller-supplied timeout win instead of colliding with a hard-coded
    # keyword (which raised TypeError inside the retry loop).
    kwargs.setdefault("timeout", 5)

    response = None
    try:
        response = request_call(url, proxies=get_proxy(source), **kwargs)
        if response.status_code in transient_codes and retry_timeout < 3600:
            # transient errors
            raise Exception(f"{response.status_code}")
        if response.status_code in [401]:
            logger.error("Authentication token expired")
            # SystemExit is not an Exception subclass, so it is not retried.
            sys.exit(1)
    except Exception as e:
        if response is not None and "Retry-After" in response.headers:
            try:
                retry_timeout = int(response.headers["Retry-After"])
            except ValueError:
                # Retry-After may be an HTTP-date; keep the current backoff.
                pass
        logger.warning(
            f"Received error '{str(e)}' while accessing {url}. Retrying in {retry_timeout} seconds"
        )
        time.sleep(retry_timeout)
        # Floor at 1 so "Retry-After: 0" cannot pin the backoff at zero.
        retry_timeout = min(max(retry_timeout, 1) * 2, 3600)
        return call_api_internal(
            url,
            request_type,
            source,
            retry_timeout=retry_timeout,
            # Keep treating the caller's extra codes as transient on retries.
            extra_error_codes=extra_error_codes,
            **kwargs,
        )
    return response