# Continuous Scripts
* This notebook runs indefinitely. You must manually kill it once a sufficient dataset has been collected.

In [None]:
import os
import subprocess
import time

from ratelimit import limits, sleep_and_retry

In [None]:
# Working directory at the time this cell runs; spawn_process builds the
# path of the notebook to execute as f"{cwd}/{script}.ipynb".
cwd = os.getcwd()

In [None]:
def get_proxies(source):
    """Read the proxy map and return the proxies available to ``source``.

    The map file holds one comma-separated entry per line. The first field
    is compared against ``source`` and the second against the literal
    ``"lists"``. Blank lines are skipped.

    Args:
        source: Name matched against the first field of every entry.

    Returns:
        A ``(list_proxies, num_proxies)`` tuple. ``num_proxies`` counts the
        entries whose first field equals ``source``; ``list_proxies`` holds
        the positions (0-based, counted among those entries only) of the
        ones whose second field is ``"lists"``. If the map file does not
        exist the result is ``([0], 1)``.
    """
    proxyfn = "../../../environment/proxies/proxy_map.txt"

    # No proxy map present: fall back to a single slot numbered 0.
    if not os.path.exists(proxyfn):
        return [0], 1

    list_proxies = []
    num_proxies = 0
    with open(proxyfn) as f:
        for raw in f:
            entry = raw.strip()
            if not entry:
                continue
            fields = entry.split(",")
            if fields[0] != source:
                continue
            # The running count doubles as this entry's index for the source.
            if fields[1] == "lists":
                list_proxies.append(num_proxies)
            num_proxies += 1
    return list_proxies, num_proxies

In [None]:
@sleep_and_retry
@limits(calls=1, period=5)
def spawn_process(script, partition, num_partitions, proxy, num_proxies):
    """Launch ``papermill`` on ``{cwd}/{script}.ipynb`` as a child process.

    The executed notebook's output is written to ``/dev/null``. Each
    argument is forwarded to the notebook as a ``-p NAME value`` parameter;
    ``partition`` is passed twice, as both ``PARTITION`` and
    ``TOKEN_NUMBER``. The assembled command line is printed before launch.

    The ``limits``/``sleep_and_retry`` decorators throttle calls to this
    function (configured here as 1 call per period of 5).

    Args:
        script: Notebook name without the ``.ipynb`` extension.
        partition: Value for ``PARTITION`` and ``TOKEN_NUMBER``.
        num_partitions: Value for ``NUM_PARTITIONS``.
        proxy: Value for ``PROXY_NUMBER``.
        num_proxies: Value for ``NUM_PROXIES``.

    Returns:
        The ``subprocess.Popen`` handle of the started process.
    """
    parameters = (
        ("PARTITION", partition),
        ("NUM_PARTITIONS", num_partitions),
        ("PROXY_NUMBER", proxy),
        ("NUM_PROXIES", num_proxies),
        ("TOKEN_NUMBER", partition),
    )
    cmdlist = ["papermill", f"{cwd}/{script}.ipynb", "/dev/null"]
    for key, value in parameters:
        cmdlist.extend(("-p", key, str(value)))
    print(cmdlist)
    return subprocess.Popen(cmdlist)

In [None]:
def spawn(source, i):
    """Start the ``GetUserMediaLists<Source>`` notebook for partition ``i``.

    Looks up ``source`` in ``PROXIES`` and delegates to ``spawn_process``.
    The number of partitions is the number of list proxies for the source,
    and partition ``i`` uses the ``i``-th of them.

    Args:
        source: Key into ``PROXIES``; its capitalized form is the notebook
            name suffix.
        i: Partition index, also used to index the source's list proxies.

    Returns:
        Whatever ``spawn_process`` returns for the started worker.
    """
    list_proxies, num_proxies = PROXIES[source]
    script = f"GetUserMediaLists{source.capitalize()}"
    return spawn_process(
        script,
        partition=i,
        num_partitions=len(list_proxies),
        proxy=list_proxies[i],
        num_proxies=num_proxies,
    )

In [None]:
# Sources to collect from; spawn() runs GetUserMediaLists<Source> for each.
ALL_SOURCES = ["animeplanet", "kitsu", "anilist", "mal"]
# source -> (list_proxies, num_proxies) as returned by get_proxies.
PROXIES = {x: get_proxies(x) for x in ALL_SOURCES}
# (source, partition index) -> handle of the process started for that slot.
procs = {}

In [None]:
# Start one worker per (source, list-proxy slot) pair and remember its handle.
for source in ALL_SOURCES:
    for i, _ in enumerate(PROXIES[source][0]):
        k = (source, i)
        procs[k] = spawn(source, i)

In [None]:
# Supervisor loop: runs until the notebook is killed manually.
while True:
    time.sleep(1)
    for k, v in procs.items():
        # poll() returns None while the process is still running.
        if v.poll() is None:
            continue
        # The worker for this slot has exited; start a replacement.
        # Rebinding an existing key does not resize the dict, so this is
        # safe while iterating over items().
        procs[k] = spawn(*k)