# Generating a list of MAL usernames
* The page https://myanimelist.net/users.php contains a list of recently active users
* We continually refresh that page and write down any new users
* You can terminate or restart the notebook at any point without losing progress. All users found so far will be stored at `data/mal/user_facts`.
* This notebook will run indefinitely. You must manually terminate it once an acceptable number of users has been found.

In [None]:
import os
import re

In [None]:
# Proxy selector for this run. Not referenced by any cell shown in this
# notebook; presumably read by WebEndpointBase.ipynb when it is %run further
# down (TODO confirm).
PROXY_NUMBER = 0

In [None]:
# Job identifier and data-source label. Neither is used directly by the cells
# shown in this notebook; presumably consumed by WebEndpointBase.ipynb
# (TODO confirm what it derives from them).
name = "get_recent_username"
source = "mal"

In [None]:
# Execute the shared base notebook in this namespace. The cells below use
# logger, call_web_api, should_save and atomic_to_csv without defining them,
# so they are presumably provided here (TODO confirm).
%run WebEndpointBase.ipynb

In [None]:
# When the notebook is rerun, pick up where the previous run left off by
# reloading every non-blank line of recent_usernames.txt as a username.
recent_usernames = set()
if os.path.exists("recent_usernames.txt"):
    with open("recent_usernames.txt") as saved_file:
        for raw_line in saved_file:
            username = raw_line.strip()
            if username:
                recent_usernames.add(username)


logger.info(f"Starting with {len(recent_usernames)} stored usernames")

In [None]:
# Scrape usernames from the recently online users page at
# https://myanimelist.net/users.php
def get_users():
    """Return the set of usernames linked from the recently-online users page.

    The page is fetched through ``call_web_api``. Every substring of the
    response text that looks like ``/profile/<name>"`` contributes ``<name>``
    to the result.

    Returns:
        A set of username strings, or an empty set (after logging a warning)
        when the response status is 404 or the response is not ``ok``.
    """
    url = "https://myanimelist.net/users.php"
    response = call_web_api(url)
    if response.status_code == 404 or not response.ok:
        logger.warning(f"Error {response} received when handling {url}")
        return set()

    prefix = "/profile/"
    usernames = set()
    for link in re.findall('''/profile/[^"/#%]+"''', response.text):
        # Drop the leading "/profile/" and the closing double quote that the
        # pattern captures along with the name.
        usernames.add(link[len(prefix):-1])
    return usernames

In [None]:
# Poll the page forever: merge each batch of scraped names into the running
# set, and write the sorted set to disk whenever should_save("users") allows.
while True:
    latest_batch = get_users()
    recent_usernames.update(latest_batch)
    if not should_save("users"):
        continue
    atomic_to_csv(sorted(recent_usernames), "recent_usernames.txt")
    logger.info(f"Successfully written {len(recent_usernames)} users ")