# Generating a list of MAL usernames
* We discover usernames by fetching the profile comments page of each user id and collecting every username that is linked on it
* You can terminate or restart the notebook at any point without losing progress. All users found so far will be stored at `data/mal/user_facts/usernames_from_id.txt`.
* This notebook runs until every user id up to `max_mal_userid` has been searched, which is effectively indefinite. You must manually terminate it once an acceptable number of users have been found.

In [None]:
import random
import re

from tqdm import tqdm

In [None]:
# Identifiers for this crawler. They are not read anywhere in this notebook;
# presumably WebEndpointBase.ipynb (run in a later cell) consumes them
# — TODO confirm how they are used.
name = "get_user_from_id"
source = "mal"

In [None]:
# NOTE(review): PROXY_PARTITION is assigned before the %run below, presumably so
# the shared base notebook can read it — confirm its meaning in
# WebEndpointBase.ipynb.
PROXY_PARTITION = "0,2"
# Execute the shared base notebook in this namespace. Helpers used by later
# cells (call_api, logger, atomic_to_csv, should_save, os) are not defined in
# this notebook and presumably come from it.
%run WebEndpointBase.ipynb

In [None]:
# If we are rerunning the notebook, resume execution where we last left off.
# Both state files are read one entry per line; blank lines are ignored.
usernames = set()
if os.path.exists("usernames_from_id.txt"):
    with open("usernames_from_id.txt") as f:
        # Iterate the file directly instead of materialising it with readlines().
        usernames = {x.strip() for x in f if x.strip()}

searched_userids = set()
if os.path.exists("searched_userids.txt"):
    with open("searched_userids.txt") as f:
        for line in tqdm(f):
            line = line.strip()
            # Skip blank lines (e.g. a stray trailing newline) instead of
            # crashing in int(""); this mirrors the filtering applied to the
            # usernames file above.
            if line:
                searched_userids.add(int(line))

logger.info(
    f"Starting with {len(usernames)} stored usernames after searching {len(searched_userids)} userids"
)

In [None]:
def get_usernames(userid):
    """Return the usernames that have commented on the given userid's profile.

    The comments page for `userid` is fetched through `call_api`, and every
    ``/profile/<name>"`` link found in the response text contributes one
    username to the result.

    Returns:
        A set of username strings. The set is empty when the page is a 404
        (the user may have deleted their account) or when any other non-OK
        response is received; the latter case is also logged as a warning.
    """
    url = f"https://myanimelist.net/comments.php?id={userid}"
    response = call_api(url)

    if response.status_code == 404:
        # the user may have deleted their account
        return set()
    if not response.ok:
        logger.warning(f"Error {response} received when handling {url}")
        return set()

    # Each match looks like `/profile/<name>"`; strip the fixed prefix and the
    # closing quote to leave only the username.
    prefix_len = len("/profile/")
    suffix_len = len('"')
    found = set()
    for link in re.findall('''/profile/[^"/#%]+"''', response.text):
        found.add(link[prefix_len:-suffix_len])
    return found

In [None]:
def save():
    """Write the current crawl state to disk and log the totals.

    The module-level `usernames` and `searched_userids` sets are each written
    in sorted order through `atomic_to_csv`, to "usernames_from_id.txt" and
    "searched_userids.txt" respectively.
    """
    outputs = (
        (usernames, "usernames_from_id.txt"),
        (searched_userids, "searched_userids.txt"),
    )
    for values, path in outputs:
        # sorted() accepts any iterable, so no intermediate list is needed.
        atomic_to_csv(sorted(values), path)
    logger.info(
        f"Successfully wrote {len(usernames)} users after searching {len(searched_userids)} userids"
    )

In [None]:
# can get the most recent userid by searching the usernames in https://myanimelist.net/comments.php?id=16857856
max_mal_userid = 16857854  # current as of 20230624

# Collect every id in [1, max_mal_userid] that has not been searched yet.
# Filtering the range directly avoids building a second, throwaway set with
# ~17M elements just to subtract `searched_userids` from it.
remaining_userids = [
    uid for uid in range(1, max_mal_userid + 1) if uid not in searched_userids
]
# Visit the remaining ids in random order.
random.shuffle(remaining_userids)

try:
    for userid in remaining_userids:
        usernames |= get_usernames(userid)
        searched_userids.add(userid)
        # should_save is defined outside this notebook; presumably it throttles
        # how often a checkpoint is written — TODO confirm.
        if should_save("users"):
            save()
finally:
    # This notebook is expected to be stopped manually, which raises
    # KeyboardInterrupt inside the loop. Saving in `finally` persists the
    # progress made since the last checkpoint on interruption as well as on
    # normal completion.
    save()

In [None]:
# Final log message, emitted when this last cell is executed.
logger.info("Finished!")

In [None]:
# Scratch note (not executable code): https://myanimelist.net/comments.php?id=16553901