# Getting Anilist users
* Resolves each AniList userid to its username and appends the mapping to `userid_to_username.csv`

In [None]:
import contextlib
import datetime
import glob
import logging
import os
import random
import shutil

from tqdm import tqdm

In [None]:
# Data-source identifier; interpolated below into the API notebook path and the data directory.
name = "anilist"

In [None]:
def import_notebook(nb):
    """Run the notebook at path ``nb`` with the IPython ``%run`` magic.

    The working directory is switched to the notebook's own directory for the
    duration of the run and restored afterwards, even if the run raises.

    NOTE(review): presumably the directory switch exists so that relative
    paths used inside the target notebook resolve correctly -- confirm.
    NOTE(review): ``nb`` needs a directory component; ``os.chdir("")`` raises
    for a bare file name.
    """
    # Remember where we started so the finally-clause can undo the chdir.
    cwd = os.getcwd()
    try:
        os.chdir(os.path.dirname(nb))
        # %run receives only the file name because we are already in its directory.
        script = os.path.basename(nb) 
        %run $script
    finally:
        os.chdir(cwd)

In [None]:
# Execute the source-specific API notebook (for name == "anilist": ../API/AnilistApi.ipynb).
# NOTE(review): presumably this is what brings `pd` and `get_username` into scope, since
# neither is imported or defined in this file -- confirm.
import_notebook(f"../API/{name.capitalize()}Api.ipynb")

In [None]:
# outdir: create the per-source output directory if needed and make it the working
# directory, so every relative path used below resolves inside it.
data_path = f"../../../data/{name}/user_facts"
# exist_ok=True replaces the check-then-create sequence, which could fail if the
# directory appeared between the existence check and the makedirs call.
os.makedirs(data_path, exist_ok=True)
os.chdir(data_path)

In [None]:
# logging: send DEBUG-and-above records to both get_users.log and the console.
logger = logging.getLogger("GetUsers")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(name)s:%(levelname)s:%(asctime)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
# getLogger returns the same logger object on every call, so re-running this cell
# would otherwise stack a second pair of handlers and emit every record twice.
if not logger.handlers:
    for stream in [
        logging.FileHandler("get_users.log"),
        logging.StreamHandler(),
    ]:
        stream.setFormatter(formatter)
        logger.addHandler(stream)

## Parse AniList API

In [None]:
# atomic saving utilities
@contextlib.contextmanager
def atomic_append(filename):
    """Append to ``filename`` so that the file is only ever replaced as a whole.

    Yields a text-mode handle to a scratch file (``filename + "~"``). When the
    ``with`` body finishes, the current contents of ``filename`` followed by the
    scratch contents are copied into a second scratch file
    (``filename + "~~"``), which is then moved over ``filename`` with
    ``os.replace``.

    If ``filename`` does not exist yet it is treated as empty. If the ``with``
    body or the merge raises, ``filename`` is left untouched, both scratch
    files are removed and the exception propagates.
    """
    temp = filename + "~"
    temp2 = temp + "~"
    try:
        with open(temp, "w") as f:
            yield f

        with open(temp2, "wb") as merged:
            # A missing target simply contributes no bytes.
            with contextlib.suppress(FileNotFoundError):
                with open(filename, "rb") as existing:
                    shutil.copyfileobj(existing, merged)
            with open(temp, "rb") as appended:
                shutil.copyfileobj(appended, merged)
        os.replace(temp2, filename)
    finally:
        # On success temp2 has already been renamed away; on failure either
        # scratch file may or may not exist. Never leave them behind.
        for leftover in (temp, temp2):
            with contextlib.suppress(FileNotFoundError):
                os.remove(leftover)


def atomic_append_dataframe_to_csv(df, filename):
    """Append the rows of ``df`` to the CSV at ``filename`` via ``atomic_append``.

    The header row is written only when the file is missing or empty. Checking
    for emptiness as well as existence matters: an earlier run that was
    interrupted after creating the file but before writing any rows leaves an
    empty file, and the header would otherwise never be written.

    Args:
        df: DataFrame whose rows are appended (written without the index).
        filename: Path of the CSV file to create or extend.
    """
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
    if needs_header:
        # atomic_append copies the existing file, so make sure one exists.
        # Append mode creates it without truncating anything.
        with open(filename, "a"):
            pass
    with atomic_append(filename) as f:
        df.to_csv(f, index=False, header=needs_header)

In [None]:
# Throttle disk writes: each reason is flushed after 1, 2, 4, ... iterations,
# with the gap between flushes capped at 5400 iterations.
# NOTE(review): the earlier comment described this as "hourly"; the 5400 cap
# presumably approximates an hour at the crawl's request rate -- confirm.
def should_save(reason):
    """Return True when the caller should flush data for ``reason`` now.

    State per reason is kept in the module-level ``save_reasons`` dict as a
    ``(iterations since last write, iterations until next write)`` tuple. Each
    call counts as one iteration. When the threshold is reached the counter is
    reset, the threshold is doubled (up to 5400), the decision is logged and
    True is returned; otherwise False is returned.
    """
    elapsed, iterations_until_next_write = save_reasons.setdefault(reason, (0, 1))
    elapsed += 1
    flush_now = elapsed >= iterations_until_next_write
    if flush_now:
        elapsed = 0
        iterations_until_next_write = min(2 * iterations_until_next_write, 5400)
        logger.info(
            f"Writing data for {reason}. Will next write data "
            f"after {iterations_until_next_write} iterations"
        )
    save_reasons[reason] = (elapsed, iterations_until_next_write)
    return flush_now


# reason -> (iterations since last write, iterations until next write)
save_reasons = {}

In [None]:
# Read every CSV matching the user_status pattern, stack them, and keep only the
# rows whose failed_attempts column is zero.
files = glob.glob("../user_anime_facts/user_status.*.csv")
user_df = pd.concat([pd.read_csv(path) for path in files])
user_df = user_df.loc[user_df["failed_attempts"] == 0]

In [None]:
# Work out which ids still need a lookup: everything in user_df that is not
# already recorded in the output file, visited in random order.
# NOTE(review): the `username` column of user_df is compared against the
# `userid` column of the output file, so it presumably holds ids -- confirm.
outfile = "userid_to_username.csv"
existing_userids = (
    list(pd.read_csv(outfile)["userid"]) if os.path.exists(outfile) else []
)
new_userids = list(set(user_df.username) - set(existing_userids))
random.shuffle(new_userids)
# Running total of ids present in the output file; grows as batches are written.
num_userids = len(existing_userids)

In [None]:
# Report the size of the job before starting the lookups.
logger.info(
    f"Getting usernames for {len(new_userids)} new userids "
    f"and {len(existing_userids)} existing userids"
)

In [None]:
# Look up each new userid, buffering results and flushing the buffer to the
# output CSV whenever should_save("users") says it is time.
out_userids = []
out_usernames = []
for processed, userid in enumerate(new_userids, start=1):
    username = get_username(userid)
    out_userids.append(userid)
    out_usernames.append(username)
    if should_save("users"):
        atomic_append_dataframe_to_csv(
            pd.DataFrame.from_dict({"userid": out_userids, "username": out_usernames}),
            outfile,
        )
        num_userids += len(out_usernames)
        # The remaining count is based on the total processed so far. The
        # buffer length only covers the current batch, because the buffer is
        # cleared after every flush.
        logger.info(
            f"Successfully written {num_userids} usernames. "
            f"{len(new_userids) - processed} usernames remaining"
        )
        out_userids = []
        out_usernames = []

In [None]:
# Flush whatever is still buffered after the loop, then report the final tally.
final_batch = pd.DataFrame.from_dict(
    {"userid": out_userids, "username": out_usernames}
)
atomic_append_dataframe_to_csv(final_batch, outfile)
num_userids = num_userids + len(out_usernames)
logger.info(f"Successfully written {num_userids} usernames")
logger.info("Finished writing all usernames!")