# Getting AniList anime-lists
* See GetUserAnimeLists.ipynb for more info

In [None]:
import csv
import datetime
import json
import logging
import os
import time

import numpy as np
import pandas as pd
import requests
from ratelimit import limits, sleep_and_retry
from tqdm import tqdm

In [None]:
# Data-source identifier; interpolated into the output directory path (data_path).
name = "anilist"

In [None]:
# outdir
data_path = f"../../data/{name}/user_facts"
# exist_ok=True replaces the separate "exists?" check, so there is no window
# in which another process can create the directory between check and create.
os.makedirs(data_path, exist_ok=True)
# NOTE: this changes the process working directory; every relative path used
# afterwards (log file, input CSV, output CSV) resolves against data_path.
os.chdir(data_path)

In [None]:
# logging
# Messages go both to a log file in the current working directory and to the
# console, using one shared format.
logger = logging.getLogger("GetUsers")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(name)s:%(levelname)s:%(asctime)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
# logging.getLogger returns the same object for the same name, so executing
# this setup a second time would attach a second pair of handlers and every
# message would be emitted twice. Only attach handlers when none exist yet.
if not logger.handlers:
    for handler in (
        logging.FileHandler("get_user_anime_lists.log"),
        logging.StreamHandler(),
    ):
        handler.setFormatter(formatter)
        logger.addHandler(handler)

## Parse AniList API

In [None]:
@sleep_and_retry
@limits(calls=1, period=1)
def _post_once(url, payload):
    """Send a single POST request, rate-limited to one request per second."""
    return requests.post(url, json=payload)


def call_api(url, json, retry_timeout=1):
    """POST a JSON payload to ``url``, retrying transient failures with backoff.

    Every HTTP request (including retries) goes through the one-per-second
    rate limiter. Network-level failures are retried indefinitely. Responses
    with status 429, 500 or 504 are retried until the backoff delay has
    reached one hour, after which the response is returned as-is so the
    caller can decide what to do with it.

    Args:
        url: Endpoint to POST to.
        json: JSON-serializable request body (the name shadows the ``json``
            module inside this function; it is kept for caller compatibility).
        retry_timeout: Initial delay in seconds before the first retry; it
            doubles after each failed attempt, capped at 3600.

    Returns:
        The ``requests.Response`` of the last attempt. The status code is not
        checked here beyond the retry logic; callers should validate it.
    """
    max_timeout = 3600
    # 429: rate limit exceeded; 500/504: the API servers are failing or
    # timing out. All three are expected to clear up after waiting.
    retryable_statuses = (429, 500, 504)
    while True:
        try:
            response = _post_once(url, json)
        except requests.exceptions.RequestException as e:
            # Only network/HTTP-layer failures are retried; programming errors
            # (e.g. an unserializable payload) propagate immediately instead of
            # being retried forever.
            error = str(e)
        else:
            if (
                response.status_code not in retryable_statuses
                or retry_timeout >= max_timeout
            ):
                return response
            error = str(response.status_code)
        logger.warning(
            f"Received error {error} while accessing {url}. Retrying in {retry_timeout} seconds"
        )
        time.sleep(retry_timeout)
        retry_timeout = min(retry_timeout * 2, max_timeout)

In [None]:
def get_username(userid):
    """Look up the AniList username that belongs to a numeric user id.

    Args:
        userid: The user's id; anything ``int()`` accepts (Python int,
            numpy integer, numeric string).

    Returns:
        The user's name as reported by the API.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        ValueError: If ``userid`` cannot be converted to an integer.
    """
    url = "https://graphql.anilist.co"
    query = "query ($userid: Int) { User (id: $userid) { name } }"
    # The query declares $userid as Int, so send a real JSON integer rather
    # than a string. int() also turns numpy integers into a plain int that
    # the JSON encoder can serialize.
    variables = {"userid": int(userid)}
    response = call_api(url, {"query": query, "variables": variables})
    response.raise_for_status()
    return response.json()["data"]["User"]["name"]

In [None]:
# Load the user table (path is relative to the working directory set above).
# Its "username" column is iterated below and passed to get_username as a user id.
user_df = pd.read_csv("../user_anime_facts/user_status.csv")

In [None]:
# Record how many ids will be looked up (one row of user_df per id).
logger.info(f"Getting usernames for {len(user_df)} user ids")

In [None]:
# Resolve every user id to its username and store the mapping as CSV.
outfile = "userid_to_username.csv"
# newline="" hands line-ending control to the csv module; utf-8 keeps
# non-ASCII usernames intact regardless of the platform's default encoding.
with open(outfile, "w", newline="", encoding="utf-8") as out:
    # csv.writer quotes fields that contain commas, quotes or newlines, which
    # hand-built "a,b" lines would silently corrupt.
    writer = csv.writer(out, lineterminator="\n")
    writer.writerow(["userid", "username"])
    for userid in tqdm(user_df.username):
        writer.writerow([userid, get_username(userid)])

In [None]:
# Final marker in the log: all usernames were fetched and written.
logger.info("Success!")