In [None]:
from ratelimit import limits, sleep_and_retry

## Parse AniList API

In [None]:
@sleep_and_retry
@limits(calls=1, period=1.5)  # the docs advertise 90 requests per minute, but that did not hold in practice
def call_api(url, json, retry_timeout=1):
    """POST `json` to `url`, retrying transient failures with exponential backoff.

    A request is retried when the HTTP status is one of 500, 502, 504, 429 or
    409, or when `requests` raises a transport-level error. Each retry sleeps
    for `retry_timeout` seconds (or the server's `Retry-After` value when it
    is given in seconds), then doubles the timeout up to a cap of 3600.

    Args:
        url: Endpoint to POST to.
        json: Request payload; must contain a "variables" key, which is
            included in the retry log message.
        retry_timeout: Seconds to wait before the next retry. Once it reaches
            3600 no further retries are attempted.

    Returns:
        The `requests` response. It may still carry an error status if the
        status is not retryable or the backoff cap has been reached.

    Raises:
        requests.exceptions.RequestException: If the request keeps failing at
            the transport level after the backoff cap has been reached.
    """
    try:
        response = requests.post(url, json=json)
    except requests.exceptions.RequestException as e:
        # No response object exists here, so there are no headers to inspect.
        if retry_timeout >= 3600:
            raise
        error = str(e)
    else:
        if (
            response.status_code not in [500, 502, 504, 429, 409]
            or retry_timeout >= 3600
        ):
            return response
        # This can occur if Anilist servers go down or if the page doesnt exist
        error = f"{response.status_code}"
        logger.warning(str(response.headers))
        retry_after = response.headers.get("Retry-After")
        if retry_after is not None:
            try:
                retry_timeout = int(retry_after)
            except ValueError:
                # Retry-After may also be an HTTP-date; keep the current backoff.
                pass
    logger.warning(
        f"Received error {error} while accessing {url} with arguments {json['variables']}."
        f" Retrying in {retry_timeout} seconds"
    )
    time.sleep(retry_timeout)
    retry_timeout = min(retry_timeout * 2, 3600)
    # Recurse through the decorated function so the rate limit still applies.
    return call_api(url, json, retry_timeout)


def process_json(json):
    """Flatten a MediaListCollection response into one row per list entry.

    Reads every entry of every list under
    ``json["data"]["MediaListCollection"]["lists"]`` and returns a DataFrame
    with the columns uid, score, status, num_episodes_watched, updated_at,
    created_at and is_rewatching.

    Status values are translated to lowercase names ("REPEATING" becomes
    "completed", with `is_rewatching` set to True for those rows). Scores are
    divided by 10 repeatedly until no score exceeds 10 -- presumably to bring
    100-point score formats onto a 10-point scale.
    """
    column_names = [
        "uid",
        "score",
        "status",
        "num_episodes_watched",
        "updated_at",
        "created_at",
    ]
    status_names = {
        "CURRENT": "watching",
        "COMPLETED": "completed",
        "PAUSED": "on_hold",
        "DROPPED": "dropped",
        "PLANNING": "plan_to_watch",
        "REPEATING": "completed",
    }

    rows = []
    for media_list in json["data"]["MediaListCollection"]["lists"]:
        for item in media_list["entries"]:
            rows.append(
                (
                    item["media"]["idMal"],
                    item["score"],
                    item["status"],
                    item["progress"],
                    item["updatedAt"],
                    item["createdAt"],
                )
            )

    frame = pd.DataFrame.from_records(rows, columns=column_names)
    # Flag rewatches before the raw status is overwritten by the translation.
    frame["is_rewatching"] = frame["status"] == "REPEATING"
    frame["status"] = frame["status"].replace(status_names)
    # Rescale the whole column until every score fits within 0-10.
    while (frame["score"] > 10).any():
        frame["score"] /= 10
    return frame


def get_user_anime_list(userid):
    """Download a user's anime list from AniList, chunk by chunk.

    Requests successive chunks of the user's MediaListCollection until the
    API reports `hasNextChunk` as false, converts each chunk with
    `process_json`, and merges the results into one row per `uid`.

    Args:
        userid: AniList user id; sent as a string in the query variables and
            stored as a string in the "username" column.

    Returns:
        A `(DataFrame, bool)` tuple. On success the frame holds the
        deduplicated list and the flag is True. If any chunk returns 403, 404
        or another non-OK status, an empty DataFrame and False are returned.
    """
    # The query and endpoint are the same for every chunk, so build them once.
    query = """
        query ($userID: Int, $MEDIA: MediaType, $chunk: Int) {
            MediaListCollection (userId: $userID, type: $MEDIA, chunk: $chunk) {
                hasNextChunk
                lists {
                    entries
                    {
                        status
                        score
                        progress
                        updatedAt
                        createdAt
                        media
                        {
                            idMal
                        }
                    }
                }
            }
        }
        """
    url = "https://graphql.anilist.co"

    has_next_chunk = True
    chunk = 0
    anime_lists = []
    while has_next_chunk:
        variables = {"userID": str(userid), "MEDIA": "ANIME", "chunk": chunk}
        response = call_api(url, {"query": query, "variables": variables})
        if response.status_code in [403, 404]:
            # 403: This can occur if the user privated their list
            # 404: This can occur if the user deleted their account
            return pd.DataFrame(), False
        if not response.ok:
            logger.warning(f"Error {response} received when handling {url}")
            return pd.DataFrame(), False
        # Decode the body once and reuse it for paging and row extraction.
        payload = response.json()
        has_next_chunk = payload["data"]["MediaListCollection"]["hasNextChunk"]
        anime_lists.append(process_json(payload))
        chunk += 1
    anime_list = pd.concat(anime_lists)
    # deduplicate shows that appear on multiple lists: after sorting by update
    # time, keep the last value seen for each uid.
    # NOTE(review): groupby drops rows whose uid is null by default, so entries
    # without a MAL id do not survive this step -- confirm that is intended.
    anime_list = (
        anime_list.sort_values(by=["updated_at", "created_at"])
        .groupby("uid")
        .last()
        .reset_index()
    )
    anime_list["username"] = f"{userid}"
    return anime_list, True