In [None]:
import logging
import time

import pandas as pd
import requests
from ratelimit import limits, sleep_and_retry

## Parse AniList API

In [None]:
# Reuse a `logger` that is already defined in the namespace; otherwise fall
# back to the root logger.
try:
    logger
except NameError:
    # Only an undefined name is expected here; a bare `except` would also
    # swallow KeyboardInterrupt / SystemExit.
    logger = logging.getLogger("")

In [None]:
@sleep_and_retry
@limits(calls=1, period=2)  # the docs say 90 requests per minute, but that is not what is observed
def call_api(url, json, retry_timeout=1):
    """POST ``json`` to ``url`` and retry transient failures with backoff.

    Args:
        url: Endpoint to POST to.
        json: Request payload, sent as the JSON body. Its ``"variables"`` entry
            (when present) is included in the retry log message.
        retry_timeout: Seconds to wait before the next retry. It is doubled
            after every failed attempt and capped at 3600. A numeric
            ``Retry-After`` response header overrides it for that attempt.

    Returns:
        The ``requests.Response`` of the first attempt that neither raised nor
        produced a retryable status code. Once ``retry_timeout`` has reached
        3600, a retryable status is returned as-is instead of being retried.

    Note:
        Exceptions raised by ``requests.post`` are always retried, so a
        permanently unreachable host keeps this function retrying.
    """
    response = None
    try:
        response = requests.post(url, json=json)
        if (
            response.status_code in [500, 502, 504, 429, 409, 530]
            and retry_timeout < 3600
        ):
            # This can occur if AniList servers go down or if the page doesn't exist
            raise Exception(f"{response.status_code}")
    except Exception as e:
        if response is not None:
            logger.warning(str(response.headers))
            retry_after = response.headers.get("Retry-After")
            if retry_after is not None:
                try:
                    retry_timeout = int(retry_after)
                except ValueError:
                    # Retry-After may also be an HTTP-date; in that case keep
                    # the current backoff instead of failing inside the handler.
                    pass
        # Log every retry, including connection errors where no response exists.
        arguments = json.get("variables") if isinstance(json, dict) else None
        logger.warning(
            f"Received error {str(e)} while accessing {url} with arguments {arguments}."
            f" Retrying in {retry_timeout} seconds"
        )
        time.sleep(retry_timeout)
        retry_timeout = min(retry_timeout * 2, 3600)
        # Recurse through the decorated name so the retry is rate limited too.
        return call_api(url, json, retry_timeout)
    return response

In [None]:
def process_json(json):
    """Flatten a ``MediaListCollection`` response payload into a DataFrame.

    Each entry of each list in ``json["data"]["MediaListCollection"]["lists"]``
    becomes one row with the columns ``uid``, ``score``, ``status``,
    ``num_episodes_watched``, ``updated_at`` and ``created_at``.

    Rows whose ``idMal`` is missing are dropped and ``uid`` is cast to int.
    ``is_rewatching`` is True where the original status was ``REPEATING``, and
    the status values are then renamed to their lowercase equivalents.
    Statuses not present in the mapping are left untouched.
    """
    column_names = [
        "uid",
        "score",
        "status",
        "num_episodes_watched",
        "updated_at",
        "created_at",
    ]
    status_names = {
        "CURRENT": "watching",
        "COMPLETED": "completed",
        "PAUSED": "on_hold",
        "DROPPED": "dropped",
        "PLANNING": "plan_to_watch",
        "REPEATING": "completed",
    }

    rows = []
    for media_list in json["data"]["MediaListCollection"]["lists"]:
        for entry in media_list["entries"]:
            rows.append(
                (
                    entry["media"]["idMal"],
                    entry["score"],
                    entry["status"],
                    entry["progress"],
                    entry["updatedAt"],
                    entry["createdAt"],
                )
            )

    frame = pd.DataFrame.from_records(rows, columns=column_names)
    # Keep only entries that carry an id; the original index labels are kept.
    frame = frame[frame["uid"].notna()].copy()
    frame["uid"] = frame["uid"].astype(int)
    # Must be computed before the status values are renamed below.
    frame["is_rewatching"] = frame["status"] == "REPEATING"
    frame["status"] = frame["status"].replace(status_names)
    return frame


def get_user_list(userid, listtype):
    """Download every chunk of one user's media list and merge them.

    Args:
        userid: AniList user id; sent to the API as ``str(userid)``.
        listtype: Value for the query's ``MediaType`` variable, e.g. ``"ANIME"``
            or ``"MANGA"``.

    Returns:
        ``(media_list, True)`` on success, where ``media_list`` has one row per
        ``uid`` plus a ``username`` column holding ``str(userid)``.
        ``(pd.DataFrame(), False)`` as soon as any chunk request fails or its
        response carries no ``MediaListCollection``.
    """
    # Both values are loop-invariant, so build them once.
    url = "https://graphql.anilist.co"
    query = """
        query ($userID: Int, $MEDIA: MediaType, $chunk: Int) {
            MediaListCollection (userId: $userID, type: $MEDIA, chunk: $chunk) {
                hasNextChunk
                lists {
                    entries
                    {
                        status
                        score(format: POINT_10_DECIMAL)
                        progress
                        updatedAt
                        createdAt
                        media
                        {
                            idMal
                        }
                    }
                }
            }
        }
        """
    has_next_chunk = True
    chunk = 0
    media_lists = []
    while has_next_chunk:
        variables = {"userID": str(userid), "MEDIA": listtype, "chunk": chunk}
        response = call_api(url, {"query": query, "variables": variables})
        if response.status_code in [403, 404]:
            # 403: This can occur if the user privated their list
            # 404: This can occur if the user deleted their account
            return pd.DataFrame(), False
        if not response.ok:
            logger.warning(f"Error {response} received when handling {url}")
            return pd.DataFrame(), False
        # Decode the body once and reuse it for paging and for parsing.
        payload = response.json()
        collection = (payload.get("data") or {}).get("MediaListCollection")
        if collection is None:
            # A GraphQL response may carry null data alongside an error list;
            # treat that like any other failed request.
            logger.warning(
                f"No MediaListCollection in response from {url} for user {userid}"
            )
            return pd.DataFrame(), False
        has_next_chunk = collection["hasNextChunk"]
        media_lists.append(process_json(payload))
        chunk += 1
    media_list = pd.concat(media_lists)
    # deduplicate shows that appear on multiple lists
    media_list = (
        media_list.sort_values(by=["updated_at", "created_at"])
        .groupby("uid")
        .last()
        .reset_index()
    )
    media_list["username"] = f"{userid}"
    return media_list, True

In [None]:
def get_user_anime_list(userid):
    """Return the result of ``get_user_list`` for ``userid`` with list type ``"ANIME"``."""
    return get_user_list(userid, listtype="ANIME")

def get_user_manga_list(userid):
    """Return the result of ``get_user_list`` for ``userid`` with list type ``"MANGA"``."""
    return get_user_list(userid, listtype="MANGA")

In [None]:
def get_userid(username):
    """Look up the AniList user id that belongs to ``username``.

    Args:
        username: AniList user name; sent to the API as ``str(username)``.

    Returns:
        The ``id`` field of the ``User`` object in the API response, or ``None``
        when the request ends in an HTTP error status.
    """
    url = "https://graphql.anilist.co"
    query = "query ($username: String) { User (name: $username) { id } }"
    variables = {"username": str(username)}
    response = call_api(url, {"query": query, "variables": variables})
    try:
        response.raise_for_status()
    except Exception as e:
        logger.warning(f"Received error {str(e)} while accessing {url}")
        # No id is known on this path (there is no `userid` in scope), so
        # report the failed lookup explicitly instead of raising NameError.
        return None
    return response.json()["data"]["User"]["id"]