In [None]:
import json
import logging
import os
import sys
import time

import pandas as pd
import requests
from ratelimit import limits, sleep_and_retry

## Parse MAL API

In [None]:
# Load the MAL token payload from disk; `call_api` reads its "access_token"
# entry for the Authorization header. The context manager closes the file
# handle as soon as the JSON has been parsed.
with open("../../data/mal/mal_authentication/token.json", "r") as token_file:
    mal_access_token = json.load(token_file)

In [None]:
# Reuse a `logger` that is already defined in the namespace (for example when
# this notebook is executed from another one); otherwise fall back to the root
# logger. Only NameError is caught so that unrelated failures and interrupts
# are not silently swallowed.
try:
    logger
except NameError:
    logger = logging.getLogger("")

In [None]:
@sleep_and_retry
@limits(calls=1, period=1)
def call_api(url, retry_timeout=1, max_timeout=100):
    """GET `url` from the MAL API with the stored bearer token, retrying transient failures.

    The `ratelimit` decorators throttle every invocation (including the
    recursive retries) to one call per second.

    Args:
        url: Fully qualified URL to request.
        retry_timeout: Seconds to sleep before the next retry; doubled after
            every failed attempt. Should be positive, otherwise the back-off
            never reaches `max_timeout`.
        max_timeout: Once `retry_timeout` has grown to at least this value no
            further retry is attempted.

    Returns:
        The `requests.Response` of the last attempt. This may still carry a
        retryable error status if the retry budget was exhausted, so callers
        must check the status themselves.

    Raises:
        requests.RequestException: if the request itself keeps failing (e.g.
            connection errors) after the retry budget is exhausted.
        SystemExit: if the API answers 401, i.e. the access token is no longer
            accepted.
    """
    # NOTE(review): `requests.get` is called without a `timeout`, so a stalled
    # connection can block indefinitely - consider adding one.
    try:
        response = requests.get(
            url, headers={"Authorization": f'Bearer {mal_access_token["access_token"]}'}
        )
    except requests.RequestException as e:
        # Only transport-level failures are retried; programming errors (such
        # as a malformed token file) propagate immediately.
        if retry_timeout >= max_timeout:
            raise
        error = str(e)
    else:
        if response.status_code in [401]:
            logger.error("Authentication token expired")
            sys.exit(1)
        # This can occur if MAL servers go down or if the page doesn't exist
        retryable = response.status_code in [500, 502, 504, 429, 409]
        if not (retryable and retry_timeout < max_timeout):
            return response
        error = f"{response.status_code}"

    logger.warning(
        f"Received error {error} while accessing {url}. Retrying in {retry_timeout} seconds"
    )
    time.sleep(retry_timeout)
    # Forward max_timeout so a caller-supplied budget survives the retries.
    return call_api(url, retry_timeout * 2, max_timeout)

In [None]:
def get_user_list(username, listtype, processfn):
    """Download every page of a user's list and combine them into one DataFrame.

    Args:
        username: MAL username whose list is fetched.
        listtype: List kind inserted into the endpoint path (the wrappers in
            this notebook pass "anime" or "manga").
        processfn: Callable that turns one decoded JSON page into a DataFrame.

    Returns:
        A `(frame, ok)` tuple. On success `frame` holds all pages concatenated
        with an added "username" column and `ok` is True. If any page request
        fails, an empty DataFrame and False are returned and the pages fetched
        so far are discarded.
    """
    pages = []
    url = f"https://api.myanimelist.net/v2/users/{username}/{listtype}list?limit=1000&fields=list_status&nsfw=true"
    while url:
        response = call_api(url)
        if response.status_code in [403, 404]:
            # 403: This can occur if the user has privated their list
            # 404: This can occur if the user deleted their account
            return pd.DataFrame(), False
        if not response.ok:
            logger.warning(f"Error {response} received when handling {url}")
            return pd.DataFrame(), False

        # Named `payload` rather than `json` so the imported module is not shadowed.
        payload = response.json()
        pages.append(processfn(payload))
        # Follow the pagination link; a missing "paging" block or a missing
        # "next" entry both mean this was the last page.
        url = (payload.get("paging") or {}).get("next")

    user_list = pd.concat(pages, ignore_index=True)
    user_list["username"] = username
    return user_list, True

In [None]:
# Default used for each `list_status` field when the API response omits it.
# The key order also fixes the column order of the produced frames.
_ANIME_LIST_STATUS_DEFAULTS = {
    "status": "",
    "score": -1,
    "num_episodes_watched": -1,
    "is_rewatching": False,
    "start_date": "",
    "finish_date": "",
    "priority": -1,
    "num_times_rewatched": -1,
    "rewatch_value": -1,
    "updated_at": "",
}
_ANIME_COLUMNS = ["uid", *_ANIME_LIST_STATUS_DEFAULTS]


def _anime_record(x):
    """Flatten one anime list entry into a plain dict keyed by column name."""
    ls = x["list_status"]
    record = {"uid": x["node"]["id"]}
    for field, default in _ANIME_LIST_STATUS_DEFAULTS.items():
        record[field] = ls.get(field, default)
    return record


def parse_anime_json_node(x):
    """Convert a single anime list entry into a one-row DataFrame.

    Args:
        x: Entry dict with a "node" (providing "id") and a "list_status" dict.

    Returns:
        A one-row DataFrame with "uid" plus the list-status columns; fields
        absent from "list_status" are filled with their defaults.
    """
    return pd.DataFrame.from_dict(
        {column: [value] for column, value in _anime_record(x).items()}
    )


def process_anime_json(json):
    """Convert one decoded page of an anime list into a DataFrame.

    Args:
        json: Decoded response page; its "data" entry is the list of entries.

    Returns:
        A DataFrame with one row per entry, or an empty frame with the same
        columns when the page has no entries.
    """
    # Collect plain records and build the frame once instead of creating one
    # DataFrame per entry and concatenating them.
    records = [_anime_record(x) for x in json["data"]]
    if not records:
        return pd.DataFrame.from_dict({column: [] for column in _ANIME_COLUMNS})
    return pd.DataFrame(records, columns=_ANIME_COLUMNS)


def get_user_anime_list(username):
    """Fetch `username`'s anime list; returns whatever `get_user_list` returns."""
    return get_user_list(username, listtype="anime", processfn=process_anime_json)

In [None]:
# Output column -> `list_status` field it is read from. The key order also
# fixes the column order of the produced frames.
_MANGA_LIST_STATUS_FIELDS = {
    "status": "status",
    "score": "score",
    "num_volumes": "num_volumes_read",
    "num_chapters": "num_chapters_read",
    "updated_at": "updated_at",
}
_MANGA_COLUMNS = ["uid", *_MANGA_LIST_STATUS_FIELDS]


def _manga_record(x):
    """Flatten one manga list entry into a plain dict keyed by column name."""
    ls = x["list_status"]
    record = {"uid": x["node"]["id"]}
    for column, field in _MANGA_LIST_STATUS_FIELDS.items():
        # NOTE(review): every missing field defaults to "" here, including the
        # numeric ones, whereas the anime parser uses -1 for numeric fields.
        # Kept as-is so existing outputs do not change.
        record[column] = ls.get(field, "")
    return record


def parse_manga_json_node(x):
    """Convert a single manga list entry into a one-row DataFrame.

    Args:
        x: Entry dict with a "node" (providing "id") and a "list_status" dict.

    Returns:
        A one-row DataFrame with "uid" plus the list-status columns; fields
        absent from "list_status" are filled with "".
    """
    return pd.DataFrame.from_dict(
        {column: [value] for column, value in _manga_record(x).items()}
    )


def process_manga_json(json):
    """Convert one decoded page of a manga list into a DataFrame.

    Args:
        json: Decoded response page; its "data" entry is the list of entries.

    Returns:
        A DataFrame with one row per entry, or an empty frame with the same
        columns when the page has no entries.
    """
    # Collect plain records and build the frame once instead of creating one
    # DataFrame per entry and concatenating them.
    records = [_manga_record(x) for x in json["data"]]
    if not records:
        return pd.DataFrame.from_dict({column: [] for column in _MANGA_COLUMNS})
    return pd.DataFrame(records, columns=_MANGA_COLUMNS)


def get_user_manga_list(username):
    """Fetch `username`'s manga list; returns whatever `get_user_list` returns."""
    return get_user_list(username, listtype="manga", processfn=process_manga_json)