# Fetch User Anime List
* Given a user, we get an up-to-date version of their anime list
* Supports reading public anime lists from MyAnimeList and AniList
* Other websites can be used by exporting the list to XML (see https://malscraper.azurewebsites.net)

In [None]:
import logging
import os
import time
import xml.etree.ElementTree as ET

import pandas as pd

In [None]:
# Notebook parameters. Both are blank placeholders here and have to be given
# real values before the later cells run: an empty `source` is rejected by the
# allowed-sources check that follows.
# NOTE(review): presumably these are injected by whatever executes the
# notebook -- confirm against the caller.
username = ""
source = ""

In [None]:
# Every list source this notebook knows how to import from. The order matters
# later on, when stale output files are cleaned up per source.
allowed_sources = ["MAL", "AniList", "XML", "Training"]
# Fail fast on a bad parameter, and say which value was rejected and what is
# accepted so the failure is actionable without reading the notebook.
assert source in allowed_sources, (
    f"Unknown source {source!r}; expected one of {allowed_sources}"
)

In [None]:
# Per-user output directory for everything this notebook writes.
data_path = os.path.join("../../data/recommendations", username)
# makedirs(exist_ok=True) creates missing parent directories as well and does
# not fail if the directory appears between a check and the creation, which a
# separate exists()/mkdir() pair cannot guarantee.
os.makedirs(data_path, exist_ok=True)

In [None]:
# Notebook-wide logger that writes timestamped DEBUG-and-above records to the
# default stream of logging.StreamHandler.
logger = logging.getLogger("FetchUserAnimeList")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(name)s:%(levelname)s:%(asctime)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
# logging.getLogger returns the same logger object for the same name, so
# executing this cell a second time would stack another handler on it and every
# message would be printed twice. Only attach handlers when none exist yet.
if not logger.handlers:
    for stream in [
        logging.StreamHandler(),
    ]:
        stream.setFormatter(formatter)
        logger.addHandler(stream)

In [None]:
# Echo the run parameters so the notebook output records whose list is being
# fetched and from which source.
print(f"Fetching list for {username} from {source}")

In [None]:
def import_from_api(username):
    """Fetch a user's anime list through the currently loaded API helper.

    Calls ``get_user_anime_list(username)``, which must already be defined in
    the notebook namespace and return a ``(list, success)`` pair.
    NOTE(review): presumably it is provided by the API notebook that the
    source-specific importers ``%run`` before calling this -- confirm.

    Returns:
        The first element of that pair when the second element is truthy.

    Raises:
        Exception: If the helper reports failure (falsy second element).
    """
    anime_list, resolved = get_user_anime_list(username)
    if resolved:
        return anime_list
    raise Exception(f"Could not resolve list for {username}")

In [None]:
def import_from_mal(username):
    """Fetch *username*'s anime list using the MyAnimeList API notebook.

    Executes ``../API/MalApi.ipynb`` and then delegates to
    ``import_from_api``, returning whatever that returns.
    NOTE(review): presumably the executed notebook defines the
    ``get_user_anime_list`` helper that ``import_from_api`` calls -- confirm.
    """
    # `%run` is an IPython magic rather than Python syntax, so this function
    # can only be defined and called inside an IPython/Jupyter session.
    %run ../API/MalApi.ipynb
    return import_from_api(username)

In [None]:
def import_from_anilist(username):
    """Fetch *username*'s anime list using the AniList API notebook.

    Executes ``../API/AnilistApi.ipynb``, translates the username with
    ``get_userid`` and passes the result to ``import_from_api``.
    NOTE(review): presumably the executed notebook defines both
    ``get_userid`` and ``get_user_anime_list`` -- confirm.
    """
    # `%run` is an IPython magic rather than Python syntax, so this function
    # can only be defined and called inside an IPython/Jupyter session.
    %run ../API/AnilistApi.ipynb
    # The list is requested with the value returned by get_userid, not with
    # the username itself.
    userid = get_userid(username)
    return import_from_api(userid)

In [None]:
def import_from_splits(username, splits, split_dir="../../data/splits"):
    """Collect one user's rows from the per-content split CSV files.

    For every content type ("explicit", "implicit", "ptw") and every requested
    split, reads ``<split_dir>/<content>_<split>.csv`` and keeps the rows whose
    ``username`` column equals the integer form of *username*.

    Args:
        username: User identifier; must be convertible with ``int()``.
        splits: Iterable of split names, e.g. ``["training"]``.
        split_dir: Directory that holds the split CSV files. Defaults to the
            location this notebook has always read from.

    Returns:
        A DataFrame with a fresh 0..n-1 index holding the matching rows,
        ordered by content type and then by split.

    Raises:
        ValueError: If *username* is not an integer, or (from ``pd.concat``)
            if *splits* is empty.
    """
    userid = int(username)
    # Materialize once: `splits` is walked again for every content type, which
    # would silently yield nothing after the first pass for a one-shot iterator.
    splits = list(splits)
    frames = []
    for content in ("explicit", "implicit", "ptw"):
        for split in splits:
            rows = pd.read_csv(os.path.join(split_dir, f"{content}_{split}.csv"))
            frames.append(rows[rows["username"] == userid])
    return pd.concat(frames, ignore_index=True)


def import_from_training(username):
    """Return the user's rows from the "training" split only.

    Thin wrapper around ``import_from_splits`` with the split list fixed to
    ``["training"]``.
    """
    training_only = ["training"]
    return import_from_splits(username, training_only)

In [None]:
def parse_xml_entry(root):
    """Convert one anime XML element into a flat record tuple.

    Reads the ``series_animedb_id``, ``my_score``, ``my_status`` and
    ``my_watched_episodes`` children of *root*. All other fields of the record
    are not taken from the XML and are filled with fixed placeholder values.

    Args:
        root: Iterable of child elements exposing ``.tag`` and ``.text``
            (an ``xml.etree.ElementTree.Element`` in this notebook).

    Returns:
        An 11-tuple ``(uid, status, score, num_episodes_watched,
        is_rewatching, start_date, finish_date, priority,
        num_times_rewatched, rewatch_value, updated_at)``. ``status`` is
        ``None`` when the XML status text is not one of the known labels.

    Raises:
        ValueError: If a numeric child holds non-numeric text.
    """
    # Values read from the XML; these defaults are used when the tag is absent
    # or present but empty.
    uid = 0
    status = ""
    score = 0
    num_episodes_watched = 0

    # Fields that are not read from the XML: constant placeholders.
    is_rewatching = 0
    start_date = 0
    finish_date = 0
    priority = 0
    num_times_rewatched = -1
    rewatch_value = 0
    updated_at = "1970-01-01T00:00:00+00:00"

    # Maps the status labels found in the XML to the status names used in the
    # output records.
    xml_statuses = {
        "Watching": "watching",
        "Completed": "completed",
        "Dropped": "dropped",
        "On-Hold": "on_hold",
        "Plan to Watch": "plan_to_watch",
    }

    for child in root:
        tag, text = child.tag, child.text
        if tag == "my_status":
            status = xml_statuses.get(text)
            continue
        # An empty or self-closed element has text None (or only whitespace);
        # int()/float() would raise on it, so keep the default instead.
        if text is None or not text.strip():
            continue
        if tag == "series_animedb_id":
            uid = int(text)
        elif tag == "my_score":
            score = float(text)
        elif tag == "my_watched_episodes":
            num_episodes_watched = int(text)
    return (
        uid,
        status,
        score,
        num_episodes_watched,
        is_rewatching,
        start_date,
        finish_date,
        priority,
        num_times_rewatched,
        rewatch_value,
        updated_at,
    )


def import_from_xml(username):
    """Build the user's anime list from an exported XML file.

    Parses ``<data_path>/../../xml/<username>.xml``, checks that the second
    child of the document's first element is a ``user_name`` tag matching
    *username*, and turns every top-level ``anime`` element into one record
    via ``parse_xml_entry``, with *username* appended as the last column.

    Returns:
        A DataFrame with one row per ``anime`` element.

    Raises:
        AssertionError: If the file does not carry the expected user name.
    """
    column_names = [
        "uid",
        "status",
        "score",
        "num_episodes_watched",
        "is_rewatching",
        "start_date",
        "finish_date",
        "priority",
        "num_times_rewatched",
        "rewatch_value",
        "updated_at",
        "username",
    ]
    xml_file = os.path.join(data_path, f"../../xml/{username}.xml")
    root = ET.parse(xml_file).getroot()
    # Guard against loading a file that was exported for a different user.
    owner = root[0][1]
    assert (owner.tag == "user_name") and (owner.text == username)
    records = [
        (*parse_xml_entry(entry), username)
        for entry in root
        if entry.tag == "anime"
    ]
    return pd.DataFrame.from_records(records, columns=column_names)

In [None]:
# Dispatch table: source name -> function that takes a username and returns
# that user's anime list.
import_fns = {
    "MAL": import_from_mal,
    "AniList": import_from_anilist,
    "XML": import_from_xml,
    "Training": import_from_training,
}
# Reject sources that have no importer registered above.
if source not in import_fns:
    raise Exception(f"Unsupported animelist source {source}")

In [None]:
# Pick the importer registered for the requested source and run it.
import_fn = import_fns[source]
df = import_fn(username)

In [None]:
def save_path(source):
    """Return the CSV path inside ``data_path`` used for a given source.

    The file name embeds the lower-cased source name, so each source maps to
    its own file.
    """
    filename = f"user_anime_list.{source.lower()}.csv"
    return os.path.join(data_path, filename)


# Remove the output of any earlier run, whatever source it came from, so the
# directory ends up with exactly one list file.
for prev_source in allowed_sources:
    stale_path = save_path(prev_source)
    if os.path.exists(stale_path):
        os.remove(stale_path)
# Write the freshly fetched list under the current source's file name.
df.to_csv(save_path(source), index=False)