# Fetch User Anime List
* Given a user, we get an up-to-date version of their anime list
* Supports reading public anime and manga lists from MyAnimeList, AniList, and Kitsu
* Other websites can be used by exporting the list to XML (see https://malscraper.azurewebsites.net)

In [None]:
import logging
import os
import time
from io import StringIO

import pandas as pd
from tqdm import tqdm

In [None]:
# Run parameters. All three default to empty strings here; presumably they are
# overridden by whatever executes this notebook — TODO confirm.
username = ""  # user whose lists are fetched; also names the output directory
source = ""  # must be one of the allowed sources checked below
task = ""  # interpolated into the split filenames read for the "Training" source

In [None]:
# Sources this notebook can read a user's lists from.
allowed_sources = ["MAL", "AniList", "Kitsu", "Training"]
# Fail early on an unknown source; the message names the rejected value so a
# failed run is easy to diagnose.
assert source in allowed_sources, (
    f"Unknown source {source!r}; expected one of {allowed_sources}"
)

In [None]:
# Per-user output directory; the saved lists for this user are written here.
data_path = os.path.join("../../data/recommendations", username)
# exist_ok=True removes the check-then-create race and makes re-running this
# cell a no-op when the directory is already present.
os.makedirs(data_path, exist_ok=True)

In [None]:
# Notebook-wide logger. logging.getLogger returns the same object for the same
# name on every call, so handlers are attached only when none are present;
# otherwise each re-run of this cell would add another StreamHandler and every
# message would be emitted multiple times.
logger = logging.getLogger("FetchMediaList")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(name)s:%(levelname)s:%(asctime)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
if not logger.handlers:
    for stream in [
        logging.StreamHandler(),
    ]:
        stream.setFormatter(formatter)
        logger.addHandler(stream)

In [None]:
# Announce which user and source this run is about to fetch.
print(f"Fetching lists for {username} from {source}")

In [None]:
def import_from_api(username, medium):
    """Fetch one media list for a user through ``get_user_media_list``.

    ``get_user_media_list`` is not defined in this cell; the source-specific
    importers ``%run`` an API notebook before calling this function.

    Raises:
        Exception: if the second value returned by the helper is falsy.
    """
    media_list, ok = get_user_media_list(username, medium)
    if ok:
        return media_list
    raise Exception(f"Could not resolve list for {username}")

In [None]:
def import_from_mal(username, medium):
    pwd = os.getcwd()
    try:
        os.chdir("../API/API")
        %run MalApi.ipynb
        df = import_from_api(username, medium)
    finally:
        os.chdir(pwd)
    return df

In [None]:
def import_from_anilist(username, medium):
    pwd = os.getcwd()
    try:
        os.chdir("../API/API")
        %run AnilistApi.ipynb
        userid = get_userid(username, medium)
        df = import_from_api(userid)
    finally:
        os.chdir(pwd)
    return df

In [None]:
def import_from_kitsu(username, medium):
    pwd = os.getcwd()
    try:
        os.chdir("../API/API")
        %run KitsuApi.ipynb
        userid = get_userid(username, medium)
        df = import_from_api(userid)
    finally:
        os.chdir(pwd)
    return df

In [None]:
def import_from_splits(username, medium, splits):
    """Collect one user's rows from the pre-split CSV files.

    Reads ``../../data/splits/{content}.{task}.{split}.user_{medium}_list.csv``
    for each content type (explicit, implicit, ptw) and each requested split,
    keeping only the rows that start with ``username`` followed by a comma.
    ``task`` is read from the notebook's global scope.

    Args:
        username: User identifier as a string; must parse as an integer.
        medium: Medium name interpolated into the filename.
        splits: Iterable of split names to read.

    Returns:
        A DataFrame parsed from the header of the first file read plus every
        matching row, in file order.

    Raises:
        ValueError: if ``username`` is not an integer string, or a file header
            has no ``username`` column.
        AssertionError: if ``username`` is not the first column of a header.
    """
    prefix = username + ","
    # Validation only: raises ValueError for a non-integer username. The
    # converted value itself is not needed.
    int(username)
    lines = []
    for content in ["explicit", "implicit", "ptw"]:
        for split in splits:
            fn = f"../../data/splits/{content}.{task}.{split}.user_{medium}_list.csv"
            with open(fn) as f:
                for line_number, line in enumerate(tqdm(f)):
                    if line_number == 0:
                        # The prefix match below relies on username being the
                        # first column.
                        fields = line.strip().split(",")
                        assert fields.index("username") == 0
                        # Keep only one header: the first file's.
                        if not lines:
                            lines.append(line.rstrip("\n"))
                        continue
                    if line.startswith(prefix):
                        # Drop the line ending so the join below yields exactly
                        # one newline per row instead of blank lines between
                        # rows.
                        lines.append(line.rstrip("\n"))
    df = pd.read_csv(StringIO("\n".join(lines)))
    return df


def import_from_training(username, medium):
    """Load a user's rows from the ``training`` split only."""
    training_only = ["training"]
    return import_from_splits(username, medium, training_only)

In [None]:
# Dispatch table: source name -> importer called as importer(username, medium).
import_fns = {
    "MAL": import_from_mal,
    "AniList": import_from_anilist,
    "Kitsu": import_from_kitsu,
    "Training": import_from_training,
}
# Reject a source that has no importer before any fetching starts.
if source not in import_fns:
    raise Exception(f"Unsupported animelist source {source}")

In [None]:
def save_path(source, medium):
    """Return the CSV path under ``data_path`` for the given source and medium.

    The source name is lower-cased in the filename.
    """
    filename = f"user_{medium}_list.{source.lower()}.csv"
    return os.path.join(data_path, filename)

In [None]:
# For each medium: fetch the list first, then delete any previously saved list
# for that medium (from any allowed source) before writing the fresh one.
for medium in ["anime", "manga"]:
    fetch = import_fns[source]
    df = fetch(username, medium)
    candidates = [save_path(prev_source, medium) for prev_source in allowed_sources]
    # filter is lazy, so each existence check happens right before its removal.
    for path in filter(os.path.exists, candidates):
        os.remove(path)
    target = save_path(source, medium)
    df.to_csv(target, index=False)