# Getting anime details
* We store anime metadata in `data/mal/anime_facts/anime.csv`

In [None]:
import csv
import glob
import logging
import os

from tqdm import tqdm

# Setup

In [None]:
def import_notebook(nb):
    cwd = os.getcwd()
    try:
        os.chdir(os.path.dirname(nb))
        script = os.path.basename(nb) 
        %run $script
    finally:
        os.chdir(cwd)

In [None]:
# Run the MAL API notebook; presumably it supplies get_anime_details and pd,
# which are used below but not defined in this notebook — TODO confirm.
import_notebook("../API/MalApi.ipynb")

In [None]:
# Work from inside the output directory so the relative paths used below
# (the log file and anime.csv) resolve there.
data_path = "../../../data/mal/anime_facts"
# makedirs also creates missing parent folders and tolerates an existing one,
# which avoids the check-then-create race of exists() + mkdir().
os.makedirs(data_path, exist_ok=True)
os.chdir(data_path)

In [None]:
# Configure the "get_anime" logger at DEBUG level with two handlers: one
# writing to "get_anime.log" and one stream handler.
name = "get_anime"
logger = logging.getLogger(name)
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(name)s:%(levelname)s:%(asctime)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
# getLogger returns the same object on every call, so re-running this cell
# would otherwise stack another pair of handlers and duplicate each message.
if not logger.handlers:
    for stream in (
        logging.FileHandler(f"{name}.log"),
        logging.StreamHandler(),
    ):
        stream.setFormatter(formatter)
        logger.addHandler(stream)

# Collect data

In [None]:
def get_item_ids(input_fn):
    """Return the set of integer ids found in the ``uid`` column of a CSV file.

    Args:
        input_fn: Path to a CSV file whose first row is a header that
            contains a ``uid`` column.

    Returns:
        A set with every ``uid`` value converted to ``int``; empty when the
        file has no rows at all.

    Raises:
        ValueError: If the header has no ``uid`` column or a value cannot be
            converted to ``int``.
    """
    with open(input_fn, "r", newline="") as in_file:
        # csv.reader copes with quoted fields (which may contain commas) that
        # a plain split(",") would cut apart.
        reader = csv.reader(in_file)
        header = next(reader, None)
        if header is None:
            return set()
        idx = header.index("uid")
        # Blank lines come back as empty rows; skip them instead of failing.
        return {int(row[idx]) for row in tqdm(reader) if row}

In [None]:
def get_all_details(anime_ids):
    """Fetch the details of every id in *anime_ids* and join them.

    Each id is passed to ``get_anime_details`` (not defined in this
    notebook); the per-id results are combined with ``pd.concat`` using a
    fresh index.
    """
    frames = []
    for anime_id in tqdm(anime_ids):
        frames.append(get_anime_details(anime_id))
    return pd.concat(frames, ignore_index=True)

In [None]:
# Gather the distinct ids from every user anime list file, then order them.
collected = set()
for path in glob.glob("../user_anime_facts/user_anime_list.*.csv"):
    collected.update(get_item_ids(path))
anime_ids = sorted(collected)

In [None]:
# NOTE(review): keeps only positions 30-39 of the sorted id list, so at most
# ten anime are fetched below — looks like a debugging subset; confirm before
# a full collection run.
anime_ids = anime_ids[30:40]

In [None]:
# Fetch the details for every selected id and write them out as one table.
# get_all_details already implements the fetch-and-concatenate step, so reuse
# it rather than repeating the same two lines inline.
anime_list = get_all_details(anime_ids)
anime_list.to_csv("anime.csv", index=False)