# Getting media relations
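* We store metadata in `data/mal/anime_facts/anime_relations.csv` (each run of this notebook writes its own partition, `{source_media}_relations.{PROXY_NUMBER}.csv`, into `data/mal/{source_media}_facts`).
* We use AniList as the source because the `related_series` endpoint is broken on MAL.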

In [None]:
import csv
import glob
import logging
import os

from tqdm import tqdm

In [None]:
# Run parameters. The defaults here are placeholders — presumably overridden
# by whatever launches the notebook; verify against the caller.
# Media type; interpolated into the data directory, logger name, input glob
# and output file name below.
source_media = ""
# Index of the partition handled by this run; also used as a suffix of the
# log file and the output CSV.
PROXY_NUMBER = 0
# Number of partitions the sorted media ids are split into.
NUM_PARTITIONS = 1

# Setup

In [None]:
def import_notebook(nb):
    cwd = os.getcwd()
    try:
        os.chdir(os.path.dirname(nb))
        script = os.path.basename(nb)
        %run $script
    finally:
        os.chdir(cwd)

In [None]:
# Run the API helper notebook. NOTE(review): presumably this is what brings
# `pd` and `get_media_relations` (used further down) into scope — confirm.
import_notebook("../API/MalApi.ipynb")

In [None]:
# Output directory for this media type. The working directory is switched to
# it, so every relative path used after this cell is resolved from here.
data_path = f"../../../data/mal/{source_media}_facts"
# makedirs(exist_ok=True) also creates missing parent directories and does
# not fail if the directory is created between a check and the creation.
os.makedirs(data_path, exist_ok=True)
os.chdir(data_path)

In [None]:
# File-backed logger for this run. The log file name carries PROXY_NUMBER and
# the file is opened with mode "w+", i.e. truncated each time this cell runs.
name = f"get_{source_media}_relations"
formatter = logging.Formatter(
    fmt="%(name)s:%(levelname)s:%(asctime)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
log_handler = logging.FileHandler(f"{name}.{PROXY_NUMBER}.log", mode="w+")
log_handler.setFormatter(formatter)

logger = logging.getLogger(name)
logger.setLevel(logging.DEBUG)
logger.addHandler(log_handler)

# Collect data

In [None]:
def get_media_ids(input_fn):
    """Return the set of integer ids found in the `uid` column of a CSV file.

    The file is parsed with the `csv` module, so quoted fields that contain
    commas do not shift the column index. Blank rows are skipped.

    Args:
        input_fn: Path to a CSV file whose first row is a header that
            contains a `uid` column.

    Returns:
        A set of ints, one per distinct `uid` value; empty when the file has
        no rows at all.

    Raises:
        ValueError: If the header has no `uid` column, or a `uid` value
            cannot be converted to int.
        IndexError: If a data row has fewer columns than the position of
            `uid` in the header.
    """
    item_ids = set()
    # newline="" leaves line-ending handling to the csv module.
    with open(input_fn, "r", newline="") as in_file:
        reader = csv.reader(in_file)
        header = next(reader, None)
        if header is None:
            # Completely empty file: nothing to collect.
            return item_ids
        idx = [column.strip() for column in header].index("uid")
        for row in tqdm(reader):
            if not row:
                # csv.reader yields [] for blank lines.
                continue
            item_ids.add(int(row[idx]))
    return item_ids

In [None]:
# Gather every id seen in the user list files, then keep only this run's
# share: the ids whose position in sorted order is congruent to PROXY_NUMBER
# modulo NUM_PARTITIONS. Filtering a sorted sequence keeps it sorted.
list_files = glob.glob(
    f"../user_{source_media}_facts/user_{source_media}_list.*.csv"
)
media_ids = set()
for list_fn in list_files:
    media_ids.update(get_media_ids(list_fn))
media_ids = [
    media_id
    for position, media_id in enumerate(sorted(media_ids))
    if position % NUM_PARTITIONS == PROXY_NUMBER
]

In [None]:
# Call get_media_relations once per id, stack the results into a single
# frame, and write it to this partition's CSV in the current directory.
relation_frames = [
    get_media_relations(media_id, source_media) for media_id in tqdm(media_ids)
]
media_relations = pd.concat(relation_frames, ignore_index=True)
output_fn = f"{source_media}_relations.{PROXY_NUMBER}.csv"
media_relations.to_csv(output_fn, index=False)