# Fetching a user's anime list
* Given a user, we fetch an up-to-date version of their anime list
* Then we normalize it and store the normalized version
* TODO: support importing lists from AniList

In [1]:
import os
import pickle
import time

import pandas as pd
import requests
from ratelimit import limits, sleep_and_retry
from tqdm import tqdm

In [2]:
# Username interpolated into the per-user output directory path and into the
# API request URL in the cells below.
user = "Fro116"

In [3]:
# Per-user working directory: ../../data/recommendations/<user>.
# makedirs(exist_ok=True) also creates any missing parent directories and does
# not fail if the directory already exists, so no check-then-create is needed.
data_path = os.path.join("../../data/recommendations", user)
os.makedirs(data_path, exist_ok=True)
# Every relative path used after this point resolves against this directory.
os.chdir(data_path)

In [4]:
@sleep_and_retry
@limits(calls=1, period=4)
def call_api(url, *, retry_timeout=600, request_timeout=60):
    """GET ``url``, retrying until the server gives a usable answer.

    The decorators limit this function to one call per 4-second period.
    Connection problems and HTTP 500/503 responses are retried after
    ``retry_timeout`` seconds; any other response (including other error
    status codes) is returned to the caller unchanged.

    Args:
        url: Address to request.
        retry_timeout: Seconds to wait before retrying a failed request.
            Retries happen inside this call, so keep this at or above the
            rate-limit period.
        request_timeout: Seconds to wait on the server before treating the
            request as failed, so a stalled connection cannot hang forever.

    Returns:
        The ``requests.Response`` of the first attempt that neither raised a
        request error nor returned status 500/503.
    """
    # Loop instead of recursing so a long outage cannot exhaust the call stack.
    while True:
        try:
            response = requests.get(url, timeout=request_timeout)
        except requests.exceptions.RequestException as e:
            # Only network/HTTP-layer failures are retried; programming errors
            # propagate instead of being retried indefinitely.
            error = str(e)
        else:
            if response.status_code not in (500, 503):
                return response
            # This can occur if MAL servers go down
            error = str(response.status_code)
        print(
            f"Received error {error} while accessing {url}. Retrying in {retry_timeout} seconds"
        )
        time.sleep(retry_timeout)

In [5]:
# One-off request for page 3 of `user`'s anime list; the response is kept in
# `a`. NOTE(review): `a` is not referenced again in the visible cells, so this
# looks like an exploratory call — confirm before relying on it.
a = call_api(f"https://api.jikan.moe/v3/user/{user}/animelist/all/{3}")

In [6]:
def process_json(json):
    """Turn one decoded API page into an ``anime_id`` / ``my_score`` table.

    Args:
        json: Mapping with an ``"anime"`` list whose entries each carry
            ``"mal_id"`` and ``"score"`` keys.

    Returns:
        A DataFrame with one row per entry and the columns ``anime_id``
        (from ``mal_id``) and ``my_score`` (from ``score``).
    """
    rows = []
    for entry in json["anime"]:
        rows.append((entry["mal_id"], entry["score"]))
    return pd.DataFrame.from_records(rows, columns=["anime_id", "my_score"])

In [7]:
def get_anime_list(username):
    """Download every page of ``username``'s anime list and return the scored rows.

    Pages are requested one after another, starting at page 1, until a page
    comes back with fewer than ``max_items_per_page`` entries. A non-success
    HTTP status on any page raises via ``response.raise_for_status()``.

    Args:
        username: Name inserted into the animelist API URL.

    Returns:
        A DataFrame with the columns ``username``, ``anime_id`` and
        ``my_score``; rows whose ``my_score`` is 0 are dropped
        (presumably unrated entries — confirm against the API).
    """
    max_items_per_page = 300  # property of the API
    frames = []

    # The number of pages is unknown up front, so tqdm is fed a generator that
    # keeps yielding page numbers until the most recently fetched page is short.
    def page_numbers():
        number = 1
        while not frames or len(frames[-1]) >= max_items_per_page:
            yield number
            number += 1

    for page in tqdm(page_numbers()):
        url = f"https://api.jikan.moe/v3/user/{username}/animelist/all/{page}"
        response = call_api(url)
        response.raise_for_status()
        frames.append(process_json(response.json()))

    combined = pd.concat(frames, ignore_index=True)
    combined["username"] = username
    rated = combined[combined["my_score"] != 0]
    return rated[["username", "anime_id", "my_score"]]

In [8]:
# Fetch the list for the configured `user` rather than a hard-coded name, so
# the data always matches the per-user directory the results are written to.
df = get_anime_list(user)

2it [00:07,  3.97s/it]


In [9]:
# Load the precomputed per-anime statistics. The files are opened in `with`
# blocks so the handles are closed as soon as loading finishes.
# NOTE: pickle can execute arbitrary code on load — only read files produced
# by this project.
with open("../../processed_data/anime_stats.pkl", "rb") as stats_file:
    anime_stats = pickle.load(stats_file)
with open("../../processed_data/normalized_anime_stats.pkl", "rb") as stats_file:
    normalized_anime_stats = pickle.load(stats_file)

In [10]:
# see comments in NormalizeRatings.ipynb
# Weights applied to the user's score variance and to each anime's normalized
# variance when combining them into `score_var`.
user_contrib = 0.25
anime_contrib = 0.25

# Attach the per-anime statistics; both merges are inner joins on `anime_id`,
# so anime missing from either stats table are dropped.
df = df.merge(anime_stats, on="anime_id")
df = df.merge(normalized_anime_stats, on="anime_id")

# `blp` = per-anime bias plus the user's mean score over the merged rows
# (presumably "baseline predictor" — confirm in NormalizeRatings.ipynb).
df["blp"] = df["anime_bias"] + df["my_score"].mean()
# `score` is what remains of the user's rating after subtracting `blp`.
df["score"] = df["my_score"] - df["blp"]
df["score_var"] = (
    df["score"].var() * user_contrib + df["normalized_anime_var"] * anime_contrib
)

In [11]:
# Save the raw score, the `blp` column and the residual `score` for each anime.
df.loc[:, ["username", "anime_id", "my_score", "blp", "score"]].to_pickle(
    "baseline_predictor.pkl"
)

In [12]:
# Save the normalized score and its variance estimate for each anime.
df.loc[:, ["username", "anime_id", "score", "score_var"]].to_pickle(
    "user_anime_list.pkl"
)