In [1]:
import json
import os

import pandas as pd
import requests
from ratelimit import limits, sleep_and_retry
from tqdm import tqdm

In [2]:
# Working directory for this notebook's inputs and outputs.
data_path = "../../data/user_facts"
# makedirs creates any missing parent directories and, with exist_ok=True,
# does nothing when the directory is already there (no check-then-create race).
os.makedirs(data_path, exist_ok=True)
# All relative paths in later cells are resolved against this directory.
os.chdir(data_path)

In [3]:
# Load the API token; the context manager closes the file handle as soon as
# the JSON has been parsed instead of leaving it to the garbage collector.
with open("../mal_authentication/token.json", "r") as token_file:
    token = json.load(token_file)

In [4]:
# Limit API requests to just under 2 calls per second (1 call per 0.51 s).
@sleep_and_retry
@limits(calls=1, period=0.51)
def call_api(url, timeout=30):
    """Send a rate-limited, authenticated GET request.

    Args:
        url: Full URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, a stalled connection would block the notebook forever.

    Returns:
        The ``requests.Response`` for the call; the status code is NOT
        checked here, so callers decide how to treat error responses.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # The bearer token is read from the module-level ``token`` dict at call
    # time, so a refreshed token is picked up without redefining the function.
    auth_headers = {"Authorization": f'Bearer {token["access_token"]}'}
    return requests.get(url, headers=auth_headers, timeout=timeout)

In [5]:
# Load the usernames to fetch: one entry per line of the file, with
# surrounding whitespace (including the trailing newline) stripped.
with open("usernames.txt", "r") as f:
    usernames = list(map(str.strip, f))

In [6]:
def process_json(json):
    """Flatten one page of an animelist API response into a DataFrame.

    Args:
        json: Decoded response body (a dict; note the name shadows the
            ``json`` module inside this function). Must contain a ``'data'``
            list whose entries each have a ``'node'`` dict with an ``'id'``.

    Returns:
        DataFrame with columns ``anime_id`` and ``my_score``, one row per
        entry in ``json['data']``. ``my_score`` is 0 when the entry carries
        no score.
    """
    def _score(entry):
        # With ``fields=list_status`` the user's score is reported under
        # ``list_status`` rather than ``node`` (per the MAL API reference),
        # so looking only at ``node`` always produced 0. ``node`` is still
        # consulted so payloads of the previously expected shape keep working.
        for section in ('list_status', 'node'):
            container = entry.get(section) or {}
            if 'score' in container:
                return container['score']
        return 0

    return pd.DataFrame.from_records(
        [(entry['node']['id'], _score(entry)) for entry in json['data']],
        columns=['anime_id', 'my_score'],
    )

In [7]:
def get_user_anime_list(username):
    """Download every page of a user's anime list.

    Args:
        username: Name of the user whose list is requested.

    Returns:
        A ``(frame, ok)`` tuple. On success ``frame`` holds the concatenated
        pages with an added ``username`` column and ``ok`` is True. If any
        page request answers 403 or 404, an empty DataFrame and False are
        returned and pages fetched so far are discarded.

    Raises:
        Whatever ``response.raise_for_status()`` raises for other HTTP error
        statuses.
    """
    pages = []
    next_url = f'https://api.myanimelist.net/v2/users/{username}/animelist?limit=1000&fields=list_status'
    while True:
        response = call_api(next_url)

        # 403: This can occur if the user privated their list
        # 404: This can occur if the user deleted their account
        if response.status_code in (403, 404):
            return pd.DataFrame(), False

        # Any other error status is unexpected and should surface loudly.
        response.raise_for_status()
        payload = response.json()
        pages.append(process_json(payload))

        # The API signals a further page by including a 'next' URL.
        paging = payload['paging']
        if 'next' not in paging:
            break
        next_url = paging['next']

    combined = pd.concat(pages, ignore_index=True)
    combined['username'] = username
    return combined, True

In [8]:
# Fetch the lists for the first 10 usernames, remembering which lookups failed.
user_anime_lists, failed_users = [], []
for username in tqdm(usernames[:10]):
    user_anime_list, ok = get_user_anime_list(username)
    # The frame is collected unconditionally; for a failed lookup it is the
    # empty DataFrame returned by get_user_anime_list.
    user_anime_lists.append(user_anime_list)
    if not ok:
        failed_users.append(username)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.17s/it]
