In [48]:
import requests
import json
import urllib
# importing selenium into code
from selenium import webdriver 
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.keys import Keys
import time

# this constant should be modified according to where the web driver has been placed
WEBDRIVER_PATH = './'

#replace client_id with Client Key given at https://myanimelist.net/apiconfig
client_id = "d1ba6c8f42d9abdc2aa2d84fb61702e5"

base_url = "https://api.myanimelist.net/v2/"
auth_header = {"X-MAL-CLIENT-ID" : client_id}

In [9]:
# Get Anime List of turtlemage

#user = "turtlemage"
#content_type = "animelist"
# Construct url to HTTP GET
#url = base_url + "users/" + user + "/" + content_type

#fields = {
#    "fields" : "list_status"
#}
# Send HTTP GET Request
#r = requests.get(url, headers = auth_header, params = fields)
#print(r.status_code)
#
#full_json = r.json()
#next_request_url = full_json['paging']['next']
#print(full_json)




# Loop for paging through queries that may require more than 1 Request
#while 'paging' in full_json:
#    paging = full_json['paging']
#    new_url = paging['next']
#    r = requests.get(new_url, headers = auth_header)
# Status Code 200 means that the request is Successful (i.e not Rate Limited)
#    if r.status_code == 200:
# This appends the previous json with the newly obtained json
#        full_json['data'] += r.json()['data']
#        if 'next' in r.json()['paging']:
#            full_json['paging'] = r.json()['paging']
#        else:
#            full_json.pop('paging')
#print(full_json)


In [10]:
def get_user_list_ids(client_id, user, content_type='anime', fields='list_status', returnJSON=True, return_type='id'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    user         - username of user
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    fields       - list of fields to include in returned data, string deliminated by commas
    returnJSON   - boolean deciding whether to return raw JSON or to return simplified data
    return_type  - recommended id or title, determines the keys of formatted dictionary        
    """ 
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    #contentType is either anime or manga
    if content_type == 'anime':
        content = 'animelist'
    else:
        content = 'mangalist'
    data_request = {'fields' : fields}
    # Create request url, to pass HTTP GET request with header and fields
    url = base_url + "users/" + user + "/" + content
    # Initial HTTP request
    r = requests.get(url, headers = auth_header, params = data_request)
    if r.status_code != 200:
        raise Exception("HTTP GET Error : "+str(r.status_code))
    full_json = r.json()
    # Loops through all pages for lists that are too large for one HTTP GET
    while 'paging' in full_json:
        paging = full_json['paging']
        new_url = paging['next']
        r = requests.get(new_url, headers = auth_header)
        if r.status_code != 200:
            raise Exception("HTTP GET Error : "+str(r.status_code))
        full_json['data'] += r.json()['data']
        if 'next' in r.json()['paging']:
            full_json['paging'] = r.json()['paging']
        else:
            full_json.pop('paging')
    # Returns raw data if left alone
    if returnJSON:
        return full_json
    # Attempt at Cleaning the json up a bit for usability - not necessarily too useful
    else:
        shows = full_json['data']
    field_list = fields.split(',')
    show_list = {}
    for node in full_json['data']:
        try:
            show = node['node'][return_type]
            show_list[show] = {}
        except:
            raise Exception('return_type not valid - must be id, title, or other field type')
        for field in field_list:
            show_list[show][field] = node['node'][field]
    return show_list





def get_show_id(client_id, show_title, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_title   - title of show that an ID needs to be found for
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """ 
    # Allowing lists to be passed thru to simplify future code
    if type(show_title) is list:
        show_ids = []
        for show in show_title:
            show_id.append(get_show_id(client_id, show, content_type))
        return show_ids
    # Creating HTTP GET header
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    #contentType is either anime or manga
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Create request url, to pass HTTP GET request with header and fields
    # Uses MAL's search feature to find show
    url = base_url + content + "?q=" +  urllib.parse.quote(show_title.lower().split()[0])
    # Initial HTTP request
    r = requests.get(url,headers = auth_header)
    if r.status_code != 200:
        raise Exception("HTTP GET Error : "+str(r.status_code))
    for node in r.json()['data']:
        if node['node']['title'] == show_title:
            return node['node']['id']
    raise Exception("Title not Found")
    
    
    
    
    
    
    
def get_show_stats(client_id, show_ids=[], show_title=[], content_type='anime', stat='all'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_id      - integer relating to the index in MAL's database
    show_title   - title of show that an ID needs to be found for
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    stat         - statistic to be found (all returns default stats)
    """ 
    if type(show_ids) == int:
        show_ids = [show_ids]
    if show_ids == [] and show_title == []:
        raise Exception("Must submit an id or title")
    elif show_ids == []:
        for show in show_title:
            show_ids.append(get_show_id(client_id, show_title, content_type))
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    #contentType is either anime or manga
    if content_type == 'anime':
        content = 'anime/'
    else:
        content = 'manga/'
    # Loop through all shows and request (cant seen to figure out how to batch them other than using a temp holder acct)
    results = []
    for show in show_ids:
        # Create request url, to pass HTTP GET request with header and fields
        url = base_url + content + str(show)
        # Initial HTTP request
        r = requests.get(url, headers = auth_header, params = {'fields' : stat})
        results.append(r.json())
    if stat == 'all':
        return results
    else:
        amended_results = []
        for show in results:
            amended_results.append(show[stat])
        return amended_results

In [11]:
animelist = get_user_list_ids(client_id, 'turtlemage')
print(animelist)
print(get_show_id(client_id, "Yuukoku no Moriarty"))

{'data': [{'node': {'id': 38101, 'title': '5-toubun no Hanayome', 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1819/97947.jpg', 'large': 'https://api-cdn.myanimelist.net/images/anime/1819/97947l.jpg'}}, 'list_status': {'status': 'dropped', 'score': 4, 'num_episodes_watched': 6, 'is_rewatching': False, 'updated_at': '2021-01-27T02:13:39+00:00'}}, {'node': {'id': 41457, 'title': '86', 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1987/117507.jpg', 'large': 'https://api-cdn.myanimelist.net/images/anime/1987/117507l.jpg'}}, 'list_status': {'status': 'plan_to_watch', 'score': 0, 'num_episodes_watched': 0, 'is_rewatching': False, 'updated_at': '2022-01-20T06:52:31+00:00'}}, {'node': {'id': 32998, 'title': '91 Days', 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/13/80515.jpg', 'large': 'https://api-cdn.myanimelist.net/images/anime/13/80515l.jpg'}}, 'list_status': {'status': 'completed', 'score': 9, 'num_episodes

40911


In [12]:
def get_genre(client_id, show_ids, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_ids     - list of show indexes to get genre info about
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """ 
    # If only checking 1 show, puts single show into list wrapper
    if type(show_ids) == int:
        show_ids = [show_ids]
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    genre_dict = {}
    # For each show in list, HTTP GET genre
    for show in show_ids:
        url = base_url + content + "/" + str(show) + "?fields=genres"
        r = requests.get(url, headers=auth_header)
        genre_dict[show] = r.json()['genres']
    return genre_dict




def get_titles(client_id, show_id, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_id      - id of shows that alternate titles are needed for
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """ 
    # If only checking 1 show, puts single show into list wrapper
    if type(show_ids) == int:
        show_ids = [show_ids]
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    title_dict = {}
    # HTTP GET alternative titles for each show entry
    for show in show_id:
        url = base_url + content + "/" + str(show_id) + "?fields=title,alternative_titles"
        r = requests.get(url, headers=auth_header)
        title_dict[show] = r.json()['alternative_titles']
        title_dict[show]['popular_title'] = r.json()['title']
    return title_dict

In [13]:
get_genre(client_id, 40911)


{40911: [{'id': 13, 'name': 'Historical'},
  {'id': 7, 'name': 'Mystery'},
  {'id': 40, 'name': 'Psychological'},
  {'id': 27, 'name': 'Shounen'},
  {'id': 41, 'name': 'Suspense'}]}

In [14]:
def get_genre_preferences(client_id, user, content_type='anime', only_watched=True):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    user         - MAL username that is being analysed
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    only_watched - Set False if all entries (including Dropped and Plan to Watch) are to be analysed
    """
    # Get user animelist
    show_list = {}
    user_animelist = get_user_list_ids(client_id, user, content_type, fields='list_status')
    if only_watched:
        for node in user_animelist['data']:
            if node['list_status']['status'] == "completed":
                show_list[node['node']['id']] = node['list_status']['score']
    else:
        for node in user_anilist['data']:
            show_list[node['node']['id']] = node['list_status']['score']
    # Get show ids
    show_ids = []
    for key in show_list.keys():
        show_ids.append(key)
    # get genre info
    genre_list = get_genre(client_id, show_ids, content_type)
    genres = {}
    # iterate through genres to get count of number of shows with each genre tag and average user score
    for show in genre_list.keys():
        for genre in genre_list[show]:
            if genre['id'] not in genres.keys():
                genres[genre['id']] = {'genre': genre, 'count' : 1, 'avg_score' : show_list[show]}
            else:
                count = genres[genre['id']]['count'] + 1
                avg_score = (genres[genre['id']]['avg_score'] * (count - 1) + show_list[show]) / count
                genres[genre['id']]['count'] = count
                genres[genre['id']]['avg_score'] = avg_score
    return genres            

In [15]:
get_genre_preferences(client_id, 'turtlemage')

{1: {'genre': {'id': 1, 'name': 'Action'},
  'count': 70,
  'avg_score': 7.442857142857143},
 8: {'genre': {'id': 8, 'name': 'Drama'},
  'count': 38,
  'avg_score': 7.921052631578948},
 13: {'genre': {'id': 13, 'name': 'Historical'},
  'count': 10,
  'avg_score': 8.8},
 68: {'genre': {'id': 68, 'name': 'Organized Crime'},
  'count': 3,
  'avg_score': 7.666666666666667},
 9: {'genre': {'id': 9, 'name': 'Ecchi'}, 'count': 22, 'avg_score': 6.5},
 35: {'genre': {'id': 35, 'name': 'Harem'},
  'count': 31,
  'avg_score': 6.806451612903226},
 22: {'genre': {'id': 22, 'name': 'Romance'},
  'count': 60,
  'avg_score': 7.266666666666667},
 23: {'genre': {'id': 23, 'name': 'School'}, 'count': 45, 'avg_score': 7.6},
 37: {'genre': {'id': 37, 'name': 'Supernatural'},
  'count': 28,
  'avg_score': 7.821428571428571},
 4: {'genre': {'id': 4, 'name': 'Comedy'},
  'count': 75,
  'avg_score': 7.533333333333333},
 57: {'genre': {'id': 57, 'name': 'Gag Humor'},
  'count': 7,
  'avg_score': 8.2857142857142

In [148]:
def get_vintage_preferences(client_id, user, content_type='anime', only_watched=True):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    user         - MAL username that is being analysed
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    only_watched - Set False if all entries (including Dropped and Plan to Watch) are to be analysed
    """
    # get user list and season info
    show_list = {}
    user_animelist = get_user_list_ids(client_id, user, content_type, fields='list_status,start_season')
    season_list = {}
    if only_watched:
        for node in user_animelist['data']:
            if node['list_status']['status'] == "completed":
                if 'start_season' in node['node'].keys():
                    show_list[node['node']['id']] = node['list_status']['score']
                    season_list[node['node']['id']] = node['node']['start_season']
    else:
        for node in user_anilist['data']:
            if 'start_season' in node['node'].keys():
                show_list[node['node']['id']] = node['list_status']['score']
                season_list[node['node']['id']] = node['node']['start_season']
    show_ids = []
    seasons = {}
    # get show ids
    for key in show_list.keys():
        show_ids.append(key)
    # for each season, count number of shows and average rating
    for show in season_list.keys():
        season = str(season_list[show]['year'])+" "+season_list[show]['season']
        if season not in seasons.keys():
            seasons[season] = {'season': season_list[show], 'count' : 1, 'avg_score' : show_list[show]}
        else:
            count = seasons[season]['count'] + 1
            avg_score = (seasons[season]['avg_score'] * (count - 1) + show_list[show]) / count
            seasons[season]['count'] = count
            seasons[season]['avg_score'] = avg_score
    return seasons     

In [149]:
print(get_vintage_preferences(client_id, 'Khalid741'))

{'node': {'id': 38101, 'title': '5-toubun no Hanayome', 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1819/97947.jpg', 'large': 'https://api-cdn.myanimelist.net/images/anime/1819/97947l.jpg'}, 'start_season': {'year': 2019, 'season': 'winter'}}, 'list_status': {'status': 'completed', 'score': 8, 'num_episodes_watched': 12, 'is_rewatching': False, 'updated_at': '2021-05-01T10:46:14+00:00'}}
{'node': {'id': 39783, 'title': '5-toubun no Hanayome ∬', 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1775/109514.jpg', 'large': 'https://api-cdn.myanimelist.net/images/anime/1775/109514l.jpg'}, 'start_season': {'year': 2021, 'season': 'winter'}}, 'list_status': {'status': 'completed', 'score': 8, 'num_episodes_watched': 12, 'is_rewatching': False, 'updated_at': '2021-05-06T09:07:13+00:00'}}
{'node': {'id': 41457, 'title': '86', 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1987/117507.jpg', 'large': 'https://api-cdn.m

In [18]:
def get_related_shows(client_id, show_ids, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_ids     - List of show ids to check
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """
    # If only checking 1 show, puts single show into list wrapper
    if type(show_ids) == int:
        show_ids = [show_ids]
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    franchises = {}
    # HTTP GET each show's related series
    for show in show_ids:
        url = base_url + content + "/" + str(show) + "?fields=related_" + content
        r = requests.get(url, headers=auth_header)
        if show not in franchises:
            franchises[show] = r.json()['related_'+content]
    for show in franchises.keys():
        rel_list = []
        for node in franchises[show]:
            rel_list.append(node['node']['id'])
        franchises[show] = rel_list
    return franchises

In [19]:
get_related_shows(client_id, get_show_id(client_id, "Bakemonogatari"))

{5081: [6948, 11597, 15689, 30514, 32268]}

In [20]:
def get_show_pictures(client_id, show_ids, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_ids     - List of show ids to check
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """
    # If only checking 1 show, puts single show into list wrapper
    if type(show_ids) == int:
        show_ids = [show_ids]
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    images = {}
    # HTTP GET each show's related series
    for show in show_ids:
        url = base_url + content + "/" + str(show)
        r = requests.get(url, headers=auth_header)
        images[show] = r.json()['main_picture']
    return images

In [21]:
get_show_pictures(client_id, 1)

{1: {'medium': 'https://api-cdn.myanimelist.net/images/anime/4/19644.jpg',
  'large': 'https://api-cdn.myanimelist.net/images/anime/4/19644l.jpg'}}

In [22]:
def get_show_studio(client_id, show_ids, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_ids     - List of show ids to check
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """
    # If only checking 1 show, puts single show into list wrapper
    if type(show_ids) == int:
        show_ids = [show_ids]
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    studios = {}
    # HTTP GET each show's Production Studios
    for show in show_ids:
        url = base_url + content + "/" + str(show)
        r = requests.get(url, headers=auth_header, params={'fields':'studios'})
        studios[show] = r.json()['studios']
    return studios

In [23]:
get_show_studio(client_id, 1)

{1: [{'id': 14, 'name': 'Sunrise'}]}

In [24]:
def get_preference(client_id, user, field, content_type='anime', only_watched=True):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    user         - MAL username that is being analysed
    field        - string for field that is wanted
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    only_watched - Set False if all entries (including Dropped and Plan to Watch) are to be analysed
    """
    # get user list and season info
    show_list = {}
    user_animelist = get_user_list_ids(client_id, user, content_type, fields='list_status,'+field)
    field_list = {}
    if only_watched:
        for node in user_animelist['data']:
            if node['list_status']['status'] == "completed":
                show_list[node['node']['id']] = node['list_status']['score']
                field_list[node['node']['id']] = node['node'][field]
    else:
        for node in user_anilist['data']:
            show_list[node['node']['id']] = node['list_status']['score']
            field_list[node['node']['id']] = node['node'][field]
    show_ids = []
    fields = {}
    # get show ids
    for key in show_list.keys():
        show_ids.append(key)
    # iterate through genres to get count of number of shows with each genre tag and average user score
    for show in field_list.keys():
        for field_ in field_list[show]:
            if field_['id'] not in fields.keys():
                fields[field_['id']] = {field : field_, 'count' : 1, 'avg_score' : show_list[show]}
            else:
                count = fields[field_['id']]['count'] + 1
                avg_score = (fields[field_['id']]['avg_score'] * (count - 1) + show_list[show]) / count
                fields[field_['id']]['count'] = count
                fields[field_['id']]['avg_score'] = avg_score
    return_field = []
    for field in fields.keys():
        return_field.append(fields[field])
    return return_field            

In [25]:
get_preference(client_id, 'turtlemage', 'studios')

[{'studios': {'id': 1119, 'name': 'Shuka'}, 'count': 1, 'avg_score': 9},
 {'studios': {'id': 441, 'name': '8bit'}, 'count': 4, 'avg_score': 6.25},
 {'studios': {'id': 48, 'name': 'AIC'}, 'count': 1, 'avg_score': 10},
 {'studios': {'id': 314, 'name': 'White Fox'}, 'count': 6, 'avg_score': 8.0},
 {'studios': {'id': 56, 'name': 'A-1 Pictures'},
  'count': 11,
  'avg_score': 6.2727272727272725},
 {'studios': {'id': 1103, 'name': 'TROYCA'}, 'count': 1, 'avg_score': 6},
 {'studios': {'id': 163, 'name': 'asread.'},
  'count': 3,
  'avg_score': 5.666666666666667},
 {'studios': {'id': 537, 'name': 'SANZIGEN'}, 'count': 1, 'avg_score': 7},
 {'studios': {'id': 839, 'name': 'LIDENFILMS'},
  'count': 3,
  'avg_score': 7.333333333333333},
 {'studios': {'id': 300, 'name': 'SILVER LINK.'},
  'count': 8,
  'avg_score': 6.625},
 {'studios': {'id': 44, 'name': 'Shaft'}, 'count': 6, 'avg_score': 8.5},
 {'studios': {'id': 1129, 'name': 'Pierrot Plus'},
  'count': 2,
  'avg_score': 7.0},
 {'studios': {'id':

In [26]:
def get_top_shows(client_id, numShows, content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    numShows     - number of shows (from the top) to return
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """ 
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    # Until numShows is reached, HTTP GET more
    if numShows < 500:
        iteration = numShows
    else:
        iteration = 500
    url = base_url + content + '/ranking?ranking_type=all&limit=' + str(iteration)
    r = requests.get(url, headers=auth_header)
    show_list = []
    i = 0
    while i < numShows:
        for node in r.json()['data']:
            if i < numShows:
                show_list.append(node['node']['id'])
                i = i + 1
        if i < numShows:
            r = requests.get(r.json()['paging']['next'], headers=auth_header)
    return show_list

In [27]:
topshows = get_top_shows(client_id, 200)

In [28]:
get_show_stats(client_id, show_ids=get_top_shows(client_id, 50), stat='studios')

[[{'id': 4, 'name': 'Bones'}],
 [{'id': 1258, 'name': 'Bandai Namco Pictures'}],
 [{'id': 858, 'name': 'Wit Studio'}],
 [{'id': 858, 'name': 'Wit Studio'}, {'id': 1835, 'name': 'CloverWorks'}],
 [{'id': 314, 'name': 'White Fox'}],
 [{'id': 14, 'name': 'Sunrise'}],
 [{'id': 1258, 'name': 'Bandai Namco Pictures'}],
 [{'id': 11, 'name': 'Madhouse'}],
 [{'id': 73, 'name': 'TMS Entertainment'}],
 [{'id': 14, 'name': 'Sunrise'}],
 [{'id': 1269, 'name': 'K-Factory'},
  {'id': 2256, 'name': 'Kitty Film Mitaka Studio'}],
 [{'id': 1258, 'name': 'Bandai Namco Pictures'}],
 [{'id': 44, 'name': 'Shaft'}],
 [{'id': 56, 'name': 'A-1 Pictures'}],
 [{'id': 2, 'name': 'Kyoto Animation'}],
 [{'id': 14, 'name': 'Sunrise'}],
 [{'id': 2, 'name': 'Kyoto Animation'}],
 [{'id': 2, 'name': 'Kyoto Animation'}],
 [{'id': 14, 'name': 'Sunrise'}],
 [{'id': 14, 'name': 'Sunrise'}],
 [{'id': 44, 'name': 'Shaft'}],
 [{'id': 569, 'name': 'MAPPA'}],
 [{'id': 43, 'name': 'ufotable'}],
 [{'id': 1258, 'name': 'Bandai Namco

In [29]:
def get_top_shows_stat(client_id, show_num, stat='', content_type='anime'):
    """
    Parameters:
    
    client_id    - Client ID generated by MyAnimeList app ("www.myanimelist.net/apiconfig/")
    show_num     - number of shows (from the top) to return
    stat         - statistic to get
    content_type - anime or manga, to decide whether to find animelist or mangalist respectively
    """ 
    # Define Content Type for url definition
    if content_type == 'anime':
        content = 'anime'
    else:
        content = 'manga'
    # Creating Authentication header for HTTP GET
    auth_header = {'X-MAL-CLIENT-ID' : client_id}
    base_url = "https://api.myanimelist.net/v2/"
    # Until numShows is reached, HTTP GET more
    if show_num < 500:
        iteration = show_num
    else:
        iteration = 500
    url = base_url + content + '/ranking?ranking_type=all&limit=' + str(iteration) + "&fields=" + stat
    r = requests.get(url, headers=auth_header)
    show_list = []
    i = 0
    while i < show_num:
        for node in r.json()['data']:
            if i < show_num:
                show_list.append(node['node'])
                i = i + 1
        if i < show_num:
            r = requests.get(r.json()['paging']['next'], headers=auth_header)
    return show_list

In [30]:
top_shows = get_top_shows_stat(client_id, 10000, stat='start_season')
i = 1
for show in top_shows:
    print(str(i) + " " + str(show['id']) + " : " +str(show['start_season']))
    i = i+1

1 5114 : {'year': 2009, 'season': 'spring'}
2 28977 : {'year': 2015, 'season': 'spring'}
3 38524 : {'year': 2019, 'season': 'spring'}
4 50265 : {'year': 2022, 'season': 'spring'}
5 9253 : {'year': 2011, 'season': 'spring'}
6 9969 : {'year': 2011, 'season': 'spring'}
7 39486 : {'year': 2021, 'season': 'winter'}
8 11061 : {'year': 2011, 'season': 'fall'}
9 42938 : {'year': 2021, 'season': 'spring'}
10 15417 : {'year': 2012, 'season': 'fall'}
11 820 : {'year': 1988, 'season': 'winter'}
12 34096 : {'year': 2017, 'season': 'winter'}
13 35180 : {'year': 2017, 'season': 'fall'}
14 43608 : {'year': 2022, 'season': 'spring'}
15 28851 : {'year': 2016, 'season': 'summer'}
16 918 : {'year': 2006, 'season': 'spring'}
17 37987 : {'year': 2020, 'season': 'summer'}
18 4181 : {'year': 2008, 'season': 'fall'}
19 15335 : {'year': 2013, 'season': 'summer'}
20 2904 : {'year': 2008, 'season': 'spring'}
21 35247 : {'year': 2017, 'season': 'summer'}
22 48583 : {'year': 2022, 'season': 'winter'}
23 47778 : {'y

KeyError: 'start_season'

In [128]:
def get_friend_list(user):
    # get options for selenium
    firefox_options = Options()
    firefox_options.add_argument('--headless')
    
    # initialize the firefox web driver 
    driver = webdriver.Firefox(WEBDRIVER_PATH, options=firefox_options) 
    
    # define URL
    URL = 'https://www.myanimelist.net/profile/'+user+"/friends/"
    driver.get(URL)
    time.sleep(5)
    # find html element with friends    
    xpath = '/html/body/div[1]/div[2]/div[3]/div[2]/table/tbody/tr/td[2]/div[2]'
    friends_html = driver.find_element_by_xpath(xpath)
    # print(friends_html.text)
    friends_full_list = friends_html.text.split('\n')
    friends_full_list = [i for i in friends_full_list if i]
    i = 0
    friends = []
    for term in friends_full_list:
        if i == 0:
            friends.append(term)
        if i == 2:
            i = -1
        i = i + 1
    return friends

In [116]:
print(get_friend_list("King_Jode"))

  driver = webdriver.Firefox(WEBDRIVER_PATH, options=firefox_options)


['ohio64', 'turtlemage', 'happydr']


  friends_html = driver.find_element_by_xpath(xpath)


In [118]:
get_user_list_ids(client_id, "cheeseflavor")

{'data': [{'node': {'id': 41380,
    'title': '100-man no Inochi no Ue ni Ore wa Tatteiru',
    'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1506/117717.jpg',
     'large': 'https://api-cdn.myanimelist.net/images/anime/1506/117717l.jpg'}},
   'list_status': {'status': 'completed',
    'score': 5,
    'num_episodes_watched': 12,
    'is_rewatching': False,
    'updated_at': '2021-03-29T00:07:49+00:00',
    'finish_date': '2020-12-29'}},
  {'node': {'id': 44881,
    'title': '100-man no Inochi no Ue ni Ore wa Tatteiru 2nd Season',
    'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1424/117718.jpg',
     'large': 'https://api-cdn.myanimelist.net/images/anime/1424/117718l.jpg'}},
   'list_status': {'status': 'on_hold',
    'score': 0,
    'num_episodes_watched': 2,
    'is_rewatching': False,
    'updated_at': '2022-03-21T17:38:13+00:00'}},
  {'node': {'id': 38101,
    'title': '5-toubun no Hanayome',
    'main_picture': {'medium': 'htt

In [143]:
def get_friends_preference(client_id, username, content_type='anime'):
    show_list = {}
    friends = get_friend_list(username)
    for friend in friends:
        s = get_user_list_ids(client_id, friend, content_type=content_type, fields='list_status')
        for node in s['data']:
            if not node['list_status']['score'] == 0:
                show_id = node['node']['id']
                if show_id in show_list.keys():
                    show_list[show_id] = {'count' : show_list[show_id]['count']+1, 'avg' : (show_list[show_id]['avg']*show_list[show_id]['count']+node['list_status']['score'])/(show_list[show_id]['count']+1)}
                else:
                    show_list[show_id] = {'count' : 1, 'avg' : node['list_status']['score']}
    return show_list

In [144]:
print(get_friends_preference(client_id, 'turtlemage'))

  driver = webdriver.Firefox(WEBDRIVER_PATH, options=firefox_options)
  friends_html = driver.find_element_by_xpath(xpath)


{9919: {'count': 2, 'avg': 7.5}, 1575: {'count': 7, 'avg': 9.285714285714286}, 2904: {'count': 6, 'avg': 9.5}, 28121: {'count': 3, 'avg': 7.666666666666667}, 37347: {'count': 2, 'avg': 8.0}, 40454: {'count': 2, 'avg': 8.5}, 6702: {'count': 2, 'avg': 8.5}, 22043: {'count': 2, 'avg': 8.5}, 35972: {'count': 2, 'avg': 8.0}, 5114: {'count': 5, 'avg': 8.2}, 25159: {'count': 1, 'avg': 8}, 37210: {'count': 1, 'avg': 8}, 38472: {'count': 1, 'avg': 10}, 39988: {'count': 1, 'avg': 10}, 40748: {'count': 8, 'avg': 8.75}, 40750: {'count': 2, 'avg': 6.5}, 38000: {'count': 9, 'avg': 8.777777777777779}, 30831: {'count': 4, 'avg': 7.25}, 32937: {'count': 4, 'avg': 7.0}, 38040: {'count': 3, 'avg': 8.0}, 37984: {'count': 1, 'avg': 8}, 17265: {'count': 2, 'avg': 8.0}, 23321: {'count': 2, 'avg': 6.5}, 40496: {'count': 1, 'avg': 8}, 32182: {'count': 6, 'avg': 8.5}, 37510: {'count': 6, 'avg': 8.833333333333334}, 39535: {'count': 6, 'avg': 8.833333333333334}, 23755: {'count': 2, 'avg': 8.0}, 34577: {'count': 2