In [9]:
import requests
import pandas as pd
import time
import key

# Get anime list

In [10]:
# Request header carrying the MyAnimeList client id; passed as `headers=` on every API call.
header = {
    'X-MAL-CLIENT-ID': key.api_key,
}

In [11]:
# example: fetch the top two entries of the overall ranking to inspect the response layout
anime_response = requests.get(
    "https://api.myanimelist.net/v2/anime/ranking",
    params={"ranking_type": "all", "limit": 2},
    headers=header,
    timeout=30,  # without a timeout requests can wait on a stalled connection forever
)
# Turn a non-2xx reply into an HTTPError here instead of displaying an error payload as if it were data.
anime_response.raise_for_status()
anime_response_json = anime_response.json()
anime_response_json

{'data': [{'node': {'id': 51535,
    'title': 'Shingeki no Kyojin: The Final Season - Kanketsu-hen',
    'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1279/131078.jpg',
     'large': 'https://api-cdn.myanimelist.net/images/anime/1279/131078l.jpg'}},
   'ranking': {'rank': 1}},
  {'node': {'id': 5114,
    'title': 'Fullmetal Alchemist: Brotherhood',
    'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1208/94745.jpg',
     'large': 'https://api-cdn.myanimelist.net/images/anime/1208/94745l.jpg'}},
   'ranking': {'rank': 2}}],
 'paging': {'next': 'https://api.myanimelist.net/v2/anime/ranking?offset=2&ranking_type=all&limit=2'}}

In [12]:
# Walk the overall ranking page by page (500 entries per request) and collect the
# id, title and medium-size picture URL of the first 10,000 anime into three parallel lists.
idx_list = []
title_list = []
pic_list = []
api_url = "https://api.myanimelist.net/v2/anime/ranking?ranking_type=all&limit=500"
target_count = 10000

while api_url and len(idx_list) < target_count:
    ranking_response = requests.get(api_url, headers=header, timeout=30)
    # Fail with a clear HTTPError rather than a confusing KeyError / JSON error further down.
    ranking_response.raise_for_status()
    anime_response_json = ranking_response.json()

    # Use whatever the page actually contains (it may hold fewer than 500 entries)
    # and never collect more than target_count rows in total.
    remaining = target_count - len(idx_list)
    page_entries = anime_response_json.get('data', [])[:remaining]
    if not page_entries:
        # An empty page means there is nothing more to read; avoid looping forever.
        break

    for entry in page_entries:
        node = entry['node']
        idx_list.append(node['id'])
        title_list.append(node['title'])
        # An entry with no (or null) main_picture is recorded as None instead of aborting the crawl;
        # the null check on the resulting DataFrame will surface such rows.
        pic_list.append((node.get('main_picture') or {}).get('medium'))

    # The link to the following page; when it is missing the loop ends instead of raising KeyError.
    api_url = anime_response_json.get('paging', {}).get('next')

print('idx_list length:', len(idx_list))
print('title_list length:', len(title_list))
print('pic_list length:', len(pic_list))

idx_list length: 10000
title_list length: 10000
pic_list length: 10000


In [13]:
# Combine the three parallel lists into a single table, one row per anime.
anime_columns = {
    'id': idx_list,
    'title': title_list,
    'picture_url': pic_list,
}
anime_df = pd.DataFrame(anime_columns)
anime_df.head()

Unnamed: 0,id,title,picture_url
0,51535,Shingeki no Kyojin: The Final Season - Kankets...,https://api-cdn.myanimelist.net/images/anime/1...
1,5114,Fullmetal Alchemist: Brotherhood,https://api-cdn.myanimelist.net/images/anime/1...
2,41467,Bleach: Sennen Kessen-hen,https://api-cdn.myanimelist.net/images/anime/1...
3,9253,Steins;Gate,https://api-cdn.myanimelist.net/images/anime/1...
4,28977,Gintama°,https://api-cdn.myanimelist.net/images/anime/3...


In [14]:
# Sanity check: (number of rows, number of columns) of the collected ranking table.
anime_df.shape

(10000, 3)

In [15]:
# check for duplicates: flag rows that repeat an earlier row in full, then tally the flags
row_is_repeat = anime_df.duplicated()
row_is_repeat.value_counts()

False    10000
dtype: int64

In [16]:
# check nulls: number of missing values in each column
missing_per_column = anime_df.isna().sum()
missing_per_column

id             0
title          0
picture_url    0
dtype: int64

In [17]:
# Save the ranking table; the positional index is not written to the file.
anime_csv_path = 'data/anime_df.csv'
anime_df.to_csv(anime_csv_path, index=False)

# Get anime details

In [18]:
# Fields that can be requested (via the `fields` query parameter) when fetching anime details:
# id,title,main_picture,alternative_titles,start_date,end_date,synopsis,mean,rank,popularity,num_list_users,num_scoring_users,
# nsfw,created_at,updated_at,media_type,status,genres,my_list_status,num_episodes,start_season,broadcast,source,
# average_episode_duration,rating,pictures,background,related_anime,related_manga,recommendations,studios,statistics

In [22]:
# example: fetch the details of a single anime (id 51535) to inspect the response layout
details_response = requests.get(
    'https://api.myanimelist.net/v2/anime/51535?fields=id,alternative_titles,synopsis,mean,rank,popularity,nsfw,media_type,status,genres,num_episodes,start_season,source,average_episode_duration,rating,background,studios,statistics',
    headers=header,
    timeout=30,  # without a timeout requests can wait on a stalled connection forever
)
# Turn a non-2xx reply into an HTTPError here instead of displaying an error payload as if it were data.
details_response.raise_for_status()
details_response_json = details_response.json()
details_response_json

{'id': 51535,
 'title': 'Shingeki no Kyojin: The Final Season - Kanketsu-hen',
 'main_picture': {'medium': 'https://api-cdn.myanimelist.net/images/anime/1279/131078.jpg',
  'large': 'https://api-cdn.myanimelist.net/images/anime/1279/131078l.jpg'},
 'alternative_titles': {'synonyms': ['Shingeki no Kyojin: The Final Season Part 3',
   'Shingeki no Kyojin Season 4',
   'Attack on Titan Season 4'],
  'en': 'Attack on Titan: Final Season - The Final Chapters',
  'ja': '進撃の巨人 The Final Season完結編'},
 'synopsis': 'The conclusion to Shingeki no Kyojin.',
 'mean': 9.11,
 'rank': 1,
 'popularity': 540,
 'nsfw': 'white',
 'media_type': 'special',
 'status': 'currently_airing',
 'genres': [{'id': 1, 'name': 'Action'},
  {'id': 8, 'name': 'Drama'},
  {'id': 58, 'name': 'Gore'},
  {'id': 38, 'name': 'Military'},
  {'id': 27, 'name': 'Shounen'},
  {'id': 76, 'name': 'Survival'},
  {'id': 41, 'name': 'Suspense'}],
 'num_episodes': 2,
 'start_season': {'year': 2023, 'season': 'winter'},
 'source': 'mang

In [23]:
# Fetch the detail record of every id in idx_list and collect the parsed JSON objects.
#
# History: requesting all ids in one pass with a 0.5 s sleep between calls (about 2.5 hours)
# ran into JSON decoder errors / a busy server, so the ids are fetched in batches of 400 with
# a 3-minute pause between batches (about 3-4 hours). Individual requests are additionally
# retried, because a single bad response would otherwise throw away hours of progress.
anime_details_list = []

detail_fields = 'id,alternative_titles,synopsis,mean,rank,popularity,nsfw,media_type,status,genres,num_episodes,start_season,source,average_episode_duration,rating,background,studios,statistics'
batch_size = 400               # ids requested back-to-back before pausing
batch_pause_seconds = 180      # pause between batches, also used as the wait before a retry
max_attempts = 5               # tries per id before the error is allowed to propagate
request_timeout_seconds = 30   # without a timeout requests can wait on a stalled connection forever

for batch_start in range(0, len(idx_list), batch_size):
    for x in idx_list[batch_start:batch_start + batch_size]:
        api_url = f'https://api.myanimelist.net/v2/anime/{x}?fields={detail_fields}'
        for attempt in range(1, max_attempts + 1):
            try:
                details_reply = requests.get(api_url, headers=header, timeout=request_timeout_seconds)
                details_reply.raise_for_status()
                # A body that is not JSON makes .json() raise a JSONDecodeError (a ValueError subclass).
                details = details_reply.json()
            except (requests.RequestException, ValueError):
                if attempt == max_attempts:
                    # Give up on this id and surface the original error.
                    raise
                # Back off before trying the same id again.
                time.sleep(batch_pause_seconds)
            else:
                anime_details_list.append(details)
                break
    # Pause between batches only; sleeping after the final batch would just waste time.
    if batch_start + batch_size < len(idx_list):
        time.sleep(batch_pause_seconds)

print('anime_details_list length:', len(anime_details_list))

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [14]:
# Build the details table from the collected records, leaving out the 'title' and
# 'main_picture' columns.
anime_details_df = (
    pd.DataFrame(anime_details_list)
    .drop(columns=['title', 'main_picture'])
)
anime_details_df.head()

Unnamed: 0,id,alternative_titles,synopsis,nsfw,media_type,genres,source,studios
0,51535,{'synonyms': ['Shingeki no Kyojin: The Final S...,The conclusion to Shingeki no Kyojin.,white,special,"[{'id': 1, 'name': 'Action'}, {'id': 8, 'name'...",manga,"[{'id': 569, 'name': 'MAPPA'}]"
1,5114,{'synonyms': ['Hagane no Renkinjutsushi: Fullm...,After a horrific alchemy experiment goes wrong...,white,tv,"[{'id': 1, 'name': 'Action'}, {'id': 2, 'name'...",manga,"[{'id': 4, 'name': 'Bones'}]"
2,41467,{'synonyms': ['Bleach: Thousand-Year Blood War...,Substitute Soul Reaper Ichigo Kurosaki spends ...,white,tv,"[{'id': 1, 'name': 'Action'}, {'id': 2, 'name'...",manga,"[{'id': 1, 'name': 'Pierrot'}]"
3,9253,"{'synonyms': [], 'en': 'Steins;Gate', 'ja': 'S...",Eccentric scientist Rintarou Okabe has a never...,white,tv,"[{'id': 8, 'name': 'Drama'}, {'id': 40, 'name'...",visual_novel,"[{'id': 314, 'name': 'White Fox'}]"
4,28977,"{'synonyms': ['Gintama' (2015)'], 'en': 'Ginta...","Gintoki, Shinpachi, and Kagura return as the f...",white,tv,"[{'id': 1, 'name': 'Action'}, {'id': 4, 'name'...",manga,"[{'id': 1258, 'name': 'Bandai Namco Pictures'}]"


In [28]:
# Sanity check: (number of rows, number of columns) of the details table.
anime_details_df.shape

(10000, 8)

In [29]:
# Flag ids that repeat an earlier id, then tally flagged vs. unflagged rows.
id_is_repeat = anime_details_df['id'].duplicated()
id_is_repeat.value_counts()

False    10000
Name: id, dtype: int64

In [30]:
# Save the details table; the positional index is not written to the file.
details_csv_path = 'data/anime_details_df.csv'
anime_details_df.to_csv(details_csv_path, index=False)