In [1]:
import time
import pandas as pd
import numpy as np
from jikanpy import Jikan

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Module-level jikanpy client; get_seasonal_animes() reads this global for its requests.
jikan = Jikan()

In [3]:
def get_seasonal_animes(year,
                        season,
                        delay=2,
                        client=None):
    """Collect every anime entry listed for one season of one year.

    Pages are requested one at a time, starting at page 1, and the loop
    stops as soon as a response reports ``pagination.has_next_page`` as
    falsy.

    Parameters
    ----------
    year : int
        Year of the season to fetch.
    season : str
        Season name, passed through to the client unchanged (this
        notebook uses 'spring', 'summer', 'fall' and 'winter').
    delay : int or float, optional
        Seconds to pause after each request. Defaults to 2, the value
        that used to be hard-coded here.
    client : object, optional
        Any object exposing ``seasons(year=..., season=..., page=...)``
        and returning a mapping with ``'data'`` and ``'pagination'``
        keys. Defaults to the notebook-level ``jikan`` client.

    Returns
    -------
    list
        The ``'data'`` entries of every page, concatenated in page order.

    Raises
    ------
    KeyError
        If a response lacks ``'data'`` or ``'pagination'``/``'has_next_page'``.
    """
    # Resolve the global lazily so an injected client works without it.
    api = jikan if client is None else client

    seasonal_animes = []
    page = 0
    has_next_page = True

    while has_next_page:
        page += 1
        response = api.seasons(year=year, season=season, page=page)
        seasonal_animes.extend(response['data'])
        has_next_page = bool(response['pagination']['has_next_page'])

        # Pause after every request, the last page included: callers fetch
        # several seasons back to back, so this is also the gap between
        # the final request of one season and the first of the next.
        if delay > 0:
            time.sleep(delay)

    return seasonal_animes

In [4]:
def get_animes_range_of_years(start_year, 
                              end_year = None):
    """Gather the anime entries of all four seasons for a span of years.

    Years run from ``start_year`` through ``end_year``, both inclusive.
    When ``end_year`` is falsy (the default ``None``), only ``start_year``
    is collected. Within each year the seasons are fetched in the order
    spring, summer, fall, winter, and a progress line is printed once the
    year is complete.

    Returns a single flat list holding every entry, in fetch order.
    """
    if not end_year:
        end_year = start_year

    season_order = ('spring', 'summer', 'fall', 'winter')
    collected = []

    for current_year in range(start_year, end_year + 1):
        for season_name in season_order:
            collected += get_seasonal_animes(current_year, season_name)

        print(f'## Collecting {current_year} data finished!')

    return collected

In [5]:
# Fetch every season from 2006 through 2022 (both bounds inclusive). This is slow:
# get_seasonal_animes() sleeps 2 seconds after every page request.
anime_data_2006_2022 = get_animes_range_of_years(2006, 2022)
# Flatten the nested records into dot-separated columns (e.g. 'aired.prop.from.year').
df = pd.json_normalize(anime_data_2006_2022)

## Collecting 2006 data finished!
## Collecting 2007 data finished!
## Collecting 2008 data finished!
## Collecting 2009 data finished!
## Collecting 2010 data finished!
## Collecting 2011 data finished!
## Collecting 2012 data finished!
## Collecting 2013 data finished!
## Collecting 2014 data finished!
## Collecting 2015 data finished!
## Collecting 2016 data finished!
## Collecting 2017 data finished!
## Collecting 2018 data finished!
## Collecting 2019 data finished!
## Collecting 2020 data finished!
## Collecting 2021 data finished!
## Collecting 2022 data finished!


In [6]:
# Peek at the first rows to sanity-check the flattened records.
df.head()

Unnamed: 0,mal_id,url,approved,titles,title,title_english,title_japanese,title_synonyms,type,source,...,aired.prop.from.month,aired.prop.from.year,aired.prop.to.day,aired.prop.to.month,aired.prop.to.year,aired.string,broadcast.day,broadcast.time,broadcast.timezone,broadcast.string
0,853,https://myanimelist.net/anime/853/Ouran_Koukou...,True,"[{'type': 'Default', 'title': 'Ouran Koukou Ho...",Ouran Koukou Host Club,Ouran High School Host Club,桜蘭高校ホスト部,"[Ohran Koko Host Club, Ouran Koukou Hosutobu, ...",TV,Manga,...,4,2006,27.0,9.0,2006.0,"Apr 5, 2006 to Sep 27, 2006",Wednesdays,00:50,Asia/Tokyo,Wednesdays at 00:50 (JST)
1,918,https://myanimelist.net/anime/918/Gintama,True,"[{'type': 'Default', 'title': 'Gintama'}, {'ty...",Gintama,Gintama,銀魂,"[Gin Tama, Silver Soul, Yorinuki Gintama-san]",TV,Manga,...,4,2006,25.0,3.0,2010.0,"Apr 4, 2006 to Mar 25, 2010",Thursdays,18:00,Asia/Tokyo,Thursdays at 18:00 (JST)
2,889,https://myanimelist.net/anime/889/Black_Lagoon,True,"[{'type': 'Default', 'title': 'Black Lagoon'},...",Black Lagoon,Black Lagoon,BLACK LAGOON,[],TV,Manga,...,4,2006,25.0,6.0,2006.0,"Apr 9, 2006 to Jun 25, 2006",Sundays,02:35,Asia/Tokyo,Sundays at 02:35 (JST)
3,849,https://myanimelist.net/anime/849/Suzumiya_Har...,True,"[{'type': 'Default', 'title': 'Suzumiya Haruhi...",Suzumiya Haruhi no Yuuutsu,The Melancholy of Haruhi Suzumiya,涼宮ハルヒの憂鬱,[Suzumiya Haruhi no Yuuutsu],TV,Light novel,...,4,2006,3.0,7.0,2006.0,"Apr 3, 2006 to Jul 3, 2006",Mondays,00:00,Asia/Tokyo,Mondays at 00:00 (JST)
4,934,https://myanimelist.net/anime/934/Higurashi_no...,True,"[{'type': 'Default', 'title': 'Higurashi no Na...",Higurashi no Naku Koro ni,Higurashi: When They Cry,ひぐらしのなく頃に,"[When the Cicadas Cry, The Moment the Cicadas ...",TV,Visual novel,...,4,2006,27.0,9.0,2006.0,"Apr 5, 2006 to Sep 27, 2006",Wednesdays,01:30,Asia/Tokyo,Wednesdays at 01:30 (JST)


In [7]:
# (rows, columns) of the collected dataset.
df.shape

(16337, 59)

In [8]:
# Full column list; dotted names are nested fields flattened by pd.json_normalize.
df.columns

Index(['mal_id', 'url', 'approved', 'titles', 'title', 'title_english',
       'title_japanese', 'title_synonyms', 'type', 'source', 'episodes',
       'status', 'airing', 'duration', 'rating', 'score', 'scored_by', 'rank',
       'popularity', 'members', 'favorites', 'synopsis', 'background',
       'season', 'year', 'producers', 'licensors', 'studios', 'genres',
       'explicit_genres', 'themes', 'demographics', 'images.jpg.image_url',
       'images.jpg.small_image_url', 'images.jpg.large_image_url',
       'images.webp.image_url', 'images.webp.small_image_url',
       'images.webp.large_image_url', 'trailer.youtube_id', 'trailer.url',
       'trailer.embed_url', 'trailer.images.image_url',
       'trailer.images.small_image_url', 'trailer.images.medium_image_url',
       'trailer.images.large_image_url', 'trailer.images.maximum_image_url',
       'aired.from', 'aired.to', 'aired.prop.from.day',
       'aired.prop.from.month', 'aired.prop.from.year', 'aired.prop.to.day',
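       'aired.prop.to.month', 'aired.prop.to.year', 'aired.string',
       'broadcast.day', 'broadcast.time', 'broadcast.timezone',
       'broadcast.string'],
      dtype='object')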

In [9]:
# Per-column dtype and non-null count, to spot sparsely populated fields.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16337 entries, 0 to 16336
Data columns (total 59 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   mal_id                            16337 non-null  int64  
 1   url                               16337 non-null  object 
 2   approved                          16337 non-null  bool   
 3   titles                            16337 non-null  object 
 4   title                             16337 non-null  object 
 5   title_english                     7135 non-null   object 
 6   title_japanese                    16279 non-null  object 
 7   title_synonyms                    16337 non-null  object 
 8   type                              16337 non-null  object 
 9   source                            16337 non-null  object 
 10  episodes                          16191 non-null  float64
 11  status                            16337 non-null  object 
 12  airi

In [12]:
# Summary statistics for the numeric columns; transposed so each column is one row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
mal_id,16337.0,34679.77603,15519.817037,356.0,24973.0,37921.0,46916.0,57879.0
episodes,16191.0,13.648323,33.821659,1.0,1.0,2.0,13.0,1664.0
score,10787.0,6.477158,0.894568,2.33,5.84,6.47,7.14,9.09
scored_by,10787.0,39631.189858,138933.921183,101.0,410.5,2471.0,17379.0,2754086.0
rank,12730.0,10020.123095,6062.839913,2.0,4639.25,9967.5,15387.25,20440.0
popularity,16337.0,12638.793842,7919.280985,1.0,5363.0,12626.0,19747.0,26089.0
members,16337.0,50339.30789,190065.515139,17.0,207.0,1169.0,14788.0,3880951.0
favorites,16337.0,563.967436,5013.5643,0.0,0.0,1.0,29.0,223131.0
year,3661.0,2014.680142,4.722127,2006.0,2011.0,2015.0,2018.0,2022.0
aired.prop.from.day,16337.0,12.831548,9.843854,1.0,4.0,11.0,22.0,31.0


In [11]:
# Persist the collected data so the slow API crawl need not be repeated.
# index=False leaves the RangeIndex out of the file.
# NOTE(review): list/dict-valued columns (e.g. 'titles') are presumably written
# as their Python repr strings — confirm before reading the file back.
df.to_csv('anime_data_2006_2022.csv', index=False)