In [175]:
import json
import os
import time

import pandas as pd
from jikanpy import Jikan

# Create the Jikan API client, pointed explicitly at the v4 REST endpoint.
# This module-level `jikan` object is the client fetch_anime_data() queries.
jikan = Jikan(selected_base='https://api.jikan.moe/v4')

In [176]:
# NOTE: the Jikan client is created once in the setup cell above. It is not
# re-created here, because a second `Jikan()` call would silently replace the
# client that was configured with an explicit base URL.

# Accumulator for the extracted rows; fetch_anime_data() appends one tuple per anime
extracted_data = []

# Year windows to fetch, as (start_year, end_year) pairs. Each pair is passed to
# fetch_anime_data(), which queries from January 1st of the first year through
# December 31st of the second, so consecutive windows do not overlap.
year_ranges = [
    (1917, 1989),
    (1990, 1995),
    (1996, 2000),
    (2001, 2005),
    (2006, 2010),
    (2011, 2015),
    (2016, 2020),
    (2021, 2024),
]

# Function to fetch and process anime data within a given year range
def _anime_to_row(anime):
    """Flatten one anime record from a search response into a row tuple.

    Returns ``(mal_id, title_default, title_japanese, score, rank, popularity,
    season, year, genres)``, where ``genres`` is a comma-separated string of
    genre names. Fields missing from the record come back as ``None``
    (``''`` for genres).
    """
    title_default = None
    title_japanese = None

    # `or []` also covers a key that is present but null; `.get(key, [])` would
    # hand back None in that case and make the loop raise.
    for title_entry in anime.get('titles') or []:
        title_type = title_entry.get('type')
        if title_type == 'Default':
            title_default = title_entry.get('title')
        elif title_type == 'Japanese':
            title_japanese = title_entry.get('title')

    # All genre names as one comma-separated string; entries without a name are skipped
    genres = ', '.join(
        genre['name'] for genre in (anime.get('genres') or []) if genre.get('name')
    )

    return (
        anime.get('mal_id'),
        title_default,
        title_japanese,
        anime.get('score'),
        anime.get('rank'),
        anime.get('popularity'),
        anime.get('season'),
        anime.get('year'),
        genres,
    )


def fetch_anime_data(start_year, end_year, request_delay=1.2):
    """Fetch every TV anime in a year range and append it to ``extracted_data``.

    Pages through ``jikan.search('anime', ...)`` filtered by ``start_date``
    (January 1st of ``start_year``), ``end_date`` (December 31st of
    ``end_year``) and ``type='tv'`` until the response reports no further page.
    One tuple per anime (see ``_anime_to_row``) is appended to the module-level
    ``extracted_data`` list.

    Args:
        start_year: First year of the range (inclusive).
        end_year: Last year of the range (inclusive).
        request_delay: Seconds to sleep between page requests to respect the
            API rate limit. Defaults to 1.2, slightly more than one second.

    Returns:
        None. Results are accumulated in ``extracted_data``.

    Errors are handled best-effort: any exception is reported together with the
    range and page at which it happened, and the rest of that range is skipped.
    Rows collected before the failure are kept.
    """
    page = 1
    while True:
        try:
            # Fetch one page of results from the API
            response = jikan.search('anime', '', page=page, parameters={
                'start_date': f'{start_year}-01-01',
                'end_date': f'{end_year}-12-31',
                'type': 'tv',
            })

            # Flatten each anime on this page into a row
            for anime in response.get('data') or []:
                extracted_data.append(_anime_to_row(anime))

            # Stop when the API reports there is no further page
            pagination = response.get('pagination') or {}
            if not pagination.get('has_next_page'):
                break

            page += 1

            # Respect the rate limit before requesting the next page
            time.sleep(request_delay)

        except Exception as e:
            # Say which range/page was lost so a truncated dataset is noticeable
            print(f"An error occurred while fetching {start_year}-{end_year}, page {page}: {e}")
            break

In [177]:
# Start from an empty accumulator so that re-running this cell does not append
# a second copy of every row to the list that fetch_anime_data() fills.
extracted_data.clear()

# Fetch data for each defined year range
for start_year, end_year in year_ranges:
    print(f"Fetching data from {start_year} to {end_year}...")
    fetch_anime_data(start_year, end_year)

# Convert the list of tuples to a pandas DataFrame; the column order matches
# the tuple layout produced by fetch_anime_data()
df = pd.DataFrame(extracted_data, columns=[
    'mal_id', 'title_default', 'title_japanese', 'score', 'rank', 'popularity', 'season', 'year', 'genres'
])


Fetching data from 1917 to 1989...
Fetching data from 1990 to 1995...
Fetching data from 1996 to 2000...
Fetching data from 2001 to 2005...
Fetching data from 2006 to 2010...
Fetching data from 2011 to 2015...
Fetching data from 2016 to 2020...
Fetching data from 2021 to 2024...


In [178]:
# Display the assembled DataFrame (long frames are shown truncated to their first and last rows)
df

Unnamed: 0,mal_id,title_default,title_japanese,score,rank,popularity,season,year,genres
0,80,Kidou Senshi Gundam,機動戦士ガンダム,7.75,1108.0,1691,spring,1979.0,"Action, Drama, Sci-Fi"
1,85,Kidou Senshi Zeta Gundam,機動戦士Ζガンダム,7.89,808.0,2499,spring,1985.0,"Action, Drama, Romance, Sci-Fi"
2,86,Kidou Senshi Gundam ZZ,機動戦士ガンダムZZ,6.65,5963.0,3379,spring,1986.0,"Action, Comedy, Drama, Sci-Fi"
3,223,Dragon Ball,ドラゴンボール,7.97,677.0,149,winter,1986.0,"Action, Adventure, Comedy, Fantasy"
4,283,Akage no Anne,赤毛のアン,7.79,1020.0,3875,winter,1979.0,Drama
...,...,...,...,...,...,...,...,...,...
5420,59147,Tobot V: Ujusuhodae,또봇V 우주수호대,,14301.0,26969,spring,2021.0,"Action, Sci-Fi"
5421,59583,Shasha and Milo Part 2,샤샤&마일로 NEW에피소드,,13478.0,26984,spring,2024.0,"Action, Adventure, Comedy"
5422,59586,Jyulagi Cops 3rd Season,쥬라기 캅스3,,15843.0,27093,fall,2021.0,Action
5423,59649,Nanashi Kaidan 3rd Season,ななし怪談 第3作,,17261.0,22102,summer,2024.0,"Horror, Supernatural"


In [179]:
# The notebook's working directory stands in for the notebook location.
# (`os` is imported in the import cell at the top of the notebook.)
script_dir = os.path.realpath(os.getcwd())

# The 'data' directory is a sibling of the working directory; normpath collapses
# the '..' segment so the reported paths are clean.
data_dir = os.path.normpath(os.path.join(script_dir, '..', 'data'))

# Ensure the data directory exists
os.makedirs(data_dir, exist_ok=True)

# Output files: an Excel workbook and a CSV copy of the same DataFrame
file_path = os.path.join(data_dir, 'anime_data.xlsx')
file_path2 = os.path.join(data_dir, 'anime_data.csv')

# Save the DataFrame. The CSV is written as UTF-8 with a BOM ('utf-8-sig').
df.to_excel(file_path, index=False, engine='openpyxl')
df.to_csv(file_path2, index=False, encoding='utf-8-sig')

print(f"Data saved to {file_path} & {file_path2}.")


Data saved to C:\Users\spata\Documents\GitHub\animearc_gc\nb\..\data\anime_data.xlsx & C:\Users\spata\Documents\GitHub\animearc_gc\nb\..\data\anime_data.csv.


In [180]:
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() -- doing so emits a stray "None" line after the report.
df.info()

# Summary statistics for the numeric columns
print(df.describe())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5425 entries, 0 to 5424
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   mal_id          5425 non-null   int64  
 1   title_default   5425 non-null   object 
 2   title_japanese  5420 non-null   object 
 3   score           4431 non-null   float64
 4   rank            5417 non-null   float64
 5   popularity      5425 non-null   int64  
 6   season          5311 non-null   object 
 7   year            5311 non-null   float64
 8   genres          5425 non-null   object 
dtypes: float64(3), int64(2), object(4)
memory usage: 381.6+ KB
None
             mal_id        score          rank    popularity         year
count   5425.000000  4431.000000   5417.000000   5425.000000  5311.000000
mean   24115.061567     6.830255   7536.837733   7762.020461  2008.859537
std    18771.340739     0.812570   5647.749226   7084.257551    13.177430
min        1.000000     2.890000  

In [181]:
# Earliest and latest values present in the 'year' column
year_column = df['year']
print(year_column.min())
print(year_column.max())

1961.0
2024.0


In [182]:
# Split the comma-separated genre string into a list.
# Only string values are split, so re-running this cell leaves an already-split
# column untouched. Calling `.str.split` again on list values would replace
# every entry with NaN.
df['genres'] = df['genres'].apply(
    lambda value: value.split(', ') if isinstance(value, str) else value
)

# Explode the list so each genre gets its own row.
# An empty genre string splits to [''], so such rows keep a single '' genre.
df_expanded = df.explode('genres')

In [183]:
# Preview the exploded frame: one row per (anime, genre) pair, keeping the original row index
df_expanded.head()

Unnamed: 0,mal_id,title_default,title_japanese,score,rank,popularity,season,year,genres
0,80,Kidou Senshi Gundam,機動戦士ガンダム,7.75,1108.0,1691,spring,1979.0,Action
0,80,Kidou Senshi Gundam,機動戦士ガンダム,7.75,1108.0,1691,spring,1979.0,Drama
0,80,Kidou Senshi Gundam,機動戦士ガンダム,7.75,1108.0,1691,spring,1979.0,Sci-Fi
1,85,Kidou Senshi Zeta Gundam,機動戦士Ζガンダム,7.89,808.0,2499,spring,1985.0,Action
1,85,Kidou Senshi Zeta Gundam,機動戦士Ζガンダム,7.89,808.0,2499,spring,1985.0,Drama


In [184]:
# Number of exploded rows per genre, as a two-column frame (genres, count)
rows_per_genre = df_expanded.groupby(['genres']).size()
genre_count = rows_per_genre.reset_index(name='count')

# Share of each genre among all genre rows (not among distinct anime), in percent
total_genre_rows = genre_count['count'].sum()
genre_count['percentage'] = (genre_count['count'] / total_genre_rows * 100).round(1)
genre_count

Unnamed: 0,genres,count,percentage
0,,331,2.8
1,Action,1632,13.9
2,Adventure,1145,9.8
3,Avant Garde,24,0.2
4,Award Winning,54,0.5
5,Boys Love,31,0.3
6,Comedy,2220,18.9
7,Drama,832,7.1
8,Ecchi,312,2.7
9,Erotica,8,0.1


In [185]:
# Order genres from the most to the least frequent
sort_genre_count = genre_count.sort_values('count', ascending=False)
sort_genre_count

Unnamed: 0,genres,count,percentage
6,Comedy,2220,18.9
1,Action,1632,13.9
10,Fantasy,1388,11.8
2,Adventure,1145,9.8
16,Sci-Fi,1003,8.6
7,Drama,832,7.1
15,Romance,779,6.6
17,Slice of Life,482,4.1
19,Supernatural,471,4.0
14,Mystery,336,2.9


In [186]:
# Print the minimum and then the maximum of each metric column, in this order
for metric_name in ('rank', 'score', 'popularity'):
    metric_values = df[metric_name]
    print(metric_values.min())
    print(metric_values.max())

1.0
20916.0
2.89
9.33
1
27109


In [187]:
# The ten rows with the lowest 'popularity' values
pop_10 = df.nsmallest(n=10, columns='popularity')
pop_10

Unnamed: 0,mal_id,title_default,title_japanese,score,rank,popularity,season,year,genres
2643,16498,Shingeki no Kyojin,進撃の巨人,8.55,109.0,1,spring,2013.0,"[Action, Award Winning, Drama, Suspense]"
1585,1535,Death Note,デスノート,8.62,85.0,2,fall,2006.0,"[Supernatural, Suspense]"
1928,5114,Fullmetal Alchemist: Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,9.09,2.0,3,spring,2009.0,"[Action, Adventure, Drama, Fantasy]"
3091,30276,One Punch Man,ワンパンマン,8.49,140.0,4,fall,2015.0,"[Action, Comedy]"
2456,11757,Sword Art Online,ソードアート・オンライン,7.21,3194.0,5,summer,2012.0,"[Action, Adventure, Fantasy, Romance]"
4122,38000,Kimetsu no Yaiba,鬼滅の刃,8.46,152.0,6,spring,2019.0,"[Action, Award Winning, Fantasy]"
3432,31964,Boku no Hero Academia,僕のヒーローアカデミア,7.85,885.0,7,spring,2016.0,[Action]
2422,11061,Hunter x Hunter (2011),HUNTER×HUNTER（ハンター×ハンター）,9.03,8.0,9,fall,2011.0,"[Action, Adventure, Fantasy]"
2864,22319,Tokyo Ghoul,東京喰種-トーキョーグール-,7.79,1015.0,10,summer,2014.0,"[Action, Fantasy, Horror, Suspense]"
3335,25777,Shingeki no Kyojin Season 2,進撃の巨人 Season2,8.52,123.0,12,spring,2017.0,"[Action, Drama, Suspense]"


In [188]:
# The ten rows with the lowest 'rank' values
rank_10 = df.nsmallest(n=10, columns='rank')
rank_10

Unnamed: 0,mal_id,title_default,title_japanese,score,rank,popularity,season,year,genres
5148,52991,Sousou no Frieren,葬送のフリーレン,9.33,1.0,212,fall,2023.0,"[Adventure, Drama, Fantasy]"
1928,5114,Fullmetal Alchemist: Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,9.09,2.0,3,spring,2009.0,"[Action, Adventure, Drama, Fantasy]"
2299,9253,Steins;Gate,STEINS;GATE,9.07,3.0,14,spring,2011.0,"[Drama, Sci-Fi, Suspense]"
4183,38524,Shingeki no Kyojin Season 3 Part 2,進撃の巨人 Season3 Part.2,9.05,5.0,21,spring,2019.0,"[Action, Drama, Suspense]"
2330,9969,Gintama',銀魂',9.03,7.0,395,spring,2011.0,"[Action, Comedy, Sci-Fi]"
2422,11061,Hunter x Hunter (2011),HUNTER×HUNTER（ハンター×ハンター）,9.03,8.0,9,fall,2011.0,"[Action, Adventure, Fantasy]"
2596,15417,Gintama': Enchousen,銀魂' 延長戦,9.02,9.0,744,fall,2012.0,"[Action, Comedy, Sci-Fi]"
4653,41467,Bleach: Sennen Kessen-hen,BLEACH 千年血戦篇,9.01,10.0,394,fall,2022.0,"[Action, Adventure, Fantasy]"
4724,43608,Kaguya-sama wa Kokurasetai: Ultra Romantic,かぐや様は告らせたい-ウルトラロマンティック-,9.0,12.0,170,spring,2022.0,"[Comedy, Romance]"
3656,34096,Gintama.,銀魂。,8.98,14.0,770,winter,2017.0,"[Action, Comedy, Sci-Fi]"


In [189]:
# The ten rows with the highest 'score' values
score_10 = df.nlargest(n=10, columns='score')
score_10

Unnamed: 0,mal_id,title_default,title_japanese,score,rank,popularity,season,year,genres
5148,52991,Sousou no Frieren,葬送のフリーレン,9.33,1.0,212,fall,2023.0,"[Adventure, Drama, Fantasy]"
1928,5114,Fullmetal Alchemist: Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,9.09,2.0,3,spring,2009.0,"[Action, Adventure, Drama, Fantasy]"
2299,9253,Steins;Gate,STEINS;GATE,9.07,3.0,14,spring,2011.0,"[Drama, Sci-Fi, Suspense]"
4183,38524,Shingeki no Kyojin Season 3 Part 2,進撃の巨人 Season3 Part.2,9.05,5.0,21,spring,2019.0,"[Action, Drama, Suspense]"
2330,9969,Gintama',銀魂',9.03,7.0,395,spring,2011.0,"[Action, Comedy, Sci-Fi]"
2422,11061,Hunter x Hunter (2011),HUNTER×HUNTER（ハンター×ハンター）,9.03,8.0,9,fall,2011.0,"[Action, Adventure, Fantasy]"
2596,15417,Gintama': Enchousen,銀魂' 延長戦,9.02,9.0,744,fall,2012.0,"[Action, Comedy, Sci-Fi]"
4653,41467,Bleach: Sennen Kessen-hen,BLEACH 千年血戦篇,9.01,10.0,394,fall,2022.0,"[Action, Adventure, Fantasy]"
4724,43608,Kaguya-sama wa Kokurasetai: Ultra Romantic,かぐや様は告らせたい-ウルトラロマンティック-,9.0,12.0,170,spring,2022.0,"[Comedy, Romance]"
3656,34096,Gintama.,銀魂。,8.98,14.0,770,winter,2017.0,"[Action, Comedy, Sci-Fi]"
