In [82]:
from jikanpy import Jikan
import json
import time
import pandas as pd

# Build the shared API client, pinned explicitly to the v4 REST endpoint
JIKAN_BASE_URL = 'https://api.jikan.moe/v4'
jikan = Jikan(selected_base=JIKAN_BASE_URL)

In [88]:
# NOTE: the `jikan` client is created once in the setup cell, where it is
# explicitly pinned to the v4 base URL. It is deliberately not re-created
# here, so that configuration is not silently replaced by library defaults.

# Accumulator for the extracted rows; fetch_anime_data() appends one tuple per anime
extracted_data = []

# Inclusive (start_year, end_year) windows to fetch, queried one window at a time
year_ranges = [(2010, 2012), (2013, 2015), (2016, 2018), (2019, 2021), (2022, 2023)]

# Helper: flatten one anime record from a search response into a row tuple
def _anime_to_row(anime):
    """Convert a single anime dict into the 9-field tuple stored in ``extracted_data``.

    Field order: mal_id, default title, Japanese title, score, rank,
    popularity, season, year, comma-separated genre names.
    Missing or null fields become ``None`` (``''`` for genres).
    """
    title_default = None
    title_japanese = None

    # `or []` also covers a key that is present but set to null
    for title_entry in anime.get('titles') or []:
        title_type = title_entry.get('type')
        if title_type == 'Default':
            title_default = title_entry.get('title')
        elif title_type == 'Japanese':
            title_japanese = title_entry.get('title')

    # All genre names as one comma-separated string; unnamed entries are skipped
    genres = ', '.join(
        genre['name'] for genre in (anime.get('genres') or []) if genre.get('name')
    )

    return (
        anime.get('mal_id'),
        title_default,
        title_japanese,
        anime.get('score'),
        anime.get('rank'),
        anime.get('popularity'),
        anime.get('season'),
        anime.get('year'),
        genres,
    )


# Function to fetch and process anime data within a given year range
def fetch_anime_data(start_year, end_year, delay=1.1, max_retries=3):
    """Fetch all TV anime between two years and append them to ``extracted_data``.

    Pages through ``jikan.search`` for entries of type ``tv`` whose dates fall
    between ``{start_year}-01-01`` and ``{end_year}-12-31``, following
    ``pagination.has_next_page`` until the last page.

    Args:
        start_year: First year of the window (inclusive).
        end_year: Last year of the window (inclusive).
        delay: Seconds to wait between successive page requests (rate limit).
        max_retries: How many times a failing page is retried before the
            range is abandoned. ``0`` gives up on the first error.

    Returns:
        The number of rows appended to ``extracted_data`` by this call.

    Errors are reported with ``print`` rather than raised; when a page keeps
    failing, the rows gathered so far are kept and the rest of the range is
    skipped.
    """
    rows_added = 0
    page = 1
    failures = 0  # consecutive failures for the current page

    while True:
        try:
            # Fetch data from the API
            response = jikan.search('anime', '', page=page, parameters={
                'start_date': f'{start_year}-01-01',
                'end_date': f'{end_year}-12-31',
                'type': 'tv',
            })

            # Build the whole page before touching the shared list, so a
            # failure halfway through never leaves a partial (or, after a
            # retry, duplicated) page behind.
            page_rows = [_anime_to_row(anime) for anime in response['data']]
            has_next_page = response['pagination']['has_next_page']

        except Exception as e:
            failures += 1
            print(f"An error occurred: {e}")
            if failures > max_retries:
                print(
                    f"Giving up on {start_year}-{end_year} at page {page}; "
                    "remaining pages were not fetched."
                )
                break
            print(
                f"Retrying page {page} of {start_year}-{end_year} "
                f"(attempt {failures + 1} of {max_retries + 1})..."
            )
            # Back off a little longer after each consecutive failure
            time.sleep(delay * (failures + 1))
            continue

        failures = 0
        extracted_data.extend(page_rows)
        rows_added += len(page_rows)

        # Check if there's another page
        if not has_next_page:
            break

        # Move to the next page
        page += 1

        # Respect the rate limit
        time.sleep(delay)

    return rows_added

In [89]:
# Start from an empty accumulator so that re-running this cell does not append
# a second copy of every row to the list that fetch_anime_data() fills
extracted_data.clear()

# Fetch data for each defined year range
for start_year, end_year in year_ranges:
    print(f"Fetching data from {start_year} to {end_year}...")
    fetch_anime_data(start_year, end_year)

# Convert the list of tuples to a pandas DataFrame; column order matches the
# field order of the tuples appended by fetch_anime_data()
df = pd.DataFrame(extracted_data, columns=[
    'mal_id', 'title_default', 'title_japanese', 'score', 'rank', 'popularity', 'season', 'year', 'genres'
])


Fetching data from 2010 to 2012...
Fetching data from 2013 to 2015...
Fetching data from 2016 to 2018...
Fetching data from 2019 to 2021...
Fetching data from 2022 to 2023...
      mal_id                               title_default  \
0       4334                                     Heroman   
1       5032                         Cobra The Animation   
2       5277                    Sekirei: Pure Engagement   
3       5337       Bakugan Battle Brawlers: New Vestroia   
4       5690                     Nodame Cantabile Finale   
...      ...                                         ...   
3005   57185           Heart Cocktail Colorful: Haru-hen   
3006   57186          Heart Cocktail Colorful: Natsu-hen   
3007   57436           Heart Cocktail Colorful: Fuyu-hen   
3008   57952  Jijia Yingxiong: Ji Dou Yongzhe 2nd Season   
3009   58630                    Oshiri Tantei 8th Season   

                title_japanese  score     rank  popularity  season    year  \
0                       ヒー

In [94]:
# Show the assembled DataFrame (the rendered output elides the middle rows)
df

Unnamed: 0,mal_id,title_default,title_japanese,score,rank,popularity,season,year,genres
0,4334,Heroman,ヒーローマン,6.81,5087.0,3535,spring,2010.0,"Action, Sci-Fi"
1,5032,Cobra The Animation,COBRA THE ANIMATION,7.07,3900.0,6622,winter,2010.0,"Action, Adventure, Sci-Fi"
2,5277,Sekirei: Pure Engagement,セキレイ～Pure Engagement～,7.26,2964.0,1009,summer,2010.0,"Action, Comedy, Ecchi"
3,5337,Bakugan Battle Brawlers: New Vestroia,爆丸バトルブローラーズ New Vestroia,6.56,6448.0,2700,spring,2010.0,"Adventure, Fantasy"
4,5690,Nodame Cantabile Finale,のだめカンタービレ フィナーレ,8.22,355.0,1766,winter,2010.0,"Comedy, Romance"
...,...,...,...,...,...,...,...,...,...
3005,57185,Heart Cocktail Colorful: Haru-hen,ハートカクテル カラフル 春編,,20354.0,20914,winter,2023.0,Romance
3006,57186,Heart Cocktail Colorful: Natsu-hen,ハートカクテル カラフル 夏編,,20355.0,20863,summer,2023.0,Romance
3007,57436,Heart Cocktail Colorful: Fuyu-hen,ハートカクテル カラフル 冬編,,20353.0,17758,fall,2023.0,Romance
3008,57952,Jijia Yingxiong: Ji Dou Yongzhe 2nd Season,機甲英雄 機鬥勇者,,20874.0,23308,fall,2023.0,"Action, Sci-Fi"


In [110]:
import os

# Resolve the directory the notebook is running from. The saved output of this
# cell shows that to be the notebook folder (nb/), not the project root.
script_dir = os.path.realpath(os.getcwd())

# The 'data' directory sits one level above the notebook folder; normalise the
# path so it contains no literal '..' segment
data_dir = os.path.normpath(os.path.join(script_dir, os.pardir, 'data'))

# Ensure the data directory exists
os.makedirs(data_dir, exist_ok=True)

# Output files: an Excel workbook and a CSV copy of the same DataFrame
file_path = os.path.join(data_dir, 'anime_data.xlsx')
file_path2 = os.path.join(data_dir, 'anime_data.csv')

# Save the DataFrame. The CSV is written as UTF-8 with a BOM ('utf-8-sig'),
# presumably so spreadsheet tools detect the encoding of the Japanese titles.
df.to_excel(file_path, index=False, engine='openpyxl')
df.to_csv(file_path2, index=False, encoding='utf-8-sig')

print(f"Data saved to {file_path} & {file_path2}.")


Data saved to C:\Users\spata\Documents\GitHub\animearc_gc\nb\..\data\anime_data.xlsx & C:\Users\spata\Documents\GitHub\animearc_gc\nb\..\data\anime_data.csv.
