In [1]:
#imports
import config
import requests
import json
import csv
import pprint
import pandas as pd


# Shared PrettyPrinter instance for readable output of nested structures.
pp = pprint.PrettyPrinter(indent=1)

# The API key is read from config.py; config.py is listed in .gitignore so the key is not committed.
api_key = config.api_key

In [2]:
# Fetch the list of TV genres so the numeric genre ids can be looked up.
genre_list_url = f"https://api.themoviedb.org/3/genre/tv/list?api_key={api_key}&language=en-US"
genres = requests.get(genre_list_url).json()

# The printed listing shows that "Animation" has genre id 16.
pp.pprint(genres)

# Trial call to the discover endpoint. Query-string parameters used:
# api_key, language, sort_by, page, with_genres, include_null_first_air_dates
discover_url = f"https://api.themoviedb.org/3/discover/tv?api_key={api_key}&language=en-US&sort_by=first_air_date.desc&page=1&with_genres=16&include_null_first_air_dates=false"
discover_payload = requests.get(discover_url).json()
sample = pd.DataFrame(discover_payload["results"])

# A single page returned by the API holds only 20 TV shows.
pp.pprint(sample)
sample.shape

{'genres': [{'id': 10759, 'name': 'Action & Adventure'},
            {'id': 16, 'name': 'Animation'},
            {'id': 35, 'name': 'Comedy'},
            {'id': 80, 'name': 'Crime'},
            {'id': 99, 'name': 'Documentary'},
            {'id': 18, 'name': 'Drama'},
            {'id': 10751, 'name': 'Family'},
            {'id': 10762, 'name': 'Kids'},
            {'id': 9648, 'name': 'Mystery'},
            {'id': 10763, 'name': 'News'},
            {'id': 10764, 'name': 'Reality'},
            {'id': 10765, 'name': 'Sci-Fi & Fantasy'},
            {'id': 10766, 'name': 'Soap'},
            {'id': 10767, 'name': 'Talk'},
            {'id': 10768, 'name': 'War & Politics'},
            {'id': 37, 'name': 'Western'}]}
                       backdrop_path first_air_date               genre_ids  \
0                               None     2035-11-09  [10762, 16, 35, 10751]   
1                               None     2021-09-10         [16, 18, 10765]   
2   /r4xrCeKjAbMlv3d46OaZXZoIU

(20, 13)

In [3]:
# Download every page of animated TV shows (genre id 16) from the discover endpoint
# and collate them into one dataframe, `animated`.

# upper bound on the pages requested; same limit the loop used before (pages 1..500)
max_pages = 500

# one dataframe per non-empty page; concatenated once at the end instead of
# re-copying the growing dataframe on every iteration
page_frames = []

for i in range(1, max_pages + 1):
    response = requests.get(f"https://api.themoviedb.org/3/discover/tv?api_key={api_key}&language=en-US&sort_by=first_air_date.desc&page={i}&with_genres=16&include_null_first_air_dates=false", timeout = 30)

    # fail with the real HTTP error (e.g. an invalid key) rather than a KeyError on "results"
    response.raise_for_status()
    payload = response.json()

    results = payload.get("results", [])
    if results:
        page_frames.append(pd.DataFrame(results))

    # the response reports how many pages exist ("total_pages"); stop there instead of
    # requesting pages that hold no shows. If the field is missing, fall back to max_pages.
    if i >= payload.get("total_pages", max_pages):
        break

# to collate all the animated tv shows in one big dataframe
animated = pd.concat(page_frames, ignore_index = True) if page_frames else pd.DataFrame()

In [4]:
# got rid of useless columns
animated = animated.drop(columns = ["backdrop_path", "poster_path", "original_language", "vote_count", "popularity"])

# renaming columns
# the column delivered by the API is "genre_ids", and rename() must be told to act on columns;
# a bare mapping is applied to the row labels, which silently renames nothing
animated = animated.rename(columns = {"genre_ids" : "genres"})

# removing rows of tv shows that aren't from JP
# did a membership test, instead of == ['JP'], to include shows that are worked on in multiple countries including japan
# built as one boolean mask rather than dropping rows one at a time while iterating over the dataframe
from_japan = animated["origin_country"].apply(lambda countries: "JP" in countries).astype(bool)
animated = animated.loc[from_japan]

# convert to csv
animated.to_csv("from_japan.csv", index = False)

# convert to excel
animated.to_excel("from_japan.xlsx", index = False)

In [5]:
# sanity check: display (rows, columns) of `animated`
animated.shape

(3332, 8)