# Extracting information using Jikan API
https://docs.api.jikan.moe/

In [None]:
# Import Libraries
import requests, json, os, sys, time
import pandas as pd
from datetime import datetime

# V.5 BY PAGINATION

In [None]:
# --- Time stamps used later to build unique output file names ---
current_time = time.strftime("%H_%M_%S")  # local time, formatted HH_MM_SS
date = datetime.now()
actual_date = date.strftime("%Y_%m_%d")

# --- Folder layout ---
folder_path = sys.path[0]
# Switch the working directory to the parent of sys.path[0]
# (original note: meant to make the notebook location the base path so relative paths work).
os.chdir(os.path.dirname(folder_path))
data_path = os.path.join(folder_path, "data")  # the generated csv files are saved here
logs_path = os.path.join(folder_path, "data", "logs")  # the log files are saved here

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Accumulators filled while the API is being read
anime_list, missing_requests_list, resource_does_not_exist = [], [], []
def create_folder(name):
    """Create ``<folder_path>/<name>`` and the ``logs`` folder inside ``data_path``.

    Relies on the module-level ``folder_path`` and ``data_path`` variables.
    Folders that already exist are left untouched and a message is printed instead.

    Args:
        name: Name of the folder to create under ``folder_path`` (converted with ``str``).
    """
    folder_name = str(name)
    try:
        os.makedirs(os.path.join(folder_path, folder_name))
    except FileExistsError:
        print("The folder", folder_name, "already exist")

    logs_dir = os.path.join(data_path, "logs")
    try:
        # makedirs (not mkdir) so a missing ``data_path`` parent is created as well
        os.makedirs(logs_dir)
        print("The folder logs was created")
    except FileExistsError:
        # Report the folder that actually exists ("logs"), not a dated name
        print("The folder logs already exist")
# Make sure <folder_path>/data and the logs folder under data_path exist before any file is saved there.
create_folder("data")

def try_it(i):
    """Return ``i["name"]``, or ``None`` when the entry has no usable name.

    Args:
        i: One entry of a category list (e.g. a genre or studio dictionary).

    Returns:
        The value stored under ``"name"``, or ``None`` if ``i`` has no such key
        or is not subscriptable by a string key.
    """
    try:
        return i["name"]
    except (LookupError, TypeError):
        # Only "missing / wrong shape" errors mean "empty"; anything else should surface.
        return None

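'''
NOTE: the helper described here (`movie`) is not defined in this paginated version, so no "Finished" column is built below.
It is defined in the "v.4 id by id" section, where it works as follows:
for a Movie, OVA, Special or Music entry it returns the release year; otherwise it returns the finishing year,
or a placeholder number (999 while still airing, 666 when there is no finishing year, 0 when the data is missing).
'''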

url = "https://api.jikan.moe/v4/anime"  # Jikan v4 endpoint that lists anime page by page

# The first request is only used to learn how many result pages exist.
r = requests.get(url)
r.raise_for_status()  # fail early with a clear HTTP error instead of a KeyError on the next lines
data = r.json()
n_pages = data['pagination']['last_visible_page']

for page in range(1, n_pages + 1):
    try:
        r_page = requests.get(url + '?page=' + str(page))
    except requests.RequestException as error:
        # No response object exists here, so report the exception and keep the page for a retry.
        missing_requests_list.append(page)
        print(f"Request for page {page} failed: {error}\n")
        continue
    time.sleep(1)  # pause between requests: asking too fast may get us blocked by the server

    # Check the HTTP status before touching the payload: an error response has no "data" key.
    if r_page.status_code != 200:
        if r_page.status_code == 429:
            # Rate limited: remember the page so it can be requested again later
            missing_requests_list.append(page)
        else:
            resource_does_not_exist.append(page)
        print (f"El código de estado de la petición es: {r_page.status_code}. Estatus {r_page.reason}. No se puede recoger información de la página {page}\n")
        continue

    content = r_page.json()
    print (page)
    data = content["data"]

    for char in data:  # one entry per anime on this page
        try:
            released_year = char["aired"]["prop"]["from"]["year"]
            # Empty / missing values are stored as None (N_Episodes falls back to 0)
            anime_dict = {"Cover" : char["images"]["jpg"]["large_image_url"] or None,
                        "English_Title" : char["title"] or None,
                        "Japanses_Title" : char["title_japanese"] or None,
                        "Type" : char["type"] or None,
                        "Source" : char["source"] or None,  # was gated on "status" by mistake
                        "Audience" : [try_it(i) for i in char["demographics"]],
                        "N_Episodes" : int(char["episodes"]) if char["episodes"] else 0,
                        "Duration" : char["duration"] or None,
                        "Rating" : char["rating"] or None,
                        "Score" : char["score"] or None,
                        "Scored_by" : char["scored_by"] or None,
                        "Rank" : int(char["rank"]) if char["rank"] else None,
                        "Season" : char["season"] or None,
                        "Genre" : [try_it(i) for i in char["genres"]],
                        "Theme" : [try_it(i) for i in char["themes"]],
                        "Released" : int(released_year) if released_year else None,
                        "Studios" : [try_it(i) for i in char["studios"]],
                        "Producers" : [try_it(i) for i in char["producers"]]
                        }
        except (KeyError, TypeError, ValueError):
            # The entry does not have the expected shape: log the page it came from and move on.
            resource_does_not_exist.append(page)
            print (f"El código de estado de la petición es: {r_page.status_code}. Estatus {r_page.reason}. No se puede recoger información de la página {page}\n")
            continue

        anime_list.append(anime_dict)

In [None]:
# Build a DataFrame from anime_list and save it as <data_path>/anime_<date>_<time>.csv
anime_df = pd.DataFrame(anime_list)
anime_csv = os.path.join(data_path, f"anime_{actual_date}_{current_time}.csv")
anime_df.to_csv(anime_csv, sep=';', index=False)
# Print the real file name (the previous message dropped the "_" between date and time)
print(f'{os.path.basename(anime_csv)} created\n\n')

# v.4 id by id

In [None]:
# Import Libraries
import requests, json, os, sys, time
import pandas as pd
from datetime import datetime
# Time stamps used later to name the saved documents
current_time = time.strftime("%H_%M_%S")  # local time, formatted HH_MM_SS
date = datetime.now()
actual_date = date.strftime("%Y_%m_%d")

# Folder layout
folder_path = sys.path[0]
# Switch the working directory to the parent of sys.path[0]
# (original note: meant to make the notebook location the base path so relative paths work).
os.chdir(os.path.dirname(folder_path))
data_path = os.path.join(folder_path, "data")
logs_path = os.path.join(folder_path, "data", "logs")

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Accumulators filled while the API is being read
anime_list, missing_requests_list, resource_does_not_exist = [], [], []
def create_folder(name):
    """Create ``<folder_path>/<name>`` and the ``logs`` folder inside ``data_path``.

    Relies on the module-level ``folder_path`` and ``data_path`` variables.
    Folders that already exist are left untouched and a message is printed instead.

    Args:
        name: Name of the folder to create under ``folder_path`` (converted with ``str``).
    """
    folder_name = str(name)
    try:
        os.makedirs(os.path.join(folder_path, folder_name))
    except FileExistsError:
        print("The folder", folder_name, "already exist")

    logs_dir = os.path.join(data_path, "logs")
    try:
        # makedirs (not mkdir) so a missing ``data_path`` parent is created as well
        os.makedirs(logs_dir)
        print("The folder logs was created")
    except FileExistsError:
        # Report the folder that actually exists ("logs"), not a dated name
        print("The folder logs already exist")
# Make sure <folder_path>/data and the logs folder under data_path exist before any file is saved there.
create_folder("data")
def try_it(i):
    """Return ``i["name"]``, or ``None`` when the entry has no usable name.

    Args:
        i: One entry of a category list (e.g. a genre or studio dictionary).

    Returns:
        The value stored under ``"name"``, or ``None`` if ``i`` has no such key
        or is not subscriptable by a string key.
    """
    try:
        return i["name"]
    except (LookupError, TypeError):
        # Only "missing / wrong shape" errors mean "empty"; anything else should surface.
        return None
def movie(anime=None):
    """Return the year an anime finished, or a placeholder number when it is unknown.

    Args:
        anime: Optional anime record (the ``data`` object of an API response).
            When omitted, the module-level ``data`` variable is read, which is
            what a plain ``movie()`` call relies on.

    Returns:
        * the release year (``aired -> prop -> from -> year``) for entries whose
          type is Movie, OVA, Special or Music;
        * 999 when the status is "Currently Airing";
        * the finishing year (``aired -> prop -> to -> year``) otherwise;
        * 666 when the selected year is ``None`` (no information);
        * 0 when the record is missing or lacks the expected fields.
    """
    try:
        record = data if anime is None else anime
        if record["type"] in ("Movie", "OVA", "Special", "Music"):
            year = record["aired"]["prop"]["from"]["year"]
        elif record["status"] == "Currently Airing":
            return 999  # Still airing
        else:
            year = record["aired"]["prop"]["to"]["year"]
    except (KeyError, IndexError, TypeError, NameError):
        return 0  # No information about it
    # Never hand None back: callers convert the result with int().
    return 666 if year is None else year
# Request every anime id in the chosen range, one request per id.
for anime_id in range(1, 25850):  # named anime_id so the builtin id() is not shadowed
    time.sleep(1)  # pause between requests: asking too fast may get us blocked by the server
    try:
        respuesta = requests.get(f"https://api.jikan.moe/v4/anime/{anime_id}")
    except requests.RequestException as error:
        # No response object exists here, so report the exception and keep the id for a retry.
        missing_requests_list.append(anime_id)
        print(f"Request for page {anime_id} failed: {error}\n")
        continue

    # Check the HTTP status before touching the payload: an error response has no "data" key.
    if respuesta.status_code != 200:
        if respuesta.status_code == 429:
            # Rate limited: remember the id so it can be requested again later
            missing_requests_list.append(anime_id)
        else:
            resource_does_not_exist.append(anime_id)
        print (f"El código de estado de la petición es: {respuesta.status_code}. Estatus {respuesta.reason}. No se puede recoger información de la página {anime_id}\n")
        continue

    try:
        content = respuesta.json()
        data = content["data"]  # kept as a module-level name: movie() reads it
        print (f"Page: {anime_id}")  # progress indicator
        released_year = data["aired"]["prop"]["from"]["year"]
        # Empty / missing values are stored as None (N_Episodes falls back to 0)
        anime_dict = {"Cover" : data["images"]["jpg"]["large_image_url"] or None,
                    "English_Title" : data["titles"][0]["title"] or None,
                    "Japanses_Title" : data["title_japanese"] or None,
                    "Type" : data["type"] or None,
                    "Source" : data["source"] or None,  # was gated on "status" by mistake
                    "Audience" : [try_it(i) for i in data["demographics"]],
                    "N_Episodes" : int(data["episodes"]) if data["episodes"] else 0,
                    "Duration" : data["duration"] or None,
                    "Status" : data["status"] or None,
                    "Rating" : data["rating"] or None,
                    "Score" : data["score"] or None,
                    "Scored_by" : data["scored_by"] or None,
                    "Rank" : int(data["rank"]) if data["rank"] else None,
                    "Season" : data["season"] or None,
                    "Genre" : [try_it(i) for i in data["genres"]],
                    "Theme" : [try_it(i) for i in data["themes"]],
                    "Released" : int(released_year) if released_year else None,
                    "Finished" : int(movie()),
                    "Studios" : [try_it(i) for i in data["studios"]],
                    "Producers" : [try_it(i) for i in data["producers"]]
                    }
    except (KeyError, IndexError, TypeError, ValueError):
        # The payload does not have the expected shape: log the id and move on.
        resource_does_not_exist.append(anime_id)
        print (f"El código de estado de la petición es: {respuesta.status_code}. Estatus {respuesta.reason}. No se puede recoger información de la página {anime_id}\n")
        continue

    anime_list.append(anime_dict)
# Save the ids whose request failed for a reason other than rate limiting (log file)
resource_does_not_exist_df = pd.DataFrame(resource_does_not_exist)
not_exist_csv = os.path.join(logs_path, f"resource_does_not_exist_{actual_date}_{current_time}.csv")
resource_does_not_exist_df.to_csv(not_exist_csv, sep=',', index=False)
# Each message prints the real file name (the previous ones dropped the "_" between date and time)
print(f'{os.path.basename(not_exist_csv)} saved into a csv file\n\n')

# Save the ids that were rate limited (HTTP 429) so they can be requested again (log file)
missing_requests_df = pd.DataFrame(missing_requests_list)
missing_csv = os.path.join(logs_path, f"missing_requests_{actual_date}_{current_time}.csv")
missing_requests_df.to_csv(missing_csv, sep=',', index=False)
print(f'{os.path.basename(missing_csv)} saved into a csv file\n\n')

# Save the collected anime records as <data_path>/anime_<date>_<time>.csv
anime_df = pd.DataFrame(anime_list)
anime_csv = os.path.join(data_path, f"anime_{actual_date}_{current_time}.csv")
anime_df.to_csv(anime_csv, sep=';', index=False)
print(f'{os.path.basename(anime_csv)} created\n\n')