# Unsupervised content based recommendation system

## Import Libraries

In [10]:
# Standard library imports
import os # allows access to OS-dependent functionalities
import sys # provides access to system-specific parameters and functions in Python.

import numpy as np # functions for working in domain of linear algebra, fourier transform, matrices and arrays
import pandas as pd # data analysis and manipulation tool
import warnings

# Display options: passing None removes pandas' limit on the number of
# columns/rows shown and on the width of each cell, so outputs are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Get the current working directory
cwd = os.getcwd()

# Add the utils directory (a sibling of the current working directory, i.e.
# <cwd>/../utils) to sys.path so the utils modules imported below can be found.
utils_path = os.path.abspath(os.path.join(cwd, '..', 'utils'))
sys.path.append(utils_path)

# Utils libraries
from cleaning import *
from recommend import *
from testing import *
from training import *

# Folder layout used by the notebook, rooted at the parent of the working directory
main_folder = os.path.abspath(os.pardir)
data_folder = f"{main_folder}/data"
saved_models_folder = f"{data_folder}/saved_models"
raw_data = f"{data_folder}/_raw"
processed_data = f"{data_folder}/processed"
# NOTE(review): unlike processed_data, this path is built from main_folder
# (not data_folder) - confirm that "<main>/processed/..." is really intended.
content_based_supervised_data = f"{main_folder}/processed/content_based_supervised"

## Cleaning and preparing the data

### Checking 

In [37]:
# Load the anime catalogue ("anime.csv") from the raw data folder into a DataFrame
anime = pd.read_csv(f"{raw_data}/anime.csv")

# Load the user ratings ("rating.csv.zip") from the raw data folder into a DataFrame
rating = pd.read_csv(f"{raw_data}/rating.csv.zip")

In [38]:
# Print the distinct values of each categorical column to spot NaN/"Unknown" entries.
to_check = ["rating", "genre", "type", "source"]
for i in to_check:
    print(f"Unique values of column: {i} \n")
    print(f"{anime[i].unique()} \n")

Unique values of column: rating 

['R - 17+ (violence & profanity)' 'PG-13 - Teens 13 or older'
 'PG - Children' 'R+ - Mild Nudity' 'G - All Ages' 'Rx - Hentai' nan] 

Unique values of column: genre 

['Action, Adventure, Comedy, Drama, Sci-Fi, Space'
 'Action, Drama, Mystery, Sci-Fi, Space' 'Action, Comedy, Sci-Fi' ...
 'Action, Fantasy, Super Power, Supernatural, Vampire'
 'Action, Adventure, Fantasy, Game, Kids'
 'Comedy, Fantasy, Slice of Life, Supernatural'] 

Unique values of column: type 

['TV' 'Movie' 'OVA' 'Special' 'ONA' 'Music' nan] 

Unique values of column: source 

['Original' 'Manga' 'Light novel' 'Game' 'Visual novel' '4-koma manga'
 'Novel' 'Other' 'Unknown' 'Picture book' 'Web manga' 'Music' 'Radio'
 'Book' 'Card game' 'Mixed media'] 



We can see that we have some NaN and Unknown values.

In [39]:
# Percentage of missing values per column, largest first
null_pct = anime.isnull().sum() / len(anime) * 100
print(null_pct.sort_values(ascending=False))
print("Total number of records:", len(anime))

score             14.048029
rank               9.572986
synopsis           2.475207
rating             1.614622
genre              0.499959
type               0.172117
japanses_title     0.155725
anime_id           0.000000
name               0.000000
english_title      0.000000
source             0.000000
duration           0.000000
episodes           0.000000
members            0.000000
cover              0.000000
dtype: float64
Total number of records: 12201


### Cleaning 

The columns rank, synopsis and japanses_title will only be used to display the recommendation results, so we do not need to clean these 3 columns.

We will deal with the others.

To do that we will call the next functions from cleaning.py in utils folder.
- clean_anime_df
- predict_source
- clean_synopsis

In [40]:
print(clean_anime_df.__doc__)

The function clean_anime_df() takes an anime dataframe as input and performs several 
    cleaning and preprocessing steps, such as removing special characters from anime names, 
    converting all names to lowercase, filling missing values for "episodes" and "score" 
    columns with their median, dropping rows with null values for "genre" or "type" columns, 
    and saving the cleaned dataframe to a CSV file. The cleaned dataframe is also returned as output.


In [41]:
print(predict_source.__doc__)


    The function takes a pandas dataframe containing anime data and 
    fills in missing values in the 'source' column using a Decision 
    Tree Classifier based on the 'episodes' and 'type' columns. The 
    'type' column is converted to categorical data using get_dummies 
    before fitting the model. The function returns the original 
    dataframe with missing values filled in and the model accuracy score.
    


In [42]:
print(clean_synopsis.__doc__)


    This code uses regular expressions to clean up the text in the "synopsis" column
    of a pandas DataFrame. It removes any text in square brackets, removes any c
    arriage returns or newline characters, and removes any extra whitespace at 
    the beginning or end of the string.
    


The steps of this function:
- Create a copy of the original dataframe called anime_cleaned
- Remove all non-word characters from the name column and replace them with spaces
- Convert all names to lowercase
- Replace all "Unknown" values in the episodes column with NaN
- Replace all NaN values in the episodes column with the median of the column
- Convert the score column to float type
- Replace all NaN values in the score column with the median of the column
- Convert the members column to float type
- Apply the clean_synopsis function to the synopsis column
    - Remove \r and \n from synopsis
    - Remove extra spaces from synopsis
    - Replace encoded characters
    - Return synopsis
- Add prediction to the source column of the dataframe using the predict_source function
    - change unknown values to NaN from 'source' column
    - fill missing values in the 'episodes' column with 0
    - create dummy variables for the 'type' column
    - create dummy variables for the 'rating' column
    - First, we are going to split the genre column by comma, then expand the list, so there is a column for each genre. We will have 13 columns, because the anime with the most genre tags has 13 tags
    - Now we can get the list of unique genres. We "convert" the dataframe into a single dimension array and take the unique values
    - Getting the dummy variables will result in having a lot more columns than unique genres
    - So we sum up the columns with the same genre to have a single column for each genre
    - split the data into training and validation sets
    - create the decision tree classifier
    - train the model using the training data
    - predict the 'source' values for the validation data
    - fill the 'NaN' 'source' values in the original DataFrame with the predicted values
    - undo the get_dummies() operation to convert the one-hot encoded 'type' and 'rating' columns back to a single categorical column
    - Dropping unnecessary columns
    - calculate the accuracy of the model
- Replace all NaN values in the genre column with the mode of the column
- Replace all NaN values in the rating column with the mode of the column
- Replace all NaN values in the type column with the mode of the column
- Save the cleaned dataframe to a CSV file called "_anime_to_compare_with_name.csv" 

In [43]:
anime_cleaned = clean_anime_df(anime)# from cleaning.py

The accuracy of source prediction is 0.8886884550084889


In [44]:
anime_cleaned.shape

(12201, 15)

We can see that we have no null values in the columns we need.

In [45]:
# Re-run the missing-value report on the cleaned DataFrame to verify the cleaning.
cleaned_null_pct = anime_cleaned.isnull().sum() / len(anime_cleaned) * 100
print(cleaned_null_pct.sort_values(ascending=False))
print("Total number of records:", len(anime_cleaned))

rank              9.572986
synopsis          2.475207
japanses_title    0.155725
anime_id          0.000000
name              0.000000
english_title     0.000000
genre             0.000000
source            0.000000
duration          0.000000
episodes          0.000000
score             0.000000
members           0.000000
cover             0.000000
type              0.000000
rating            0.000000
dtype: float64
Total number of records: 12201


### Preparing the data

Now we are going to prepare the data for NearestNeighbors model

To do that we will call the next function from cleaning.py in utils folder.
- prepare_supervised_content_based

Since we also have the numeric columns "episodes", "score" and "members", we will additionally use:

- MinMaxScaler and calling fit_transform().

In [46]:
print(prepare_supervised_content_based.__doc__)


    This function prepares the content-based features for a supervised 
    learning model. It first splits the genres into separate columns and 
    gets unique genres. It then creates dummy variables for genres and 
    type, and sum up the columns for the same genre to have a single 
    column for each genre. Finally, it drops irrelevant columns and saves 
    the resulting dataframe to a CSV file. The function returns the resulting dataframe.
    


The steps of this function:
- Split the "genre" column into multiple columns  
- Get unique genre values 
- Create dummy variables for the genre columns  
- For each unique genre, sum up the corresponding dummy variables and add a new column for that genre  
- Create dummy variables for the "type" column  
- Create dummy variables for the "rating" column 
- Create dummy variables for the "source" column   
- Concatenate the "type" dummy variables with the existing DataFrame  
- Drop unnecessary columns  
- Create a new DataFrame that is a copy of the modified DataFrame
- Reset the index of the new DataFrame
- Save the modified DataFrame as a CSV file
- Return the modified DataFrame


In [47]:
anime_features = prepare_supervised_content_based(anime_cleaned) # from cleaning.py

anime_features.head(5)

In [48]:
anime_features.shape

(12201, 73)

In order to apply the MinMaxScaler to a dataset, typically we need to first "fit" the scaler to the data, which means calculating the minimum and maximum values for each feature in the dataset. This is done using the fit() method of the MinMaxScaler object. After fitting the scaler, you can then "transform" the data using the transform() method, which applies the scaling formula to each feature.

However, in some cases it is better to fit the scaler to the data and transform the data in a single step. This is where the fit_transform() method comes in handy: calling fit_transform() on a MinMaxScaler object will both fit the scaler to the data and transform the data in a single step.

Using a MinMaxScaler and calling fit_transform() is a common way to scale feature values to be within a specific range.

In [49]:
# Fit a MinMaxScaler on every column of anime_features and scale them in one step.
# NOTE(review): MinMaxScaler has no explicit import in this notebook; presumably
# it arrives through one of the `from ... import *` lines - confirm.
min_max = MinMaxScaler()
min_max_features = min_max.fit_transform(anime_features)

In [50]:
min_max_features.shape

(12201, 73)

In [51]:
min_max_features

array([[1.37589433e-02, 9.50413223e-01, 4.80139302e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [0.00000000e+00, 8.99449036e-01, 1.35742550e-01, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.37589433e-02, 8.77410468e-01, 2.79180047e-01, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       ...,
       [5.50357733e-04, 6.43250689e-01, 3.70840862e-04, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [5.50357733e-04, 6.30853994e-01, 1.01586725e-04, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [0.00000000e+00, 6.33608815e-01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

Rounding the values using np.round(min_max_features, 2) will round each value in min_max_features to 2 decimal places, which can make the resulting array easier to read and work with. This is especially useful when dealing with large arrays or matrices, where the values can be difficult to interpret if there are too many decimal places.

It's worth noting that the use of np.round() in this code is not strictly necessary and is largely a matter of personal preference. Some people might prefer to work with the original unrounded values or may choose to use a different rounding method depending on the specific requirements of their project.

In [52]:
np.round(min_max_features,2)

array([[0.01, 0.95, 0.48, ..., 0.  , 0.  , 1.  ],
       [0.  , 0.9 , 0.14, ..., 0.  , 0.  , 0.  ],
       [0.01, 0.88, 0.28, ..., 0.  , 0.  , 1.  ],
       ...,
       [0.  , 0.64, 0.  , ..., 0.  , 0.  , 1.  ],
       [0.  , 0.63, 0.  , ..., 0.  , 0.  , 1.  ],
       [0.  , 0.63, 0.  , ..., 0.  , 0.  , 0.  ]])

## Finding the best parameters for NearestNeighbors model

In [53]:
print(param_NearestNeighbors.__doc__)


    The function param_NearestNeighbors uses GridSearchCV from scikit-learn 
    to perform a grid search over a range of hyperparameters for the 
    NearestNeighbors model. It takes a dataframe df as input and returns 
    the best hyperparameters found during the grid search. The hyperparameters 
    being searched over include n_neighbors, radius, algorithm, leaf_size, 
    metric, and p. The scoring metric being used is "accuracy" and the refit 
    parameter is set to "precision_score". cv=2 sets the number of cross-validation 
    folds to 2, and n_jobs=-1 sets the number of CPU cores used to parallelize 
    the search to be the maximum available.
    


The GridSearchCV function works by training and evaluating the estimator (in this case, the NearestNeighbors() object) for each combination of parameter values specified in the param_grid argument. It then returns the combination of parameter values that resulted in the best performance, as determined by the specified scoring metric.

Overall, using GridSearchCV with NearestNeighbors() allows us to fine-tune the hyperparameters of the k-nearest neighbors algorithm to improve its accuracy and make better recommendations.

We will find the best parameters by passing min_max_features.

Steps:
- Define dictionary of hyperparameters to test using GridSearchCV
- Create GridSearchCV object with NearestNeighbors algorithm and hyperparameters defined in the parametros dictionary
- Return the best hyperparameters found by the grid search

In [None]:
param_NearestNeighbors(min_max_features) # from testing.py 

## Building the model

In [55]:
print(model_NearestNeighbors.__doc__) # from testing.py 


    The function model_NearestNeighbors builds and trains a 
    k-Nearest Neighbors model on a given dataset, using specified 
    parameters. It then saves the indices of the nearest neighbors 
    to a file and returns them.
    


- Build and "train" the model using NearestNeighbors algorithm
	- algorithm: algorithm used to compute the nearest neighbors (‘auto’, ‘ball_tree’, ‘kd_tree’, ‘brute’)
	- leaf_size: leaf size passed to BallTree or KDTree
	- metric: distance metric used for the tree. Can be 'minkowski', 'euclidean', etc.
	- n_neighbors: number of neighbors to use for kneighbors queries
	- p: power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance
- Get the distances and indices of the nearest neighbors
	- distances: array representing the lengths to points, only present if return_distance=True
	- indices: indices of the nearest points in the population matrix
- Save the model to a file using joblib.dump
- Return the indices of the nearest neighbors

In [56]:
model_NearestNeighbors(min_max_features)

array([[    0,  3445, 10833, ...,   533,  2205,  1041],
       [    1,  7697,  4530, ...,  9351,   992,  7474],
       [    2,  3409, 11218, ...,  3136,  1181,  2198],
       ...,
       [12198,  7973,  5977, ...,  1850,  8626,  8464],
       [12199,  1825,   187, ...,  2339,  4352,  4780],
       [12200,  8769,  9684, ...,  4427,  8354,  2523]], dtype=int64)

## Get recommendations

To get the recommendations we will use the next functions from recommend.py in utils folder:
- print_similar_animes
- finding_the_closest_title
- from_title_to_index
- match_the_score
- from_index_to_title
- create_dict
- filtering_and
- filtering_or

In [57]:
print(print_similar_animes.__doc__)


    This function takes a user input anime name query and returns a list of recommended anime similar to the query.
    It uses a pre-trained model and a dataset of anime information to find recommendations. 
    If the user query has any misspelling, the function tries to find the closest match to 
    the query and provides recommendations based on that.
    


Steps of print_similar_animes:
- Load pre-trained k-Nearest Neighbors model from file
- Load anime data from CSV file
- Find the closest title in the anime dataset to the user's query
- If the distance score is 100, the user's query is an exact match for a title in the dataset
	- Get the index of the exact match in the dataset 
	- Get the indices of the k-nearest neighbors of the exact match
	- Remove the index of the exact match from the array of neighbors
	- For each neighbor index, get the name of the anime and add it to the list of recommendations
	- Return the list of recommendations
- If the distance score is not 100, the user's query is a misspelling or a partial match  
	- Ask the user if they meant the closest title found in the dataset
	- Get the index of the closest title in the dataset
	- Get the indices of the k-nearest neighbors of the closest title
	- Remove the index of the closest title from the array of neighbors
	- For each neighbor index, get the name of the anime and add it to the list of recommendations       
	- Return the list of recommendations

In [58]:
print(finding_the_closest_title.__doc__)


    Function that takes in a string title and a pandas DataFrame df as input arguments, 
    and returns a tuple containing the closest matching title to the input title 
    and the Levenshtein distance score between the closest title and the input title.
    in other words, the function returns the most similar title to the name a user typed
    


Steps of finding_the_closest_title:
- This function takes a string `title` and a pandas DataFrame `df` as input arguments.
- Create a new variable `anime` to hold the DataFrame `df` for readability.
- Calculate the Levenshtein distance between each title in the 'name' column of the DataFrame and the input `title`.
- The `match_the_score` function is used to calculate the distance score.
- The `enumerate` function adds an index number to each distance score.
- Sort the list of (index, distance score) tuples in descending order by the distance score. sorted_levenshtein_scores = sorted(levenshtein_scores, key=lambda x: x[1], reverse=True)
- Get the closest matching title to the input `title` by using the index of the highest scoring match.
- The `from_index_to_title` function is used to return the title string from the DataFrame given an index.
- Get the Levenshtein distance score of the closest matching title.
- Return a tuple containing the closest matching title and its Levenshtein distance score.

In [59]:
print(from_title_to_index.__doc__) # just one step


    Function to return the matched index number of the anime name
    


In [60]:
print(match_the_score.__doc__) # just one step


    Function to find the closest title, It uses Levenshtein Distance to calculate the differences between sequences
    


In [61]:
print(from_index_to_title.__doc__) # just one step


    Function to return the anime name that mtches de index number
    


The resulting information is passed to:
- create_dict
- filtering_and
- filtering_or

In [62]:
print(create_dict.__doc__)


    The create_dict() function takes in four arguments - names (list of anime names to search for), 
    gen (list of genres to filter by), typ (list of anime types to filter by), 
    method (string indicating whether to filter by "or" or "and"), 
    and an optional n parameter indicating the maximum number of results to return. 
    It reads in a pre-processed anime DataFrame, filters it based on the input criteria, 
    and returns a dictionary of the resulting rows. If there are no matches, 
    it returns a string indicating it.
    


Steps of create_dict:
- This function takes in a list of anime titles `names`, lists of `gen`res and `typ`es, a filtering method `method`, and an optional number of results `n`.
- Load the anime dataframe from a CSV file using pandas.
- Filter the anime dataframe to only include titles that match those in the input list `names`.
- Remove the 'anime_id' and 'members' columns from the resulting dataframe.
- Reset the index of the resulting dataframe.
- Apply a filtering method based on the input `method`.
- If 'or', use the `filtering_or()` function to filter the dataframe.
- If 'and', use the `filtering_and()` function to filter the dataframe.
- If `method` is neither 'or' nor 'and', raise a ValueError.
- Drop any duplicate titles from the resulting dataframe.
- Limit the resulting dataframe to the first `n` rows.
- If the resulting dataframe is empty, print an error message and return None.
- Otherwise, convert the resulting dataframe to a dictionary and return the dictionary.

In [63]:
print(filtering_and.__doc__)


    This function takes a DataFrame df, a list of genres, and a list of types as input arguments. 
    The function first creates a boolean mask genre_mask by applying a lambda function to 
    the 'genre' column of the DataFrame. The lambda function checks if the value is a 
    string using isinstance(x, str) and if all genres in the genres list are present 
    in the string, which is split by comma and space using x.split(', '). 
    The all() function returns True if all genres in the genres list are present 
    in the string. The resulting genre_mask will be True for rows where the genre 
    column contains all of the genres in the genres list.

    Then the function creates another boolean mask type_mask by using the isin() 
    method to check if each value in the 'type' column of the DataFrame is in the types list.

    Finally, the function applies both masks to the DataFrame df using the & operator 
    to create a new DataFrame filtered_df that includes only rows where b

Steps of filtering_and:
- This function takes a DataFrame `df`, a list of `genres`, and a list of `types` as input arguments.
- Create a boolean mask that filters rows where the genre column contains all of the genres in the `genres` list.
- Create a boolean mask that filters rows where the type column is in the `types` list.
- Apply both masks to the DataFrame `df` and create a new DataFrame `filtered_df` that includes only rows where both masks are True.
- Return the filtered DataFrame.

In [64]:
print(filtering_or.__doc__)


    The code defines a function "filtering_or" that filters a pandas dataframe based on user-defined 
    genres and types using an "OR" method. The function allows the user to select one or all possible 
    genres and types and returns a filtered dataframe with the selected genres and types. 
    The function also splits the genre and type columns and explodes them to account for multiple entries.
    


Steps of filtering_or:
- Make a copy of the input DataFrame
- Split the genre column into a list of genres
- Explode the genre column to create a new row for each genre in the list
- If genres are specified and 'ALL' is not one of them, filter the DataFrame to keep only rows where the genre is in the specified list  
- If types are specified and 'ALL' is not one of them, filter the DataFrame to keep only rows where the type is in the specified list
- If both genres and types are specified
- If 'ALL' is in the genres list, set genres to be all the unique genres in the filtered DataFrame
- If 'ALL' is in the types list, set types to be all the unique types in the filtered DataFrame
- Filter the DataFrame to keep only rows where the genre is in the genres list AND the type is in the types list
- Return the filtered DataFrame

In [69]:
# We can get the recommendations as a list of dictionaries.
# Arguments, in order:
#   1. the anime to find similar titles for (resolved by print_similar_animes)
#   2. the genres we want
#   3. the types we want (or write "All" if we choose the "or" filter)
#   4. the filtering method, "or" / "and" (one of them must be selected)
#   5. the number of suggestions we want (we might get fewer if there are not
#      that many matches, or none if nothing matches)

create_dict(print_similar_animes("Naruto"),["Shounen"],["TV"],"or",2)

I guess you misspelled the name
 Are you looking similitudes for the anime named [1mnaruto[0m? 
Here are the recommendations:
or


[{'name': 'eyeshield 21',
  'english_title': 'Eyeshield 21',
  'japanses_title': 'アイシールド21',
  'genre': 'Shounen',
  'source': 'Manga',
  'duration': '23 min per ep',
  'episodes': 145.0,
  'score': 7.92,
  'rank': 702.0,
  'synopsis': 'Shy, reserved, and small-statured, Deimon High School student Sena Kobayakawa is the perfect target for bullies. However, as a result of running errands throughout his life, Sena has become agile and developed a skill for avoiding crowds of people. After the cunning Youichi Hiruma—captain of the Deimon Devil Bats football team—witnesses Sena\'s rapid legs in motion, he coerces the timid boy into joining his squad. As Hiruma wants to conceal Sena\'s identity from other clubs, Sena is forced to hide under the visored helmet of "Eyeshield 21," a mysterious running back wearing the number 21 jersey. The legendary Eyeshield 21 can supposedly run at the speed of light and has achieved remarkable feats in the United States during his time at the Notre Dame Col

In [36]:
def _names(entries):
    """Return the ``"name"`` value of every entry in *entries*.

    An entry without a ``"name"`` key (or that cannot be indexed by key)
    contributes ``None``, so the result has the same length as the input.
    """
    names = []
    for entry in entries:
        try:
            names.append(entry["name"])
        except (KeyError, TypeError):
            names.append(None)
    return names


def _anime_record(char):
    """Flatten one item of the API ``data`` list into a row dictionary.

    Falsy values (``None``, ``""``, ``0``) are stored as ``None``; the only
    exception is ``N_Episodes``, which falls back to ``0``.

    Raises:
        KeyError, TypeError, ValueError: when an expected field is missing
            or has an unexpected shape/value.
    """
    released = char["aired"]["prop"]["from"]["year"]
    return {
        "Anime_id": char["mal_id"] or None,
        "Cover": char["images"]["jpg"]["large_image_url"] or None,
        "English_Title": char["title"] or None,
        "Japanses_Title": char["title_japanese"] or None,
        "Type": char["type"] or None,
        # Fixed: the original tested char["status"] instead of char["source"].
        "Source": char["source"] or None,
        "Audience": _names(char["demographics"]),
        "N_Episodes": int(char["episodes"]) if char["episodes"] else 0,
        "Duration": char["duration"] or None,
        "Rating": char["rating"] or None,
        "Score": char["score"] or None,
        "Scored_by": char["scored_by"] or None,
        "Rank": int(char["rank"]) if char["rank"] else None,
        "Season": char["season"] or None,
        "Genre": _names(char["genres"]),
        "Theme": _names(char["themes"]),
        "Released": int(released) if released else None,
        "Studios": _names(char["studios"]),
        "Producers": _names(char["producers"]),
        "Synopsis": char["synopsis"] or None,
    }


def get_info():
    """Download every page of the Jikan ``/v4/anime`` listing into a CSV file.

    The function first asks the API how many pages exist, then requests the
    pages one by one (pausing one second after each request), converts every
    item into a flat record and finally writes all records to
    ``anime_<YYYY_MM_DD>_<HH_MM_SS>.csv`` (``;`` separated, no index column).

    Side effects:
        - changes the working directory to the parent of ``sys.path[0]``;
          the CSV is written relative to that directory;
        - sets the pandas option ``display.max_columns`` to ``None``;
        - prints each page number and a message for every page or item
          that could not be collected.

    Returns:
        None

    Raises:
        requests.HTTPError: if the initial request (used to read the page
            count) does not succeed.
    """
    # Import Libraries (kept local so the cell is self-contained)
    import os
    import sys
    import time
    from datetime import datetime

    import pandas as pd
    import requests

    # Move to the parent directory of sys.path[0]; the output file name
    # below is relative, so this decides where the CSV ends up.
    os.chdir(os.path.dirname(sys.path[0]))

    # Remove the limit to see the df
    pd.set_option('display.max_columns', None)

    url = "https://api.jikan.moe/v4/anime"  # url of the api
    timeout = 30  # seconds; avoids hanging forever on a stalled connection

    # First request: only used to learn how many pages there are.
    first = requests.get(url, timeout=timeout)
    first.raise_for_status()
    n_pages = first.json()['pagination']['last_visible_page']

    anime_list = []
    for page in range(1, n_pages + 1):
        r_page = requests.get(f"{url}?page={page}", timeout=timeout)
        print(page)
        time.sleep(1)  # pause between requests

        # Check the HTTP status before touching the payload: an error
        # response (e.g. 429) is reported and the page is skipped.
        if r_page.status_code != 200:
            print(f"El código de estado de la petición es: {r_page.status_code}. Estatus {r_page.reason}. No se puede recoger información de la página {page}\n")
            continue

        for char in r_page.json()["data"]:
            try:
                anime_list.append(_anime_record(char))
            except (KeyError, TypeError, ValueError) as err:
                # A single malformed item must not abort the whole download.
                print(f"Skipping malformed entry on page {page}: {err!r}\n")

    # Build the DataFrame and save it, stamping the file name with date and time.
    stamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    anime_csv = f"anime_{stamp}.csv"
    pd.DataFrame(anime_list).to_csv(anime_csv, sep=';', index=False)
    print(f'{anime_csv} created\n\n')

In [37]:
get_info()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
