In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, round
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS
from pyspark.sql.functions import explode
# Inicializamos la sesion de Spark
spark = SparkSession.builder.appName("SistemaRecomendacion_ALS").config("spark.executor.memory", "8g").config("spark.executor.cores", "6").config("spark.driver.memory","8g").getOrCreate()

# Cargamos el dataset
df = spark.read.option("delimiter", ",").option("quote", '"').option("escape", '"').csv("data/anime.csv", header=True, inferSchema=True)


## Funciones API JIKAN

In [None]:
# Importe de librerías para la api
import requests
import json
from IPython.display import Image, display
from rich import print
import time

In [3]:
# Función para obtener la información del anime a través de la API de Jikan
def obtener_info_anime(id):
    # Construimos la URL de la API
    url = f"https://api.jikan.moe/v4/anime/{id}/full"
    # Hacemos una solicitud GET a la API
    response = requests.get(url)
    # Convertimos la respuesta en formato JSON a un diccionario de Python
    data = json.loads(response.text)
    # Devolvemos los datos 
    return data

In [4]:
def mostrar_info_anime(data):
    # Accedemos a los datos del anime
    data = data['data']
    # Imprimimos el título, la sinopsis y el trailer del anime
    print(f"Titulo: {data['title']}")
    print(f"Sinopsis: {data['synopsis']}")

    # Buscamos cualquier URL de imagen disponible en 'images'
    image_url = None
    for key, value in data['images'].items():
        if isinstance(value, dict) and 'image_url' in value:
            image_url = value['image_url']
    if image_url:
        display(Image(url=image_url))
    else:
        print("No se encontró la imagen del anime.")

    print(f"Trailer: {data['trailer']['url']}")
    print("-----------------------------------------------------------------")

In [5]:
def mostrar_animes_series(lista_de_recomendaciones):
    # Iteramos sobre la lista de IDs y obtenemos y mostramos la información de cada anime
    for anime_id in lista_de_recomendaciones:
        data = obtener_info_anime(anime_id)
        if 'data' not in data:
            print(f"El ID {anime_id} no existe.")
        else:
            mostrar_info_anime(data)
        time.sleep(1)

# Limpieza de los datos

In [6]:
# Visualizamos todo el dataset de animes
df.show()

+---+--------------------+-----+--------------------+--------------------+------------------------------+-----+--------+--------------------+-----------+--------------------+--------------------+----------------+-----------+---------------+--------------------+------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+--------+--------+--------+--------+-------+-------+-------+-------+-------+
| ID|                Name|Score|              Genres|        English name|                 Japanese name| Type|Episodes|               Aired|  Premiered|           Producers|           Licensors|         Studios|     Source|       Duration|              Rating|Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10| Score-9| Score-8| Score-7| Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+---+--------------------+-----+--------------------+--------------------+------------------------------+-----+--------+----------------

In [7]:
# Visualizamos las filas con valores nulos
df.where(df["Type"] == "Unknown").count()

37

In [8]:
# Visualizamos los tipos de animes que existen (Movie, TV, OVA, etc)
df.select("Type").distinct().show(20, False)

+-------+
|Type   |
+-------+
|TV     |
|Special|
|Unknown|
|OVA    |
|Music  |
|Movie  |
|ONA    |
+-------+



In [9]:
df = df.drop(df["Premiered"])

In [11]:
# Se filtra el dataset para que solo contenga los animes de tipo Movie y TV
df_filtered = df.filter((df["Type"] == "Movie") | (df["Type"] == "TV"))

In [12]:

# Convertimos los valores de las columnas de Score-1 a Score-10 a valores numericos cambiando los Unknown por 0
df_filtered = df_filtered.withColumn("Score-1", when(df_filtered["Score-1"] == "Unknown", 0).otherwise(df_filtered["Score-1"])) \
    .withColumn("Score-2", when(df_filtered["Score-2"] == "Unknown", 0).otherwise(df_filtered["Score-2"])) \
    .withColumn("Score-3", when(df_filtered["Score-3"] == "Unknown", 0).otherwise(df_filtered["Score-3"])) \
    .withColumn("Score-4", when(df_filtered["Score-4"] == "Unknown", 0).otherwise(df_filtered["Score-4"])) \
    .withColumn("Score-5", when(df_filtered["Score-5"] == "Unknown", 0).otherwise(df_filtered["Score-5"])) \
    .withColumn("Score-6", when(df_filtered["Score-6"] == "Unknown", 0).otherwise(df_filtered["Score-6"])) \
    .withColumn("Score-7", when(df_filtered["Score-7"] == "Unknown", 0).otherwise(df_filtered["Score-7"])) \
    .withColumn("Score-8", when(df_filtered["Score-8"] == "Unknown", 0).otherwise(df_filtered["Score-8"])) \
    .withColumn("Score-9", when(df_filtered["Score-9"] == "Unknown", 0).otherwise(df_filtered["Score-9"])) \
    .withColumn("Score-10", when(df_filtered["Score-10"] == "Unknown", 0).otherwise(df_filtered["Score-10"]))
    

In [13]:
# Visualizamos el dataset con Scores en Unknown y verificamos que los valores de Scores-1 al Scores-10 sean numericos
df_filtered.where(df_filtered["Score"] == "Unknown").show()


+----+--------------------+-------+--------------------+--------------------+-------------------------------------+-----+--------+--------------------+--------------------+---------+--------------------+---------+---------------+--------------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|  ID|                Name|  Score|              Genres|        English name|                        Japanese name| Type|Episodes|               Aired|           Producers|Licensors|             Studios|   Source|       Duration|              Rating| Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10|Score-9|Score-8|Score-7|Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+----+--------------------+-------+--------------------+--------------------+-------------------------------------+-----+--------+--------------------+-------------

In [14]:
## Se muestra uno de los animes para verificar un cambio en la siguinete ejecución
df_filtered.where(df_filtered["ID"] == "1547").show()

+----+----------------+-------+--------------------+------------+--------------+----+--------+--------------------+---------+---------+-----------------+------+---------------+------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|  ID|            Name|  Score|              Genres|English name| Japanese name|Type|Episodes|               Aired|Producers|Licensors|          Studios|Source|       Duration|      Rating| Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10|Score-9|Score-8|Score-7|Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+----+----------------+-------+--------------------+------------+--------------+----+--------+--------------------+---------+---------+-----------------+------+---------------+------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+--

In [15]:
# Se calcula el Score promedio de cada anime si no tiene, y se guarda en la columna Score en otro caso mantiene el Score que tiene
df_filtered = df_filtered.withColumn("Score", when(df_filtered["Score"] == "Unknown", (df_filtered["Score-1"] *1 + df_filtered["Score-2"] *2 + df_filtered["Score-3"] *3 + df_filtered["Score-4"] *4 + df_filtered["Score-5"] *5 + df_filtered["Score-6"] *6 + df_filtered["Score-7"] *7 + df_filtered["Score-8"] *8 + df_filtered["Score-9"] *9 + df_filtered["Score-10"] *10) / (df_filtered["Score-1"] + df_filtered["Score-2"] + df_filtered["Score-3"] + df_filtered["Score-4"] + df_filtered["Score-5"] + df_filtered["Score-6"] + df_filtered["Score-7"] + df_filtered["Score-8"] + df_filtered["Score-9"] + df_filtered["Score-10"])).otherwise(df_filtered["Score"]))

In [16]:
# Redondeamos el Score a 2 decimales
df_filtered = df_filtered.withColumn("Score", round(df_filtered.Score, 2))

In [17]:
# Se muestra uno de los animes para verificar que el Score se haya calculado correctamente
df_filtered.where(df_filtered["ID"] == "1547").show()

+----+----------------+-----+--------------------+------------+--------------+----+--------+--------------------+---------+---------+-----------------+------+---------------+------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|  ID|            Name|Score|              Genres|English name| Japanese name|Type|Episodes|               Aired|Producers|Licensors|          Studios|Source|       Duration|      Rating| Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10|Score-9|Score-8|Score-7|Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+----+----------------+-----+--------------------+------------+--------------+----+--------+--------------------+---------+---------+-----------------+------+---------------+------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+-------+

In [18]:
# Comprobación de la media de los scores
# num = (8*10+5*9+6*8+20*7+28*6+31*5+6*4+6*3+2*2+10*1)/(8+5+6+20.0+28+31+6+6+2+10.0)
# num

In [19]:
# Se visualiza el dataset filtrado nuevamente
df_filtered.show()

+---+--------------------+-----+--------------------+--------------------+------------------------------+-----+--------+--------------------+--------------------+--------------------+----------------+-----------+---------------+--------------------+------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+--------+--------+--------+--------+-------+-------+-------+-------+-------+
| ID|                Name|Score|              Genres|        English name|                 Japanese name| Type|Episodes|               Aired|           Producers|           Licensors|         Studios|     Source|       Duration|              Rating|Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10| Score-9| Score-8| Score-7| Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+---+--------------------+-----+--------------------+--------------------+------------------------------+-----+--------+--------------------+-------------------

In [20]:
# se visualiza el numero de filas del dataset filtrado
df_filtered.count()

8037

In [21]:
# Visualizamos el numero de filas sin valoraciones
df_filtered.where((df_filtered["Score-1"] == 0) & (df_filtered["Score-2"] == 0) & (df_filtered["Score-3"] == 0) & (df_filtered["Score-4"] == 0) & (df_filtered["Score-5"] == 0) & (df_filtered["Score-6"] == 0) & (df_filtered["Score-7"] == 0) & (df_filtered["Score-8"] == 0) & (df_filtered["Score-9"] == 0) & (df_filtered["Score-10"] == 0)).count()

188

In [22]:
# Visualizamos las filas sin valoraciones
df_filtered.where((df_filtered["Score-1"] == 0) & (df_filtered["Score-2"] == 0) & (df_filtered["Score-3"] == 0) & (df_filtered["Score-4"] == 0) & (df_filtered["Score-5"] == 0) & (df_filtered["Score-6"] == 0) & (df_filtered["Score-7"] == 0) & (df_filtered["Score-8"] == 0) & (df_filtered["Score-9"] == 0) & (df_filtered["Score-10"] == 0)).show()

+-----+--------------------+-----+--------------------+--------------------+----------------------------------+-----+--------+--------------+--------------------+----------+------------+------------+--------+--------------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|   ID|                Name|Score|              Genres|        English name|                     Japanese name| Type|Episodes|         Aired|           Producers| Licensors|     Studios|      Source|Duration|              Rating| Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10|Score-9|Score-8|Score-7|Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+-----+--------------------+-----+--------------------+--------------------+----------------------------------+-----+--------+--------------+--------------------+----------+------------+------------+-------

In [23]:
# Visualizamos las filas sin valoraciones de otra manera
df_filtered.filter(df_filtered.Score.isNull()).show()

+-----+--------------------+-----+--------------------+--------------------+----------------------------------+-----+--------+--------------+--------------------+----------+------------+------------+--------+--------------------+-------+----------+-------+---------+--------+---------+-------+-------+-------------+--------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|   ID|                Name|Score|              Genres|        English name|                     Japanese name| Type|Episodes|         Aired|           Producers| Licensors|     Studios|      Source|Duration|              Rating| Ranked|Popularity|Members|Favorites|Watching|Completed|On-Hold|Dropped|Plan to Watch|Score-10|Score-9|Score-8|Score-7|Score-6|Score-5|Score-4|Score-3|Score-2|Score-1|
+-----+--------------------+-----+--------------------+--------------------+----------------------------------+-----+--------+--------------+--------------------+----------+------------+------------+-------

In [24]:
# Se filtra por los animes que tienen valoraciones
df_filtered = df_filtered.filter(df_filtered.Score.isNotNull())

## Entrenamiento del Algoritmo ALS

In [25]:
# Se cargan los valores del csv de ratings
df_ratings = spark.read.csv("data/rating_complete.csv", header=True, sep=",", encoding="UTF-8", inferSchema=True)

# Se cargan los valores de ratings del usuario 66666
df_ratings_ep = spark.read.csv("data/valoraciones_EP.csv", header=False, sep=",", encoding="UTF-8", inferSchema=True)
# Como el csv de datos no tiene header, se reemplaza el nombre por defecto por los que se quieren
df_ratings_ep = df_ratings_ep.withColumnRenamed("_c0", "user_id")
df_ratings_ep = df_ratings_ep.withColumnRenamed("_c1", "anime_id")
df_ratings_ep = df_ratings_ep.withColumnRenamed("_c2", "rating")

In [26]:
print("El numero de valoraciones: " + str(df_ratings.count()))
print("El numero de valoraciones del usuario EP: " + str(df_ratings_ep.count()))

In [27]:
# Se renombra la columna de rating por Category_rating para evitar confusiones
df_filtered = df_filtered.withColumnRenamed("Rating","Category_rating")

In [28]:
# Se realiza un union de los dos dataframes de valoraciones
df_ratings_completo = df_ratings.union(df_ratings_ep)

In [29]:
df_ratings_completo.show()

+-------+--------+------+
|user_id|anime_id|rating|
+-------+--------+------+
|      0|     430|   9.0|
|      0|    1004|   5.0|
|      0|    3010|   7.0|
|      0|     570|   7.0|
|      0|    2762|   9.0|
|      0|     431|   8.0|
|      0|     578|  10.0|
|      0|     433|   6.0|
|      0|    1571|  10.0|
|      0|     121|   9.0|
|      0|     356|   9.0|
|      0|    1250|   7.0|
|      0|    2913|   6.0|
|      0|    1689|   6.0|
|      0|      68|   6.0|
|      0|    1829|   7.0|
|      0|     600|   6.0|
|      0|    3418|   9.0|
|      0|     164|   8.0|
|      0|    1894|   7.0|
+-------+--------+------+
only showing top 20 rows



In [30]:
# Se realiza un join con el dataframe de animes, para conocer cuales son peliculas y cuales son series
df_ratings_completo_movies = df_ratings_completo.join(df_filtered, df_ratings_completo["anime_id"] == df_filtered["ID"], "inner")

In [31]:
# Se obtienen las peliculas valoradas por los usuarios
df_ratings_movies = df_ratings_completo_movies.filter(df_ratings_completo_movies["Type"] == "Movie").select("user_id", "anime_id", "rating")

In [32]:
# Se obtienen las series valoradas por los usuarios
df_ratings_series = df_ratings_completo_movies.filter(df_ratings_completo_movies["Type"] == "TV").select("user_id", "anime_id", "rating")

In [33]:
df_ratings_series.show()

+-------+--------+------+
|user_id|anime_id|rating|
+-------+--------+------+
|      0|    3010|   7.0|
|      0|    2762|   9.0|
|      0|    1571|  10.0|
|      0|     121|   9.0|
|      0|     356|   9.0|
|      0|    1250|   7.0|
|      0|    2913|   6.0|
|      0|      68|   6.0|
|      0|     600|   6.0|
|      0|    3418|   9.0|
|      0|    2034|   8.0|
|      0|    2547|   7.0|
|      0|     169|   7.0|
|      0|     174|   4.0|
|      0|    2543|   7.0|
|      0|    4086|   6.0|
|      0|     419|   8.0|
|      1|   22535|   9.0|
|      1|   38000|   9.0|
|      1|   18679|   6.0|
+-------+--------+------+
only showing top 20 rows



## Dividir los sets de datos

In [72]:
# Se divide el set de datos en training y test
(training_movies, test_movies) = df_ratings_movies.randomSplit([0.8, 0.2])
(training_series, test_series) = df_ratings_series.randomSplit([0.8, 0.2])

In [73]:
# Se crea el modelo ALS para las peliculas
als_movie = ALS(maxIter=5, regParam=0.1, userCol="user_id", itemCol="anime_id", ratingCol="rating", coldStartStrategy="drop")
model_movie = als_movie.fit(training_movies)

In [74]:
# Se crea el modelo ALS para las series
als_series = ALS(maxIter=5, regParam=0.1, userCol="user_id", itemCol="anime_id", ratingCol="rating", coldStartStrategy="drop")
model_series = als_series.fit(training_series)

In [75]:
# Se realizan las predicciones para las peliculas y las series
predictions_movies = model_movie.transform(test_movies)
predictions_series = model_series.transform(test_series)

In [76]:
predictions_movies.show()

+-------+--------+------+----------+
|user_id|anime_id|rating|prediction|
+-------+--------+------+----------+
|     27|    2890|   6.0|  5.283653|
|     27|   10408|  10.0|   6.62606|
|     27|   10589|   9.0|   6.71727|
|     27|   16782|   6.0| 6.8473086|
|     27|   21647|   6.0| 6.8202934|
|     27|   25015|   8.0| 6.8204904|
|     27|   28851|  10.0|  8.480697|
|     28|     431|  10.0|  7.639615|
|     28|     460|   8.0|  7.831153|
|     28|     461|   7.0| 7.7650275|
|     28|     462|   7.0|  8.024772|
|     28|     465|  10.0|  7.923398|
|     28|     900|   8.0|  7.687245|
|     28|    1119|   8.0|  7.703452|
|     28|    1122|   7.0|  7.743541|
|     28|    2385|  10.0|  7.423892|
|     28|    2490|   9.0| 7.4940243|
|     28|    2847|   7.0| 7.9944606|
|     28|    4793|   8.0| 7.1318827|
|     28|    5526|   8.0|  7.202059|
+-------+--------+------+----------+
only showing top 20 rows



In [77]:
predictions_series.show()

+-------+--------+------+----------+
|user_id|anime_id|rating|prediction|
+-------+--------+------+----------+
|     27|     121|   8.0| 7.9687066|
|     27|     934|   7.0| 7.7952833|
|     27|   13601|   8.0|  8.316835|
|     27|   18153|  10.0|  7.588115|
|     27|   22319|  10.0| 7.7941055|
|     27|   23283|   8.0|   8.18337|
|     27|   30015|   8.0|   7.97096|
|     27|   30276|   8.0|  8.434945|
|     27|   32901|   8.0|  6.603804|
|     27|   33836|   5.0|  6.229514|
|     27|   34542|   9.0|  7.736362|
|     27|   35062|  10.0| 7.9684467|
|     27|   35434|   7.0| 6.1621623|
|     27|   36511|  10.0| 6.7174935|
|     28|      20|   9.0|  9.446367|
|     28|     226|  10.0|  9.348526|
|     28|     419|   8.0|  8.542862|
|     28|    1535|  10.0|  9.198162|
|     28|    1564|  10.0| 8.3417425|
|     28|    3588|  10.0|  8.428232|
+-------+--------+------+----------+
only showing top 20 rows



In [78]:
# Se calcula el error para las peliculas y las series
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction")
rmse_movies = evaluator.evaluate(predictions_movies)
rmse_series = evaluator.evaluate(predictions_series)
print("Root-mean-square error movies = " + str(rmse_movies))
print("Root-mean-square error series = " + str(rmse_series))

In [79]:
# Se obtiene el usuario EP
user_ep = df_ratings_movies.select(als_movie.getUserCol()).where(df_ratings_movies["user_id"] == 66666).distinct()

In [80]:
# Se obtienen las recomendaciones para el usuario EP
recomendation_userEp_movies = model_movie.recommendForUserSubset(user_ep, 5)
recomendation_userEp_series = model_series.recommendForUserSubset(user_ep, 5)

In [81]:
# Se obtienen las recomendaciones de peliculas y series para el usuario ep
recomendation_userEp_movies.show(20, False)
recomendation_userEp_series.show(20, False)

+-------+-------------------------------------------------------------------------------------------+
|user_id|recommendations                                                                            |
+-------+-------------------------------------------------------------------------------------------+
|66666  |[{33132, 9.622806}, {12355, 9.478018}, {40211, 9.433542}, {28957, 9.3591}, {164, 9.230429}]|
+-------+-------------------------------------------------------------------------------------------+

+-------+----------------------------------------------------------------------------------------------+
|user_id|recommendations                                                                               |
+-------+----------------------------------------------------------------------------------------------+
|66666  |[{5114, 9.065641}, {28977, 9.008711}, {35180, 8.998851}, {11061, 8.985007}, {42923, 8.975994}]|
+-------+------------------------------------------------------------

In [82]:
recomendation_userEp_movies.printSchema()
recomendation_userEp_series.printSchema()

root
 |-- user_id: integer (nullable = false)
 |-- recommendations: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- anime_id: integer (nullable = true)
 |    |    |-- rating: float (nullable = true)

root
 |-- user_id: integer (nullable = false)
 |-- recommendations: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- anime_id: integer (nullable = true)
 |    |    |-- rating: float (nullable = true)



In [83]:
# Separamos las recomendaciones de peliculas en columnas
df_recomendations_movies_userEp = recomendation_userEp_movies.select("user_id", explode("recommendations").alias("recommendations"))
# Separamos las recomendaciones de series en columnas
df_recomendations_series_userEp = recomendation_userEp_series.select("user_id", explode("recommendations").alias("recommendations"))

In [84]:
# Se obtiene el user_id, anime_id y rating de las recomendaciones de peliculas y series para el usuario EP
df_recomendations_movies_userEp = df_recomendations_movies_userEp.select("user_id", "recommendations.anime_id", "recommendations.rating")
df_recomendations_series_userEp = df_recomendations_series_userEp.select("user_id", "recommendations.anime_id", "recommendations.rating")

In [85]:
df_recomendations_movies_userEp.show(truncate=False)
df_recomendations_series_userEp.show(truncate=False)

+-------+--------+--------+
|user_id|anime_id|rating  |
+-------+--------+--------+
|66666  |33132   |9.622806|
|66666  |12355   |9.478018|
|66666  |40211   |9.433542|
|66666  |28957   |9.3591  |
|66666  |164     |9.230429|
+-------+--------+--------+

+-------+--------+--------+
|user_id|anime_id|rating  |
+-------+--------+--------+
|66666  |5114    |9.065641|
|66666  |28977   |9.008711|
|66666  |35180   |8.998851|
|66666  |11061   |8.985007|
|66666  |42923   |8.975994|
+-------+--------+--------+



In [86]:
# Se realiza un join con el dataframe de animes, para conocer cuales son las peliculas
df_movies_recomended = df_recomendations_movies_userEp.join(df_filtered, df_recomendations_movies_userEp["anime_id"] == df_filtered["ID"], "inner")

In [87]:
df_movies_recomended = df_movies_recomended.orderBy("Score", ascending=False).select("anime_id", "Name", "English name")

In [88]:
df_movies_recomended.show(truncate=False)

+--------+-----------------------------------+-----------------+
|anime_id|Name                               |English name     |
+--------+-----------------------------------+-----------------+
|164     |Mononoke Hime                      |Princess Mononoke|
|12355   |Ookami Kodomo no Ame to Yuki       |Wolf Children    |
|28957   |Mushishi Zoku Shou: Suzu no Shizuku|Unknown          |
|40211   |Luo Xiao Hei Zhan Ji (Movie)       |The Legend of Hei|
|33132   |Shin Kachikachi Yama               |Unknown          |
+--------+-----------------------------------+-----------------+



In [89]:
# Se realiza un join con el dataframe de animes, para conocer cuales son las series
df_series_recomended = df_recomendations_series_userEp.join(df_filtered, df_recomendations_series_userEp["anime_id"] == df_filtered["ID"], "inner")

In [90]:
df_series_recomended = df_series_recomended.orderBy("Score", ascending=False).select("anime_id", "Name", "English name")

In [91]:
df_series_recomended.show(truncate=False)

+--------+--------------------------------+-------------------------------------+
|anime_id|Name                            |English name                         |
+--------+--------------------------------+-------------------------------------+
|5114    |Fullmetal Alchemist: Brotherhood|Fullmetal Alchemist:Brotherhood      |
|28977   |Gintama°                        |Gintama Season 4                     |
|11061   |Hunter x Hunter (2011)          |Hunter x Hunter                      |
|35180   |3-gatsu no Lion 2nd Season      |March Comes In Like A Lion 2nd Season|
|42923   |SK∞                             |SK8 the Infinity                     |
+--------+--------------------------------+-------------------------------------+



In [92]:
# Se obtiene una lista de los anime_id de las peliculas y series recomendadas
lista_anime_id_movies = df_recomendations_movies_userEp.select("anime_id").collect()
lista_anime_id_series = df_recomendations_series_userEp.select("anime_id").collect()

In [93]:
array_lista_anime_id_movies = [row[0] for row in lista_anime_id_movies]
array_lista_anime_id_series = [row[0] for row in lista_anime_id_series]

## Información extra de las películas y series

In [94]:
# Se muestran las peliculas y series recomendadas a traves de la API de Jikan
mostrar_animes_series(array_lista_anime_id_movies)
mostrar_animes_series(array_lista_anime_id_series)