In [20]:
from pandas import read_csv,concat
from sklearn.preprocessing import LabelEncoder
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.sql import Row


In [6]:
ratings_df= read_csv("/kaggle/input/myanimelist-dataset/user-filtered.csv")


In [7]:
N_USERS = 30000
rating_df = ratings_df[ratings_df['rating'] != -1]
rating_df = rating_df[rating_df["user_id"]<=N_USERS]


In [8]:
# Créer une session Spark
spark = SparkSession.builder \
    .appName("AnimeRecommendation") \
    .getOrCreate()

# Étape 1 : Charger les données dans Spark DataFrame
# Convertir le DataFrame pandas en DataFrame Spark
rating_df_spark = spark.createDataFrame(
rating_df[['user_id', 'anime_id', 'rating']].values.tolist(),  # Convertir en liste de listes
    schema=["user_id", "anime_id", "rating"]  )

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/12/01 13:27:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [9]:
rating_df_spark.show()

24/12/01 13:31:10 WARN TaskSetManager: Stage 0 contains a task of very large size (22288 KiB). The maximum recommended task size is 1000 KiB.
24/12/01 13:31:15 WARN PythonRunner: Detected deadlock while completing task 0.0 in stage 0 (TID 0): Attempting to kill Python Worker
                                                                                

+-------+--------+------+
|user_id|anime_id|rating|
+-------+--------+------+
|      0|      67|     9|
|      0|    6702|     7|
|      0|     242|    10|
|      0|    4898|     0|
|      0|      21|    10|
|      0|      24|     9|
|      0|    2104|     0|
|      0|    4722|     8|
|      0|    6098|     6|
|      0|    3125|     9|
|      0|     481|    10|
|      0|      68|     6|
|      0|    1689|     6|
|      0|    2913|     6|
|      0|    1250|     7|
|      0|     356|     9|
|      0|     121|     9|
|      0|     430|     9|
|      0|    1829|     7|
|      0|    1571|    10|
+-------+--------+------+
only showing top 20 rows



In [10]:
# Étape 2 : Configurer le modèle ALS
als = ALS(
    userCol="user_id",
    itemCol="anime_id",
    ratingCol="rating",
    nonnegative=True,              # Assurer que les prédictions sont positives
    implicitPrefs=False,           # On utilise des notes explicites
    coldStartStrategy="drop"       # Supprimer les prédictions sur données inconnues
)

# Étape 3 : Entraîner le modèle
model = als.fit(rating_df_spark)



24/12/01 13:31:17 WARN TaskSetManager: Stage 1 contains a task of very large size (22288 KiB). The maximum recommended task size is 1000 KiB.
24/12/01 13:31:21 WARN PythonRunner: Detected deadlock while completing task 0.0 in stage 1 (TID 1): Attempting to kill Python Worker
24/12/01 13:31:21 WARN TaskSetManager: Stage 2 contains a task of very large size (22288 KiB). The maximum recommended task size is 1000 KiB.
24/12/01 13:31:44 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
                                                                                

In [11]:
# Étape 4 : Générer des recommandations pour tous les utilisateurs
user_recommendations = model.recommendForAllUsers(10)  # Top 10 recommandations
user_recommendations.show()



+-------+--------------------+
|user_id|     recommendations|
+-------+--------------------+
|      1|[{38524, 10.45463...|
|     12|[{36638, 5.058947...|
|     13|[{35674, 8.479095...|
|     22|[{42061, 14.45782...|
|     26|[{130, 0.0}, {124...|
|     27|[{35674, 8.917957...|
|     28|[{33600, 10.59284...|
|     31|[{33787, 6.564821...|
|     34|[{30071, 11.32690...|
|     44|[{18137, 8.790296...|
|     47|[{2904, 7.529252}...|
|     52|[{37531, 9.8969},...|
|     53|[{29585, 8.274739...|
|     65|[{18137, 10.54983...|
|     78|[{22477, 9.6513},...|
|     81|[{36638, 10.72236...|
|     85|[{9253, 8.261945}...|
|     91|[{38378, 10.67032...|
|     93|[{42438, 9.884198...|
|    101|[{30071, 8.845506...|
+-------+--------------------+
only showing top 20 rows



                                                                                

In [12]:
# Étape 5 : Générer des recommandations pour tous les animes
anime_recommendations = model.recommendForAllItems(10)  # Top 10 utilisateurs pour chaque anime
anime_recommendations.show()



+--------+--------------------+
|anime_id|     recommendations|
+--------+--------------------+
|       1|[{13532, 25.42809...|
|      22|[{2724, 18.686178...|
|      26|[{13532, 14.49771...|
|      27|[{2724, 12.716168...|
|      28|[{2724, 20.63881}...|
|      31|[{13532, 20.75611...|
|      44|[{13532, 17.85351...|
|      47|[{13532, 20.79097...|
|      52|[{2724, 16.945698...|
|      53|[{13532, 14.83919...|
|      65|[{13532, 13.03653...|
|      76|[{2724, 15.964684...|
|      81|[{2724, 15.596314...|
|      85|[{2724, 18.527826...|
|      91|[{13532, 15.20029...|
|      93|[{2724, 16.526283...|
|     101|[{13532, 16.06302...|
|     103|[{2724, 19.268835...|
|     108|[{2724, 13.70275}...|
|     111|[{2724, 20.898369...|
+--------+--------------------+
only showing top 20 rows



                                                                                

In [13]:
# Étape 6 : Recommandations pour un utilisateur spécifique
user_id_to_recommend = 123  # ID de l'utilisateur
user_subset = rating_df_spark.filter(rating_df_spark.user_id == user_id_to_recommend)
recommendations = model.recommendForUserSubset(user_subset, 5)  # Top 5 recommandations
recommendations.show()


24/12/01 13:32:46 WARN TaskSetManager: Stage 177 contains a task of very large size (22288 KiB). The maximum recommended task size is 1000 KiB.
                                                                                

+-------+--------------------+
|user_id|     recommendations|
+-------+--------------------+
|    123|[{42061, 13.14247...|
+-------+--------------------+



In [17]:
rec=(recommendations.toPandas().values)[0][1]

In [15]:
anime_df = read_csv("/kaggle/input/myanimelist-dataset/anime-dataset-2023.csv")


In [21]:
import pandas as pd

def idsToInfos(recommentations):
    id_rating_list=[tuple(x) for x in (recommendations.toPandas().values)[0][1]]
    # Créer une liste vide pour stocker les DataFrames filtrés
    filtered_data = []
    
    # Appliquer le filtre pour chaque tuple (anime_id, rating)
    for anime_id, rating in id_rating_list:
        filtered_data.append(anime_df[(anime_df["anime_id"] == anime_id)])
    
    # Combiner tous les DataFrames filtrés avec pd.concat()
    result_df = concat(filtered_data, ignore_index=True)
    
    return result_df
idsToInfos(recommendations)


Unnamed: 0,anime_id,Name,English name,Other name,Score,Genres,Synopsis,Type,Episodes,Aired,...,Studios,Source,Duration,Rating,Rank,Popularity,Favorites,Scored By,Members,Image URL
0,42061,Ichiman,UNKNOWN,イチマン,UNKNOWN,Comedy,Web commercials for Livesense's online part-ti...,ONA,18.0,"Jul 29, 2019 to Aug 28, 2019",...,UNKNOWN,Original,7 sec per ep,G - All Ages,16696.0,18776,0,UNKNOWN,204,https://cdn.myanimelist.net/images/anime/1694/...
1,35674,Jumbagi: Hanbandoui gongryong 3D,Speckles the Tarbosaurus,점박이: 한반도의 공룡 3D,UNKNOWN,"Adventure, Drama",70 million years ago dinosaurs ruled the Korea...,Movie,1.0,"Jan 26, 2012",...,UNKNOWN,Unknown,1 hr 28 min,G - All Ages,17000.0,18313,0,UNKNOWN,233,https://cdn.myanimelist.net/images/anime/9/860...
2,36620,Nintama Rantarou: Saraba Ninjutsu Gakuen no Dan,UNKNOWN,忍たま乱太郎 さらば 忍術学園 の段,UNKNOWN,Comedy,Rantarou misunderstands an overheard comment f...,Special,2.0,"Oct 30, 2017 to Oct 31, 2017",...,Ajia-do,Unknown,20 min per ep,G - All Ages,18538.0,16716,0,UNKNOWN,338,https://cdn.myanimelist.net/images/anime/10/88...
3,39767,Mao Yao de Huo Han,Love Story of Cat Spirit,猫妖的诱惑,6.62,"Fantasy, Romance",A cat demon with a split personality is reinca...,ONA,20.0,"Dec 21, 2018 to Jun 13, 2019",...,Rocen,Web manga,17 min per ep,PG-13 - Teens 13 or older,5741.0,13133,10,240.0,860,https://cdn.myanimelist.net/images/anime/1863/...
4,7175,Himitsukessha Taka no Tsume Countdown,UNKNOWN,秘密結社 鷹の爪 カウントダウン,5.65,Comedy,The second season of Eagle Talon.,TV,11.0,"Oct 7, 2009 to Dec 23, 2009",...,DLE,Unknown,11 min per ep,PG-13 - Teens 13 or older,10296.0,13972,0,152.0,659,https://cdn.myanimelist.net/images/anime/9/827...


In [29]:
model.save("/kaggle/output/model")