In [12]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALSModel
from pyspark.sql.functions import col, lit
import time
import random

In [9]:
# ---------------- CONFIGURATION ----------------
MODEL_PATH = "/models/als"
MOVIE_CSV_PATH = "hdfs:///input/movie.csv"
RATING_CSV_PATH = "hdfs:///input/rating.csv"
TOP_N = 5  # Nombre de recommandations

In [3]:
# ---------------- INIT SPARK ----------------
spark = SparkSession.builder \
    .appName("FakePredictSingleUser") \
    .master("yarn") \
    .getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/04/30 16:54:48 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.


In [4]:
# ---------------- CHARGER DONNÉES ----------------
print("📥 Lecture des fichiers CSV depuis HDFS...")
ratings_df = spark.read.csv(RATING_CSV_PATH, header=True)
movies_df = spark.read.csv(MOVIE_CSV_PATH, header=True)

ratings_df = ratings_df.select(
    col("userId").cast("int"),
    col("movieId").cast("int"),
    col("rating").cast("float")
)

📥 Lecture des fichiers CSV depuis HDFS...


                                                                                

In [6]:
users = ratings_df.select("userId").distinct().rdd.flatMap(lambda x: x).collect()
movies = movies_df.select("movieId").distinct().rdd.flatMap(lambda x: x).collect()

def generate_rating():
    user_id = random.choice(users)
    movie_id = random.choice(movies)
    rating = round(random.uniform(0.5, 5.0), 1)  # Note entre 0.5 et 5.0
    timestamp = int(time.time())  # Timestamp actuel
    return {
        'userId': user_id,
        'movieId': movie_id,
        'rating': rating,
        'timestamp': timestamp
    }

                                                                                

In [7]:
# Ajouter la nouvelle note
random_rating = generate_rating()
user_id = random_rating['userId']
movie_id = random_rating['movieId']
rating_value = random_rating['rating']

# Créer un DataFrame pour la nouvelle note
new_rating_df = spark.createDataFrame(
    [(user_id, movie_id, rating_value)],
    ["userId", "movieId", "rating"]
)
updated_ratings_df = ratings_df.union(new_rating_df)

In [10]:
# ---------------- CHARGER MODÈLE ----------------
model = ALSModel.load(MODEL_PATH)

In [13]:
# ---------------- FILMS NON VUS ----------------
all_movies = ratings_df.select("movieId").distinct()
rated_movies = ratings_df.filter(col("userId") == user_id).select("movieId")
unrated_movies = all_movies.subtract(rated_movies)

user_unrated_df = unrated_movies.withColumn("userId", lit(user_id).cast("int")) \
                                .select("userId", "movieId")

In [14]:
# ---------------- PREDICTIONS ----------------
predictions = model.transform(user_unrated_df).dropna(subset=["prediction"])
top_n = predictions.orderBy(col("prediction").desc()).limit(TOP_N)

In [15]:
# ---------------- AFFICHAGE ----------------
movies_with_names = movies_df.withColumn("movieId", col("movieId").cast("int"))
top_n_with_titles = top_n.join(movies_with_names, on="movieId").select("title", "prediction")

print(f"\n🎯 Top {TOP_N} recommandations pour l'utilisateur {user_id} :\n")
top_n_with_titles.show(truncate=False)


🎯 Top 5 recommandations pour l'utilisateur 48500 :



                                                                                

+-----------------------------------------------------+----------+
|title                                                |prediction|
+-----------------------------------------------------+----------+
|Konopielka (1982)                                    |5.269062  |
|Rewind This! (2013)                                  |5.2578945 |
|Craig Ferguson: A Wee Bit o' Revolution (2009)       |5.299748  |
|Patton Oswalt: Tragedy Plus Comedy Equals Time (2014)|5.3830137 |
|Tom Segura: Completely Normal (2014)                 |5.2831483 |
+-----------------------------------------------------+----------+



In [16]:
spark.stop()