In [1]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALSModel
from pyspark.sql.functions import col
import sys

In [None]:
# ---------------- CONFIGURATION ----------------
# Input datasets (CSV files addressed through the hdfs:/// scheme).
RATING_CSV_PATH: str = "hdfs:///input/rating.csv"
MOVIE_CSV_PATH: str = "hdfs:///input/movie.csv"

# Location the saved ALS model is loaded from.
MODEL_PATH: str = "/models/als_model"

# Number of recommendations to produce.
TOP_N: int = 5

In [None]:
# ---------------- INIT SPARK ----------------
# Obtain the SparkSession for this job (an already-active session is reused
# by getOrCreate), pointing at the master URL spark://namenode:7077.
spark = (
    SparkSession.builder
    .master("spark://namenode:7077")
    .appName("ALS Predict Single User")
    .getOrCreate()
)

In [None]:
# ---------------- CLI PARAMETERS ----------------
# Expects exactly three positional arguments after the script name:
#   <userId>  integer id of the user who rated the movie
#   <movieId> integer id of the rated movie
#   <rating>  rating value, parsed as a float
# Any usage error (wrong argument count or a value that cannot be parsed)
# terminates the process with exit status 1 and a message on stderr.
# NOTE(review): inside a Jupyter kernel sys.argv presumably holds the
# kernel's own launch arguments, so this cell looks intended for script
# execution — confirm how the notebook is meant to be run.
_USAGE = "Usage: python als_predict_topn.py <userId> <movieId> <rating>"

if len(sys.argv) != 4:
    # sys.exit(<str>) prints the string to stderr and exits with status 1.
    sys.exit(_USAGE)

try:
    user_id = int(sys.argv[1])
    movie_id = int(sys.argv[2])
    rating_value = float(sys.argv[3])
except ValueError as exc:
    # Report malformed values as a usage error instead of a raw traceback.
    sys.exit(f"{_USAGE}\nInvalid argument: {exc}")

In [None]:
# ---------------- LOAD DATA ----------------
# Both CSV files are read with their first line treated as a header.
# No inferSchema option is set, so the ratings columns are cast explicitly
# to the numeric types used downstream (int ids, float rating).
ratings_df = spark.read.csv(RATING_CSV_PATH, header=True).select(
    *[
        col(column_name).cast(spark_type)
        for column_name, spark_type in (
            ("userId", "int"),
            ("movieId", "int"),
            ("rating", "float"),
        )
    ]
)
movies_df = spark.read.csv(MOVIE_CSV_PATH, header=True)


In [None]:
# Add the new rating supplied on the command line to the existing ratings.
# createDataFrame infers long/double from the Python int/float values, so the
# row is cast to the same int/int/float types applied to ratings_df; otherwise
# the union would silently widen the combined columns.
new_rating = spark.createDataFrame(
    [(user_id, movie_id, rating_value)],
    ["userId", "movieId", "rating"],
).select(
    col("userId").cast("int"),
    col("movieId").cast("int"),
    col("rating").cast("float"),
)
# unionByName matches columns by name rather than by position, so the result
# does not depend on the column order of either side.
# NOTE(review): an existing rating for the same (userId, movieId) pair is not
# replaced; both rows are kept — confirm this is the intended behaviour.
updated_ratings_df = ratings_df.unionByName(new_rating)


In [None]:
# ---------------- LOAD MODEL ----------------
# Load the saved ALS model stored at MODEL_PATH.
# NOTE(review): MODEL_PATH has no URI scheme, unlike the hdfs:/// CSV paths;
# presumably it resolves against the cluster's default filesystem — confirm.
model = ALSModel.load(MODEL_PATH)