In [2]:
# Initialise findspark before any pyspark import in the cells below;
# findspark's documented purpose is to locate the local Spark installation
# and make the `pyspark` package importable.
import findspark
findspark.init()

In [3]:
import sys

# Put the parent directory on the import path so that the later
# `from src.training import ...` cells can resolve the package.
# The membership guard keeps the cell idempotent: re-running it no longer
# appends a duplicate "../" entry each time.
if "../" not in sys.path:
    sys.path.append("../")

In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType

In [5]:
# Obtain the Spark session for this notebook (an already-running session is
# reused by getOrCreate()).
spark = (
    SparkSession.builder
    .appName("RecommenderSystem")
    .getOrCreate()
)

In [None]:
# Load the preprocessed dataset: the first CSV row is treated as the header
# and Spark infers each column's type from the file contents.
data = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("../data/processed/preprocessed.csv")
)


In [None]:
# Preview a handful of rows only. toPandas() materialises every row it is
# given in the driver's memory, so converting the full dataset just to look
# at it can exhaust the driver; limit() keeps the conversion small.
data.limit(10).toPandas()

In [None]:
# Cast ISBN to an integer column, then discard rows whose ISBN is null
# after the cast.
# NOTE(review): IntegerType is a 32-bit signed integer; confirm the ISBN
# values in the preprocessed file actually fit that range — TODO confirm.
data = data.withColumn("ISBN", data["ISBN"].cast(IntegerType()))
data = data.filter(data["ISBN"].isNotNull())

# The remaining columns receive the same integer cast, in the same order as
# before; no null filtering is applied to them.
for column_name in ("User-ID", "Book-Rating", "AuthorIndex", "PublisherIndex"):
    data = data.withColumn(column_name, data[column_name].cast(IntegerType()))

In [None]:
# Random 80/20 split with a fixed seed so that reruns yield the same partition.
# NOTE(review): neither `train` nor `test` is referenced by the later cells
# visible in this notebook — confirm whether the models should be fit on
# `train` and scored on `test`.
train, test = data.randomSplit(weights=[0.8, 0.2], seed=42)

## ALS

In [None]:
from src.training import Using_ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [None]:
# Fit the ALS recommender through the project helper, which hands back the
# fitted model together with a predictions DataFrame scored in the next cell.
# NOTE(review): the full `data` frame is passed rather than the `train`
# split — confirm whether Using_ALS performs its own train/test split.
# `path` is presumably where the model is persisted; verify in src.training.
als_model, prediction = Using_ALS(
    data,
    maxIter=20,
    path="../models/ALSModel",
)

In [None]:
# Score the ALS predictions against the true ratings using RMSE.
evaluator = RegressionEvaluator(metricName="rmse", labelCol="Book-Rating", predictionCol="prediction")

# With ALS's default cold-start strategy, users/items unseen during fitting
# receive a NaN prediction, and a single NaN turns the whole RMSE into NaN.
# Dropping those rows first keeps the metric meaningful; if the helper
# already configures coldStartStrategy="drop" this is simply a no-op.
scored_predictions = prediction.dropna(subset=["prediction"])

rmse = evaluator.evaluate(scored_predictions)
print(f"Root Mean Squared Error (RMSE) = {rmse}")

In [None]:
# User to generate recommendations for; replace with the desired user ID.
user_id = 230249

# Restrict the dataset to that user's rows and request the top 5 items
# for the user(s) present in the subset.
selected_user_rows = data.filter(data["User-ID"] == user_id)
user_predictions = als_model.recommendForUserSubset(selected_user_rows, 5)

In [None]:
# Bring the recommendation rows back to the driver and display them as the
# cell output (the result is limited to the requested user subset).
user_predictions.collect()

## K-Means

In [None]:
from src.training import Using_Kmeans

In [None]:
# Fit K-Means with 5 clusters through the project helper, which hands back
# the fitted model and a DataFrame of cluster assignments.
# `path` is presumably where the model is persisted; verify in src.training.
kmeans_model, predictions = Using_Kmeans(
    data,
    k=5,
    path="../models/KMeansModel",
)

In [None]:
# Show each user's feature vector next to the cluster it was assigned to
# (show() prints only the first rows).
(
    predictions
    .select("User-ID", "features", "prediction")
    .show()
)

In [None]:
from pyspark.ml.evaluation import ClusteringEvaluator

# Evaluate the clustering with the silhouette score.
# NOTE(review): this rebinds `evaluator`, which the ALS section used for the
# RMSE evaluator; re-running the RMSE scoring line alone after this cell
# would pick up the clustering evaluator instead.
evaluator = ClusteringEvaluator()
silhouette = evaluator.evaluate(predictions)
print(f"Silhouette Score = {silhouette}")