In [0]:
import os
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.functions import from_unixtime, to_date, col, lower, regexp_replace, dense_rank, when, expr, desc
from pyspark.conf import SparkConf
from pyspark.sql.window import Window
from pyspark.ml.recommendation import ALS, ALSModel
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

In [0]:
# Load the training split that the training ETL process saved as Parquet.
# The path points at a single part file rather than the dataset directory.
train_df = spark.read.parquet(
    "/mnt/backupaws/Datos_ML_RESTAURANT/Datos_Restaurant_entrenamiento/part-00000-tid-445150914951320818-63681dd9-f751-4b80-9c35-2aca29a8a6b2-14-1-c000.snappy.parquet"
)

In [0]:
# Load the evaluation split saved as Parquet (the path points at the
# "testeo" dataset, i.e. the test data, not the training data).
# The path points at a single part file rather than the dataset directory.
test_df = spark.read.parquet(
    "/mnt/backupaws/Datos_ML_RESTAURANT/Datos_Restaurant_testeo/part-00000-tid-8932244569014287907-64f02920-82d1-47ff-b5c7-2d32a58ebd6b-21-1-c000.snappy.parquet"
)

In [0]:
# Configure the ALS (alternating least squares) collaborative-filtering
# estimator. Users are identified by `id_name`, items by `id_name_empresa`,
# and the observed score is read from `rating`.
#
# NOTE(review): with maxIter=100 the RDD lineage gets long; ALS only
# checkpoints when a checkpoint directory is set on the SparkContext.
# Consider spark.sparkContext.setCheckpointDir(...) before fitting — confirm
# against the cluster setup.
als = ALS(
    maxIter=100,
    regParam=0.1,
    userCol="id_name",
    itemCol="id_name_empresa",
    ratingCol="rating",
    predictionCol="prediction",  # same value the original set via setPredictionCol
    coldStartStrategy="drop",    # drop rows whose prediction would be NaN (unseen user/item)
    blockSize=4096,
    # Pin the seed so the random factor initialisation, and therefore the
    # fitted model and the reported RMSE, is repeatable from run to run.
    seed=42,
)

In [0]:
# Fit the configured ALS estimator on the training split; the result is the
# fitted model used below for scoring and for generating recommendations.
model = als.fit(dataset=train_df)

In [0]:
# Define the evaluator: root-mean-square error between the observed `rating`
# column and the model's `prediction` column.
evaluator = RegressionEvaluator(
    predictionCol="prediction",
    labelCol="rating",
    metricName="rmse",
)

In [0]:
# Score the test split with the fitted model and report the RMSE.
# Results noted from earlier runs (presumably RMSE values):
#   maxIter = 50  -> 2.532
#   maxIter = 100 -> 2.0996
predictions = model.transform(dataset=test_df)
rmse = evaluator.evaluate(dataset=predictions)
print("RMSE en conjunto de pruebas: {:.4f}".format(rmse))

In [0]:
# Build a flat, ordered table of recommendations for every user:
#   1. ask the model for up to 1000 recommended items per user;
#   2. explode the `recommendations` array so each (user, item) pair is a row;
#   3. keep the user id, the recommended item id and its `rating` field;
#   4. order by user, then by `rating` from highest to lowest.
userRecs = (
    model.recommendForAllUsers(1000)
    .select(
        "id_name",
        expr("explode(recommendations)").alias("recommendation"),
    )
    .select(
        "id_name",
        "recommendation.id_name_empresa",
        "recommendation.rating",
    )
    .orderBy(col("id_name").asc(), col("rating").desc())
)

In [0]:
# Reduce the result to a single partition before it is written out.
userRecs = userRecs.coalesce(numPartitions=1)

In [0]:
# Persist the recommendations as Parquet, replacing any previous output at
# the target location.
(
    userRecs.write
    .format("parquet")
    .mode("overwrite")
    .save("/mnt/backupaws/metadata/Users_recomendations_Restaurant")
)