In [1]:
from pyspark.sql import SparkSession as ss
# Create the Spark session for this notebook, or reuse one that is already
# running under the application name 'rec'.
spark = (
    ss.builder
    .appName('rec')
    .getOrCreate()
)

In [2]:
from pyspark.ml.recommendation import ALS

In [3]:
from pyspark.ml.evaluation import RegressionEvaluator

In [4]:
# Load the ratings CSV; the first row supplies column names and the column
# types are inferred from the file contents.
data = spark.read.csv(
    path='movielens_ratings.csv',
    header=True,
    inferSchema=True,
)

In [6]:
# Print the column names and inferred types of the loaded ratings frame.
data.printSchema()

root
 |-- movieId: integer (nullable = true)
 |-- rating: double (nullable = true)
 |-- userId: integer (nullable = true)



In [8]:
# Hold out roughly 20% of the ratings for evaluation.
# Without an explicit seed, randomSplit picks a new random seed on every call,
# so the train/test membership (and every metric computed from it) would
# change on each re-run. Pinning the seed keeps the notebook reproducible.
train, test = data.randomSplit([0.8, 0.2], seed=42)

In [9]:
# ALS estimator configured over the userId / movieId / rating columns.
#
# coldStartStrategy='drop': a user or movie that lands only in the held-out
#   split has no learned factors, so transform() would emit NaN for it and the
#   RMSE would become NaN. 'drop' removes such rows from the prediction output.
#   (Requires Spark >= 2.2.)
# seed: fixes the random initialisation of the factors so that repeated fits
#   on the same data are comparable.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol='userId',
    itemCol='movieId',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=42,
)

In [10]:
# Fit the ALS estimator on the training split to obtain the fitted model.
model = als.fit(train)

In [13]:
# Score the held-out split; the result carries the test columns plus a
# 'prediction' column.
prediction = model.transform(test)

In [14]:
# Preview the first rows of actual ratings next to the model's predictions.
prediction.show()

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|     31|   1.0|    26|  1.1693002|
|     31|   4.0|    12|  0.6375409|
|     31|   1.0|    29|   0.825013|
|     85|   1.0|    13|  1.8989636|
|     85|   5.0|    16| 0.26125348|
|     85|   5.0|     8|  1.4913979|
|     85|   4.0|     7|  2.0589018|
|     85|   3.0|    21|  3.1077423|
|     65|   2.0|    15| -0.3118787|
|     53|   1.0|     6|-0.82878155|
|     53|   1.0|    23|  3.6619213|
|     53|   1.0|     7|  0.9941268|
|     78|   1.0|     1|  1.0623198|
|     78|   1.0|    19|  0.6004365|
|     78|   1.0|    17| 0.58261186|
|     34|   1.0|    19|  2.0343156|
|     34|   1.0|    14|  2.5321896|
|     81|   1.0|     1|  3.4048033|
|     81|   2.0|    29|  2.1292088|
|     81|   3.0|    18|-0.97793835|
+-------+------+------+-----------+
only showing top 20 rows



In [15]:
# Evaluator that compares the 'prediction' column against the true 'rating'
# column using root-mean-squared error.
evaluator = RegressionEvaluator(
    predictionCol='prediction',
    labelCol='rating',
    metricName='rmse',
)

In [16]:
# Compute the RMSE of the predictions on the held-out split.
rmse = evaluator.evaluate(prediction)

In [17]:
# Display the RMSE value as the cell output.
rmse

1.9396984017990924

In [18]:
# Take the held-out rows for user 11 and keep only the two id columns the
# model needs as input (the true rating is dropped).
single_user = (
    test
    .where(test.userId == 11)
    .select('movieId', 'userId')
)

In [19]:
# List the (movieId, userId) pairs that will be scored for user 11.
single_user.show()

+-------+------+
|movieId|userId|
+-------+------+
|      0|    11|
|     12|    11|
|     18|    11|
|     48|    11|
|     80|    11|
|     90|    11|
+-------+------+



In [20]:
# Score user 11's held-out movies with the fitted model; adds a 'prediction'
# column to the (movieId, userId) pairs.
recommendation = model.transform(single_user)

In [21]:
# Show user 11's movies ranked from highest to lowest predicted rating.
recommendation.orderBy(recommendation['prediction'].desc()).show()

+-------+------+----------+
|movieId|userId|prediction|
+-------+------+----------+
|     18|    11|  5.952113|
|     90|    11|  5.780332|
|     80|    11| 3.9571958|
|     48|    11| 0.7231833|
|      0|    11|0.22380972|
|     12|    11|-0.6019163|
+-------+------+----------+

