In [0]:
from pyspark.sql import SparkSession
spark=SparkSession.builder.appName('rec').getOrCreate()

In [0]:
from pyspark.ml.recommendation import ALS

In [0]:
from pyspark.ml.evaluation import RegressionEvaluator

In [0]:
data=spark.read.table('movielens_ratings')

In [0]:
data.show()

+-------+------+------+
|movieId|rating|userId|
+-------+------+------+
|      2|   3.0|     0|
|      3|   1.0|     0|
|      5|   2.0|     0|
|      9|   4.0|     0|
|     11|   1.0|     0|
|     12|   2.0|     0|
|     15|   1.0|     0|
|     17|   1.0|     0|
|     19|   1.0|     0|
|     21|   1.0|     0|
|     23|   1.0|     0|
|     26|   3.0|     0|
|     27|   1.0|     0|
|     28|   1.0|     0|
|     29|   1.0|     0|
|     30|   1.0|     0|
|     31|   1.0|     0|
|     34|   1.0|     0|
|     37|   1.0|     0|
|     41|   2.0|     0|
+-------+------+------+
only showing top 20 rows



In [0]:
data.describe().show()

+-------+------------------+------------------+------------------+
|summary|           movieId|            rating|            userId|
+-------+------------------+------------------+------------------+
|  count|              1501|              1501|              1501|
|   mean| 49.40572951365756|1.7741505662891406|14.383744170552964|
| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|
|    min|                 0|               1.0|                 0|
|    max|                99|               5.0|                29|
+-------+------------------+------------------+------------------+



In [0]:
training,test=data.randomSplit([0.8,0.2])

In [0]:
als_model=ALS(maxIter=5,regParam=0.01,userCol='userId',itemCol='movieId',ratingCol='rating')

In [0]:
model=als_model.fit(training)

In [0]:
predictions=model.transform(test)

In [0]:
predictions.show()

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|      0|   3.0|    28|  0.6974516|
|      1|   1.0|    28| 0.15031911|
|      2|   1.0|    12|  0.7130363|
|      3|   1.0|    13| 0.95484626|
|      2|   3.0|     6|  3.7912478|
|      5|   1.0|     6|-0.09739289|
|      0|   1.0|     3|  0.8705987|
|      2|   1.0|     3| -1.3808072|
|      1|   1.0|    20|   1.062402|
|      4|   2.0|    20|-0.59656024|
|      1|   1.0|     5|  0.9005945|
|      5|   1.0|     5|  1.0987962|
|      4|   1.0|    19| 0.69999087|
|      1|   4.0|    15|  0.6546289|
|      2|   4.0|     8|   4.312378|
|      5|   1.0|     8|  1.7599182|
|      4|   1.0|    23|  1.9588348|
|      1|   1.0|     7| 0.35469788|
|      3|   1.0|     7|   1.414608|
|      4|   3.0|    10|-0.06879255|
+-------+------+------+-----------+
only showing top 20 rows



In [0]:
evaluator=RegressionEvaluator(metricName='rmse',labelCol='rating',predictionCol='prediction')

In [0]:
rmse=evaluator.evaluate(predictions)

In [0]:
rmse

1.794219920727188

In [0]:
single_user=test.filter(test['userId']==11).select(['movieId','userId'])

In [0]:
single_user.show()

+-------+------+
|movieId|userId|
+-------+------+
|      0|    11|
|      9|    11|
|     20|    11|
|     25|    11|
|     32|    11|
|     43|    11|
|     48|    11|
|     71|    11|
|     76|    11|
|     78|    11|
|     80|    11|
|     81|    11|
|     88|    11|
+-------+------+



In [0]:
recommendations=model.transform(single_user)

In [0]:
recommendations.orderBy('prediction',ascending=False).show()

+-------+------+-----------+
|movieId|userId| prediction|
+-------+------+-----------+
|     25|    11|  4.0888367|
|     76|    11|  3.9796467|
|      9|    11|  3.9411612|
|     71|    11|  3.2214415|
|     80|    11|  1.5048838|
|     20|    11|  1.3599882|
|     48|    11|  1.2722015|
|     78|    11| 0.86395836|
|     81|    11|  0.7588352|
|     43|    11| 0.47395116|
|     88|    11|-0.45729652|
|      0|    11| -0.9451701|
|     32|    11|  -1.578334|
+-------+------+-----------+

