In [1]:
from pyspark.sql import SparkSession
# Start (or reuse) a Spark session named 'movie' that targets the 'local' master.
spark = (
    SparkSession.builder
    .master('local')
    .appName('movie')
    .getOrCreate()
)

In [2]:
# Load the ratings CSV: the first row supplies the column names and the
# column types are inferred from the values.
data = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('movielens_ratings.csv')
)

In [3]:
# Print each column's name, inferred type and nullability.
data.printSchema()

root
 |-- movieId: integer (nullable = true)
 |-- rating: double (nullable = true)
 |-- userId: integer (nullable = true)



In [4]:
# Summary statistics (count, mean, stddev, min, max) for every column.
data.describe().show()

+-------+------------------+------------------+------------------+
|summary|           movieId|            rating|            userId|
+-------+------------------+------------------+------------------+
|  count|              1501|              1501|              1501|
|   mean| 49.40572951365756|1.7741505662891406|14.383744170552964|
| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|
|    min|                 0|               1.0|                 0|
|    max|                99|               5.0|                29|
+-------+------------------+------------------+------------------+



In [5]:
# Hold out roughly 20% of the ratings for testing; the weights are approximate
# proportions, not exact row counts. The fixed seed makes the split repeatable
# so that a Restart-and-Run-All reproduces the same train/test partition.
training, testing = data.randomSplit([0.8, 0.2], seed=42)

In [7]:
from pyspark.ml.recommendation import ALS
# Fit an ALS collaborative-filtering model on the training ratings.
# - seed pins the random initialisation of the latent factors so reruns
#   produce the same model.
# - coldStartStrategy='drop' (requires Spark >= 2.2): a user or movie that
#   is absent from the training split would otherwise receive a NaN
#   prediction, which pollutes rankings and any evaluation metric; such rows
#   are dropped from the transform output instead.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol='userId',
    itemCol='movieId',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=42,
)
model = als.fit(training)

In [8]:
# Score the held-out ratings; the result carries an extra 'prediction' column.
predictions = model.transform(testing)

In [10]:
# Preview the first 5 scored rows to compare 'rating' against 'prediction'.
predictions.show(5)

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|     31|   4.0|    12| -1.0437276|
|     31|   1.0|     4|   2.962365|
|     31|   1.0|    24|-0.56251526|
|     31|   1.0|    29|  1.3098059|
|     85|   3.0|     1|  0.5903579|
+-------+------+------+-----------+
only showing top 5 rows



In [11]:
# Take the held-out rows for user 11 and keep only the two columns the
# model needs as input (the known rating is left out).
single_user = (
    testing
    .where(testing.userId == 11)
    .select('movieId', 'userId')
)

In [13]:
# Display the (movieId, userId) pairs that will be scored for user 11.
single_user.show()

+-------+------+
|movieId|userId|
+-------+------+
|     20|    11|
|     23|    11|
|     38|    11|
|     41|    11|
|     51|    11|
|     70|    11|
|     71|    11|
|     86|    11|
+-------+------+



In [15]:
# Score user 11's candidate movies, then list them from the highest
# predicted rating to the lowest.
recommendations = model.transform(single_user)
recommendations.sort(recommendations.prediction.desc()).show()

+-------+------+-------------+
|movieId|userId|   prediction|
+-------+------+-------------+
|     38|    11|    5.3968105|
|     71|    11|     4.884364|
|     23|    11|    3.6287725|
|     86|    11|    2.6535852|
|     51|    11|    2.3715425|
|     70|    11|    1.7147241|
|     41|    11|    1.4239097|
|     20|    11|-0.0077199936|
+-------+------+-------------+

