# Recommender System Example

In [None]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.3.0.tar.gz (281.3 MB)
[K     |████████████████████████████████| 281.3 MB 48 kB/s 
[?25hCollecting py4j==0.10.9.5
  Downloading py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
[K     |████████████████████████████████| 199 kB 50.1 MB/s 
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.3.0-py2.py3-none-any.whl size=281764026 sha256=b76fbef7775cfafc56ea620d21e45e4966da58585e315ff964ce1c6d3076dbc2
  Stored in directory: /root/.cache/pip/wheels/7a/8e/1b/f73a52650d2e5f337708d9f6a1750d451a7349a867f928b885
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9.5 pyspark-3.3.0


In [None]:
from pyspark.sql import SparkSession

# Get the existing Spark session, or create one, under the application name 'rec'.
spark = (
    SparkSession.builder
    .appName('rec')
    .getOrCreate()
)

In [None]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS

In [None]:
# Load the ratings CSV: the first row supplies the column names and the
# column types are inferred from the file contents.
data = spark.read.csv(
    'movielens_ratings.csv',
    header=True,
    inferSchema=True,
)
data.show()

+-------+------+------+
|movieId|rating|userId|
+-------+------+------+
|      2|   3.0|     0|
|      3|   1.0|     0|
|      5|   2.0|     0|
|      9|   4.0|     0|
|     11|   1.0|     0|
|     12|   2.0|     0|
|     15|   1.0|     0|
|     17|   1.0|     0|
|     19|   1.0|     0|
|     21|   1.0|     0|
|     23|   1.0|     0|
|     26|   3.0|     0|
|     27|   1.0|     0|
|     28|   1.0|     0|
|     29|   1.0|     0|
|     30|   1.0|     0|
|     31|   1.0|     0|
|     34|   1.0|     0|
|     37|   1.0|     0|
|     41|   2.0|     0|
+-------+------+------+
only showing top 20 rows



In [None]:
# Print per-column summary statistics (count, mean, stddev, min, max) as a table.
data.describe().show()

+-------+------------------+------------------+------------------+
|summary|           movieId|            rating|            userId|
+-------+------------------+------------------+------------------+
|  count|              1501|              1501|              1501|
|   mean| 49.40572951365756|1.7741505662891406|14.383744170552964|
| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|
|    min|                 0|               1.0|                 0|
|    max|                99|               5.0|                29|
+-------+------------------+------------------+------------------+



In [None]:
# Fixed seed so that both the train/test split and the ALS factor
# initialisation are repeatable across Restart & Run All.
SEED = 42

# Split the dataset into 80% training and 20% testing.
trainData, testData = data.randomSplit([0.8, 0.2], seed=SEED)

# Build the recommendation system using ALS on the training data.
# coldStartStrategy='drop' removes rows whose user or movie was not seen during
# training; without it those rows get NaN predictions, which makes the RMSE NaN.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol='userId',
    itemCol='movieId',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=SEED,
)
model = als.fit(trainData)

In [None]:
# Evaluate the model: score the held-out test split.
# transform() returns the input rows with an added 'prediction' column.
predictions = model.transform(testData)

predictions.show()

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|      6|   3.0|    26|  2.4488475|
|      3|   1.0|     1|-0.28626603|
|      6|   1.0|     1|  1.6131517|
|      2|   1.0|    16|  1.4077753|
|      6|   2.0|    16| 0.84228253|
|      8|   3.0|     3|  0.5282138|
|      0|   1.0|     5|    1.42189|
|     11|   1.0|    19|  1.9092952|
|      3|   1.0|     9|  1.7059038|
|      1|   1.0|     4|  1.0306745|
|     10|   1.0|     4|-0.74857736|
|      9|   1.0|     8|  0.6258663|
|      7|   1.0|     7|-0.71077746|
|     10|   1.0|     7|  1.5114623|
|      2|   1.0|    25|  0.8005977|
|     10|   2.0|    24|   2.970503|
|      4|   1.0|    29|-0.99207056|
|      6|   1.0|     2| 0.64856166|
|     10|   1.0|     2|  1.5447516|
|      5|   2.0|    18|-0.86045814|
+-------+------+------+-----------+
only showing top 20 rows



In [None]:
# Measure how far the predicted ratings are from the actual ones using RMSE.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)
rmse = evaluator.evaluate(predictions)
print(f"Root-mean-square error = {rmse}")

Root-mean-square error = 1.6966644546749163


In [None]:
# Take the test-split rows belonging to user 11, keeping only the
# movieId and userId columns (the rating column is left out).
singleUser = (
    testData
    .where(testData['userId'] == 11)
    .select('movieId', 'userId')
)
singleUser.show()

+-------+------+
|movieId|userId|
+-------+------+
|     18|    11|
|     25|    11|
|     35|    11|
|     36|    11|
|     45|    11|
|     62|    11|
|     70|    11|
|     71|    11|
|     76|    11|
|     89|    11|
|     99|    11|
+-------+------+



In [None]:
# Predict ratings for the (movieId, userId) pairs selected for the single user.
# NOTE(review): the variable name is misspelled ("recommendations"); it is kept
# as-is because the following cell refers to it by this spelling.
reccomendations = model.transform(singleUser)

In [None]:
# List the user's predicted ratings from highest to lowest.
(
    reccomendations
    .orderBy(reccomendations['prediction'].desc())
    .show()
)

+-------+------+----------+
|movieId|userId|prediction|
+-------+------+----------+
|     76|    11|  4.974656|
|     36|    11| 2.6895826|
|     89|    11| 1.4188597|
|     45|    11| 0.5881755|
|     35|    11| 0.5573224|
|     99|    11| 0.4588604|
|     62|    11|0.34472463|
|     18|    11| -1.521415|
|     70|    11|-1.6650418|
|     71|    11| -1.886352|
|     25|    11|-2.6538138|
+-------+------+----------+

