In [1]:
import os

import findspark

# Locate the Spark installation. Prefer the SPARK_HOME environment variable so the
# notebook also runs on machines other than the author's; fall back to the
# original hard-coded location when the variable is not set.
findspark.init(os.environ.get("SPARK_HOME", "/home/ratzz/spark-2.4.4-bin-hadoop2.7"))

# pyspark only becomes importable after findspark.init() has patched sys.path.
from pyspark.sql import SparkSession

# Reuse an already-running session if there is one, otherwise start a new one.
# NOTE(review): the app name 'lr' does not describe a recommender notebook — confirm
# whether it should be renamed.
spark = SparkSession.builder.appName('lr').getOrCreate()

In [2]:
from pyspark.ml.recommendation import ALS

In [3]:
from pyspark.ml.evaluation import RegressionEvaluator

In [4]:
# Load the ratings CSV: the first line holds the column names and the column
# types are inferred from the file contents.
data = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv("movielens_ratings.csv")
)

In [5]:
# Convert the Spark DataFrame to pandas so the notebook renders it as a table.
# NOTE(review): toPandas() materialises every row locally; acceptable for the
# ~1.5k rows reported by describe() below, but prefer data.limit(n) on larger inputs.
data.toPandas()

Unnamed: 0,movieId,rating,userId
0,2,3.0,0
1,3,1.0,0
2,5,2.0,0
3,9,4.0,0
4,11,1.0,0
...,...,...,...
1496,90,4.0,29
1497,93,1.0,29
1498,94,4.0,29
1499,97,1.0,29


In [6]:
# Print summary statistics (count, mean, stddev, min, max) for every column.
data.describe().show()

+-------+------------------+------------------+------------------+
|summary|           movieId|            rating|            userId|
+-------+------------------+------------------+------------------+
|  count|              1501|              1501|              1501|
|   mean| 49.40572951365756|1.7741505662891406|14.383744170552964|
| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|
|    min|                 0|               1.0|                 0|
|    max|                99|               5.0|                29|
+-------+------------------+------------------+------------------+



In [7]:
# Hold out roughly 20% of the ratings for evaluation. A fixed seed makes the
# split (and therefore every metric computed from it) repeatable across
# Restart & Run All; without it each run draws a different random split.
train, test = data.randomSplit([0.8, 0.2], seed=42)

In [8]:
# Alternating Least Squares matrix-factorisation recommender.
# coldStartStrategy="drop" removes test rows whose user or movie never appeared
# in the training split; with the default ("nan") those rows get NaN
# predictions, which in turn makes the RMSE computed later NaN.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol="userId",
    itemCol="movieId",
    ratingCol="rating",
    coldStartStrategy="drop",
)

In [9]:
# Train the recommender on the training split.
model = als.fit(dataset=train)

In [10]:
# Score the held-out ratings; adds a "prediction" column to the test rows.
predictions = model.transform(dataset=test)

In [11]:
# Preview the test rows side by side with the model's predicted rating.
predictions.show()

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|     31|   1.0|    27|  1.4008784|
|     31|   1.0|     5|  2.0702376|
|     31|   3.0|    14| 0.40530974|
|     85|   1.0|    26| 0.25556642|
|     85|   5.0|    16|  3.2163649|
|     65|   1.0|    28| 0.40826586|
|     65|   1.0|    24|  1.9483659|
|     53|   3.0|    13|   2.727082|
|     53|   1.0|    23| -1.5980185|
|     78|   1.0|    13| 0.76904076|
|     78|   1.0|    20|  0.6834806|
|     78|   1.0|     8| 0.59638727|
|     34|   3.0|     3| 0.92622066|
|     34|   1.0|    19|-0.09312166|
|     34|   4.0|     2|  2.3890874|
|     81|   3.0|    26|-0.04838602|
|     81|   1.0|    22|  3.6399715|
|     81|   1.0|     1|  2.6397195|
|     81|   1.0|     6|  1.2573669|
|     81|   2.0|    29| -0.0902441|
+-------+------+------+-----------+
only showing top 20 rows



In [12]:
# Root-mean-square error between the true "rating" and the model's "prediction".
evaluator = RegressionEvaluator(
    predictionCol="prediction",
    labelCol="rating",
    metricName="rmse",
)

In [13]:
# Evaluate the predictions and actually report the metric; previously the value
# was computed and stored but never displayed anywhere in the notebook.
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error on the test split: {}".format(rmse))

In [14]:
# Held-out (movie, user) pairs for user 11. Only the two id columns are kept,
# so the true rating is not carried along when these rows are scored.
single_user = test.where(test.userId == 11).select("movieId", "userId")

In [15]:
# List the (movieId, userId) pairs selected for user 11.
single_user.show()

+-------+------+
|movieId|userId|
+-------+------+
|      6|    11|
|      9|    11|
|     21|    11|
|     35|    11|
|     43|    11|
|     59|    11|
|     61|    11|
|     76|    11|
|     77|    11|
|     81|    11|
|     86|    11|
+-------+------+



In [16]:
# Predict a rating for each of the selected user's held-out movies.
recommendations = model.transform(dataset=single_user)

In [17]:
# Rank the movies from highest to lowest predicted rating.
recommendations.orderBy(recommendations["prediction"].desc()).show()

+-------+------+----------+
|movieId|userId|prediction|
+-------+------+----------+
|     35|    11| 3.5595253|
|     43|    11| 2.8404057|
|     77|    11| 2.0123446|
|     76|    11| 1.7260756|
|     86|    11| 1.5888253|
|     61|    11| 1.5579386|
|     59|    11|  1.534821|
|      6|    11| 1.3992915|
|     21|    11| 0.9730568|
|     81|    11| 0.5618692|
|      9|    11|-0.3146076|
+-------+------+----------+



# Made By Ratnakar Maurya