# Recommender System
Using Alternating Least Square (ALS)

In [0]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('rec').getOrCreate()

In [0]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [0]:
data = spark.read.csv('/FileStore/tables/movielens_ratings.csv',inferSchema= True, header=True)
data.show()

+-------+------+------+
|movieId|rating|userId|
+-------+------+------+
|      2|   3.0|     0|
|      3|   1.0|     0|
|      5|   2.0|     0|
|      9|   4.0|     0|
|     11|   1.0|     0|
|     12|   2.0|     0|
|     15|   1.0|     0|
|     17|   1.0|     0|
|     19|   1.0|     0|
|     21|   1.0|     0|
|     23|   1.0|     0|
|     26|   3.0|     0|
|     27|   1.0|     0|
|     28|   1.0|     0|
|     29|   1.0|     0|
|     30|   1.0|     0|
|     31|   1.0|     0|
|     34|   1.0|     0|
|     37|   1.0|     0|
|     41|   2.0|     0|
+-------+------+------+
only showing top 20 rows



In [0]:
train_data, test_data = data.randomSplit([0.8,0.2])

In [0]:
als = ALS(maxIter=5,regParam=0.01,userCol='userId',itemCol='movieId',ratingCol='rating')
model = als.fit(train_data)

In [0]:
predictions = model.transform(test_data)
predictions.show()

+-------+------+------+-----------+
|movieId|rating|userId| prediction|
+-------+------+------+-----------+
|      0|   3.0|    28|  -1.103605|
|      2|   4.0|    28| -1.7432746|
|      6|   1.0|    28|  1.1946769|
|      4|   4.0|    26|    3.26961|
|      5|   2.0|    26|  1.7553195|
|      3|   1.0|    13|  1.5090041|
|      0|   1.0|     6| 0.16240537|
|      5|   1.0|     6|-0.29064864|
|      5|   3.0|    16|  2.0159562|
|      0|   1.0|     3|  0.3690589|
|      2|   1.0|     3|  1.1212615|
|      5|   1.0|     5|  0.8015554|
|      2|   1.0|    19|  0.8260696|
|      3|   1.0|    17|  0.3540058|
|      0|   1.0|    23|  2.3739882|
|      2|   1.0|    23|  1.8881061|
|      2|   2.0|     7|  1.5943474|
|      0|   1.0|    21|-0.64848316|
|      3|   1.0|    21|  1.2155275|
|      2|   3.0|     0|  -0.498946|
+-------+------+------+-----------+
only showing top 20 rows



In [0]:
evaluator = RegressionEvaluator(metricName='rmse', labelCol='rating',predictionCol='prediction')

In [0]:
rmse = evaluator.evaluate(predictions)
rmse

Out[11]: 2.015111957600273

In [0]:
single_user = test_data.filter(test_data['userId']==11).select(['movieId','userId'])

In [0]:
recommendations = model.transform(single_user)
recommendations.orderBy(recommendations['prediction'].desc()).show()

+-------+------+------------+
|movieId|userId|  prediction|
+-------+------+------------+
|     25|    11|   3.4896348|
|     12|    11|   2.6410546|
|     48|    11|   2.1784835|
|     35|    11|   1.6677058|
|     97|    11|   1.0794977|
|     11|    11|  0.18500778|
|     72|    11|-0.015638288|
|     81|    11| -0.18099721|
|     13|    11| -0.28577465|
|     23|    11|  -1.0243568|
|     32|    11|   -2.370854|
+-------+------+------------+

