<a href="https://colab.research.google.com/github/Abhishek3102/Apache-Spark/blob/main/Movielens_Movie_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Collaborative Filtering: Alternating Least Squares(ALS)

In [1]:
!pip install pyspark



In [2]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("Movie Recommender").getOrCreate()
spark

In [3]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS

In [4]:
movie = spark.read.csv("movies.csv", inferSchema=True, header=True)
rating = spark.read.csv("ratings.csv", inferSchema=True, header=True)

In [9]:
joined_df = movie.join(rating, on="movieId", how="inner")
joined_df.show()

+-------+--------------------+--------------------+------+------+---------+
|movieId|               title|              genres|userId|rating|timestamp|
+-------+--------------------+--------------------+------+------+---------+
|      1|    Toy Story (1995)|Adventure|Animati...|     1|   4.0|964982703|
|      3|Grumpier Old Men ...|      Comedy|Romance|     1|   4.0|964981247|
|      6|         Heat (1995)|Action|Crime|Thri...|     1|   4.0|964982224|
|     47|Seven (a.k.a. Se7...|    Mystery|Thriller|     1|   5.0|964983815|
|     50|Usual Suspects, T...|Crime|Mystery|Thr...|     1|   5.0|964982931|
|     70|From Dusk Till Da...|Action|Comedy|Hor...|     1|   3.0|964982400|
|    101|Bottle Rocket (1996)|Adventure|Comedy|...|     1|   5.0|964980868|
|    110|   Braveheart (1995)|    Action|Drama|War|     1|   4.0|964982176|
|    151|      Rob Roy (1995)|Action|Drama|Roma...|     1|   5.0|964984041|
|    157|Canadian Bacon (1...|          Comedy|War|     1|   5.0|964984100|
|    163|   

In [10]:
joined_df.write.option("header", "true").csv("movie_ratings.csv")

In [11]:
joined_df.describe().show()

+-------+----------------+--------------------+------------------+------------------+------------------+--------------------+
|summary|         movieId|               title|            genres|            userId|            rating|           timestamp|
+-------+----------------+--------------------+------------------+------------------+------------------+--------------------+
|  count|          100836|              100836|            100836|            100836|            100836|              100836|
|   mean|19435.2957177992|                NULL|              NULL|326.12756356856676| 3.501556983616962|1.2059460873684695E9|
| stddev|35530.9871987003|                NULL|              NULL| 182.6184914635004|1.0425292390606342|2.1626103599513078E8|
|    min|               1|"11'09""01 - Sept...|(no genres listed)|                 1|               0.5|           828124615|
|    max|          193609|À nous la liberté...|           Western|               610|               5.0|          1537

In [13]:
(train, test) = joined_df.randomSplit([0.75,0.25], seed=42)

In [16]:
als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="rating")

In [17]:
model = als.fit(train)

In [18]:
pred = model.transform(test)

In [19]:
pred.show()

+-------+----------------+--------------------+------+------+----------+----------+
|movieId|           title|              genres|userId|rating| timestamp|prediction|
+-------+----------------+--------------------+------+------+----------+----------+
|      1|Toy Story (1995)|Adventure|Animati...|     7|   4.5|1106635946| 4.0000734|
|      1|Toy Story (1995)|Adventure|Animati...|    19|   4.0| 965705637|  2.990016|
|      1|Toy Story (1995)|Adventure|Animati...|    27|   3.0| 962685262| 4.3860087|
|      1|Toy Story (1995)|Adventure|Animati...|    31|   5.0| 850466616| 4.4407334|
|      1|Toy Story (1995)|Adventure|Animati...|    43|   5.0| 848993983| 5.4907837|
|      1|Toy Story (1995)|Adventure|Animati...|    57|   5.0| 965796031| 3.1829302|
|      1|Toy Story (1995)|Adventure|Animati...|    68|   2.5|1158531426| 3.2992108|
|      1|Toy Story (1995)|Adventure|Animati...|    86|   4.0|1344082549|  4.428684|
|      1|Toy Story (1995)|Adventure|Animati...|    89|   3.0|1520408314| 1.5

In [21]:
pred = pred.dropna(subset=["rating", "prediction"])

In [22]:
eval = RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction")
rmse = eval.evaluate(pred)
print(f"RMSE: {rmse}")

RMSE: 1.1030448804803894


In [32]:
user = input("Enter user ID: ")
entered_user = test.filter(test["userId"] == user).select(["movieId", "userId", "rating", "title", "genres"])
entered_user_sorted = entered_user.orderBy("rating", ascending=False)
entered_user_sorted.show()

Enter user ID: 16
+-------+------+------+--------------------+--------------------+
|movieId|userId|rating|               title|              genres|
+-------+------+------+--------------------+--------------------+
|    111|    16|   4.5|  Taxi Driver (1976)|Crime|Drama|Thriller|
|    541|    16|   4.5| Blade Runner (1982)|Action|Sci-Fi|Thr...|
|    608|    16|   4.5|        Fargo (1996)|Comedy|Crime|Dram...|
|   5618|    16|   4.5|Spirited Away (Se...|Adventure|Animati...|
|     50|    16|   4.0|Usual Suspects, T...|Crime|Mystery|Thr...|
|    319|    16|   4.0|Shallow Grave (1994)|Comedy|Drama|Thri...|
|   1207|    16|   4.0|To Kill a Mocking...|               Drama|
|   3741|    16|   4.0|     Badlands (1973)|Crime|Drama|Thriller|
|   5971|    16|   4.0|My Neighbor Totor...|Animation|Childre...|
|    923|    16|   3.5| Citizen Kane (1941)|       Drama|Mystery|
|   1186|    16|   3.5|Sex, Lies, and Vi...|               Drama|
|   1680|    16|   3.5|Sliding Doors (1998)|       Drama|R