<a href="https://colab.research.google.com/github/Farah14-lab/MachineLearning-BigData/blob/main/ML_BigData_Slide48.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
from google.colab import drive

# Expose Google Drive under /content/drive; the data files read later in this
# notebook live beneath that mount point.
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:
# Install PySpark
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [47]:
from tabulate import tabulate
from pyspark.mllib.recommendation import ALS, Rating
from pyspark.sql import SparkSession

In [48]:
# Reuse the active SparkSession if one exists; otherwise start a new one.
spark = (
    SparkSession.builder
    .getOrCreate()
)

In [49]:
# Load the ratings file as an RDD of raw text lines
raw = spark.sparkContext.textFile(
    "/content/drive/MyDrive/BigData (1)/ML/ratings.dat"
)

# Three extra hand-written (product, rating) pairs, all attributed to user 0
mydata = [
    (2, 0.01),
    (3, 0.02),
    (4, 0.03),
]
mydatardd = spark.sparkContext.parallelize(mydata).map(
    lambda pair: Rating(0, *pair)
)

In [50]:
# Define a function to parse the rating data
def parseRating(str):
    """Parse one ``user::product::rating::<extra>`` line into a ``Rating``.

    Args:
        str: A single text line made of exactly four ``::``-separated fields.
            (The name shadows the built-in ``str``; it is kept so the
            function's signature stays unchanged.)

    Returns:
        ``Rating(int(field0), int(field1), float(field2))``. The fourth
        field is validated for presence but otherwise ignored.

    Raises:
        ValueError: If the line does not contain exactly four fields, or if
            a field cannot be converted to ``int``/``float``.
    """
    fields = str.split("::")
    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would leave malformed lines unchecked.
    if len(fields) != 4:
        raise ValueError(
            "expected 4 '::'-separated fields, got %d: %r" % (len(fields), str)
        )
    user, product, rating = fields[:3]
    return Rating(int(user), int(product), float(rating))

In [51]:
# Turn every raw text line into a Rating ...
ratings = raw.map(parseRating)

# ... and append the hand-written ratings after the ones read from the file
totalRatings = ratings.union(mydatardd)

In [52]:
# Train the ALS model
rank = 8            # rank of the factor matrices
numIterations = 5   # number of ALS iterations
lambda_ = 1.0       # regularization parameter
seed = 42           # fixed seed for the random initialisation, so re-runs are repeatable
model = ALS.train(
    totalRatings,
    rank,
    iterations=numIterations,
    lambda_=lambda_,
    seed=seed,
)

In [53]:
# Ask the trained model for the 10 highest-scoring products for user 1
numRecommendations = 10
userId = 1
products = model.recommendProducts(user=userId, num=numRecommendations)

In [54]:
# Distribute the list of recommendations as an RDD so it can be joined below
productsRDD = spark.sparkContext.parallelize(
    products
)

In [55]:
# Load movie data, join with the recommendations, and display the names ordered by ratings
movies = spark.read.text("/content/drive/MyDrive/BigData (1)/ML/movies.dat")

# (movie_id, title) pairs: first two "::"-separated fields of every line
movieNames = movies.rdd.map(lambda line: line.value.split("::")).map(lambda fields: (int(fields[0]), fields[1]))

# (movie_id, predicted_rating) pairs taken from the model's recommendations
recommendations = productsRDD.map(lambda rating: (rating.product, rating.rating))

# Join on movie_id -> (movie_id, (title, predicted_rating)); the negated key
# sorts the highest predicted rating first
movieRecommendations = movieNames.join(recommendations).sortBy(lambda x: -x[1][1])

# Prepare the data for tabular display.
# Take numRecommendations rows rather than a hard-coded 10, so the table always
# shows as many rows as were requested from the model.
table_data = [
    (movie_id, movie_title, rating)
    for movie_id, (movie_title, rating) in movieRecommendations.take(numRecommendations)
]

# Display the recommendations in a table
headers = ["Movie ID", "Title", "Rating"]
table = tabulate(table_data, headers, tablefmt="grid")
print(table)

+------------+-------------------------------------------+----------+
|   Movie ID | Title                                     |   Rating |
+============+===========================================+==========+
|       3382 | Song of Freedom (1936)                    |  4.46389 |
+------------+-------------------------------------------+----------+
|        989 | Schlafes Bruder (Brother of Sleep) (1995) |  4.01517 |
+------------+-------------------------------------------+----------+
|        557 | Mamma Roma (1962)                         |  3.90163 |
+------------+-------------------------------------------+----------+
|        787 | Gate of Heavenly Peace, The (1995)        |  3.89335 |
+------------+-------------------------------------------+----------+
|       1830 | Follow the Bitch (1998)                   |  3.89262 |
+------------+-------------------------------------------+----------+
|       3233 | Smashing Time (1967)                      |  3.8913  |
+------------+-------------------------------------------+----------+
|       3607 | One L