In [2]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.3.2.tar.gz (281.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.4/281.4 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py4j==0.10.9.5
  Downloading py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.3.2-py2.py3-none-any.whl size=281824028 sha256=4f66d8da92c55e415b4771233a1cc46c98d80b31b190b852f9b567d441f41188
  Stored in directory: /root/.cache/pip/wheels/6c/e3/9b/0525ce8a69478916513509d43693511463c6468db0de237c86
Successfully built pyspark
Installing collected packages: py4j, pyspa

In [3]:
# Import Apache Spark SQL
from pyspark.sql import SparkSession

# Build the SparkSession (reusing an existing one if the kernel already has it).
# master("local[*]") runs Spark on this machine using all available CPU cores.
# NOTE(review): "spark.some.config.option" looks like a placeholder setting
# carried over from example code — confirm whether it is actually needed.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("Hello Pyspark")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)

In [4]:
# Sanity check: leaving `spark` as the last expression makes the notebook
# display the session object, confirming the session was created.
spark

In [6]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so that
# files stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [21]:
from pyspark.sql import Row

# Read the ratings file as raw text; each line has the form
# "userId::movieId::rating::timestamp".
lines = spark.read.text("/content/gdrive/My Drive/My File/sample_movielens_ratings.txt").rdd

# Split every line on "::" and convert the four fields to typed Row objects.
parts = lines.map(lambda row: row.value.split("::"))
ratingsRDD = parts.map(lambda p: Row(userId=int(p[0]), movieId=int(p[1]),
                                     rating=float(p[2]), timestamp=int(p[3])))
ratings = spark.createDataFrame(ratingsRDD)

# 80/20 train/test split. The seed is fixed so that the split — and therefore
# every RMSE computed from it — is the same on each run of the notebook.
(training, test) = ratings.randomSplit([0.8, 0.2], seed=42)

In [29]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

# Grid search: fit an ALS recommender on the training data for every
# (regParam, maxIter) combination and print the RMSE on the test data.
# coldStartStrategy="drop" removes predictions for users/items that did not
# appear in training; otherwise they are NaN and make the RMSE NaN as well.
iters = [3, 5, 10, 15, 20]
regPars = [0.01, 0.03, 0.05, 0.08, 0.1]

# ALS only honours its checkpointInterval when a checkpoint directory is set.
# Without one the RDD lineage keeps growing with every iteration, which can
# end in a StackOverflowError for larger maxIter values.
spark.sparkContext.setCheckpointDir("/tmp/spark-checkpoints")

# The evaluator does not depend on the hyperparameters, so build it once.
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")

for reg_param in regPars:
    # Named max_iter rather than `iter` to avoid shadowing the Python builtin.
    for max_iter in iters:
        # Fixed seed so the factor initialisation (and the RMSE) is reproducible.
        als = ALS(maxIter=max_iter, regParam=reg_param, userCol="userId",
                  itemCol="movieId", ratingCol="rating",
                  coldStartStrategy="drop", seed=42)
        model = als.fit(training)

        # Evaluate the model by computing the RMSE on the test data
        predictions = model.transform(test)
        rmse = evaluator.evaluate(predictions)
        print(f"Root-mean-square error for maxIter={max_iter} and regParam={reg_param} : {rmse}")

Root-mean-square error for maxIter=3 and regParam=0.01 : 1.9675776127303766
Root-mean-square error for maxIter=5 and regParam=0.01 : 1.895122903970421
Root-mean-square error for maxIter=10 and regParam=0.01 : 1.8370303554287901
Root-mean-square error for maxIter=15 and regParam=0.01 : 1.8206128834151993
Root-mean-square error for maxIter=20 and regParam=0.01 : 1.804200008849716
Root-mean-square error for maxIter=3 and regParam=0.03 : 1.6225927893852579
Root-mean-square error for maxIter=5 and regParam=0.03 : 1.5529094774512684
Root-mean-square error for maxIter=10 and regParam=0.03 : 1.4534616691361113
Root-mean-square error for maxIter=15 and regParam=0.03 : 1.3831134897902422
Root-mean-square error for maxIter=20 and regParam=0.03 : 1.3137164453998869
Root-mean-square error for maxIter=3 and regParam=0.05 : 1.4633368793237
Root-mean-square error for maxIter=5 and regParam=0.05 : 1.3813056357512077
Root-mean-square error for maxIter=10 and regParam=0.05 : 1.2849902443644219
Root-mean-

Among the hyperparameter values we tried, the model performs better (lower RMSE) as regParam and maxIter increase. However, we could not try values of regParam or maxIter higher than these, because doing so caused a stack overflow error.