# Building a Recommendation System using Apache Spark - Alternating Least Squares (ALS)

### Import libraries and Create a Spark session

In [19]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
# Create a Spark session
spark = SparkSession.builder.appName("AmazonProductRecommendation").getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

### Reading the dataset

In [20]:
df = spark.read.format('csv')\
          .option('header','true')\
          .option('inferSchema', 'true')\
          .option('timestamp', 'true')\
          .load('s3a://test234/ratings.csv')
# Prepare the data (assuming you have columns: 'user', 'item', 'rating')
df = df.withColumnRenamed('userId', 'user').withColumnRenamed('movieId', 'item').withColumnRenamed('rating', 'rating')
df.limit(3).toPandas()

Unnamed: 0,user,item,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224


### Train the model

In [21]:
# Create an ALS model
als = ALS(rank=10, maxIter=10, regParam=0.1, userCol='user', itemCol='item', ratingCol='rating')
model = als.fit(df)

### Evaluate the model

In [22]:
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction")
predictions = model.transform(df)
rmse = evaluator.evaluate(predictions)
print(f"Root Mean Squared Error (RMSE): {rmse}")

Root Mean Squared Error (RMSE): 0.5912968675064282


### Make recommendations for all users (for exapmle, user 0) 

In [23]:
user_recs = model.recommendForAllUsers(5)
user_0_recs = user_recs.where(user_recs.user == 24).select("recommendations.item", "recommendations.rating").collect()
print(f"Recommendations for user 24: {user_0_recs}")

Recommendations for user 24: [Row(item=[3379, 171495, 33649, 6442, 177593], rating=[4.850823879241943, 4.538029670715332, 4.48161506652832, 4.370961666107178, 4.368351936340332])]
