In [None]:
import sys
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating

In [None]:
def loadMovieNames(path="ml-1m/movies.dat"):
    """Build a lookup of movie ID -> movie title from a '::'-delimited file.

    Each non-blank line is split on '::'. The first field is parsed as the
    integer movie ID and the second field is stored as that movie's title.

    Args:
        path: Location of the movies file. Defaults to "ml-1m/movies.dat",
            which is resolved relative to the current working directory.

    Returns:
        dict mapping int movie ID to str title.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the first field of a non-blank line is not an integer.
        IndexError: if a non-blank line contains no '::' separator.
    """
    movieNames = {}
    # Bytes that are not valid ASCII are silently dropped (errors='ignore'),
    # so titles containing accented characters lose those characters.
    with open(path, encoding='ascii', errors='ignore') as f:
        for line in f:
            # A blank line (for example a trailing one at end of file) would
            # otherwise crash in int(''), so skip it.
            if not line.strip():
                continue
            # Drop the line terminator first so that a two-field line does
            # not keep a trailing newline inside the title.
            fields = line.rstrip('\n').split('::')
            movieNames[int(fields[0])] = fields[1]
    return movieNames

In [None]:
# Run Spark locally ("local[*]") under the application name below.
conf = SparkConf().setMaster("local[*]").setAppName("MovieRecommendationsALS")
# getOrCreate() reuses a context that is already running, so re-executing
# this cell in a live kernel does not fail with
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)
# Directory (relative to the working directory) used for RDD checkpoints.
sc.setCheckpointDir('checkpoint')

In [None]:
# Announce the step, then build the movie-ID -> title lookup that the
# later cells use to print readable movie names.
print("\nLoading movie names...")
nameDict = loadMovieNames()

In [None]:
# NOTE(review): absolute, machine-specific location; the movie names are read
# from the relative path "ml-1m/movies.dat" instead. Confirm both point at
# the same dataset directory before running on another machine.
ratingsPath = "file:///E:/SparkCourse/ml-1m/ratings.dat"
data = sc.textFile(ratingsPath)

In [None]:
# Parse every "::"-delimited line into a Rating built from its first three
# fields (int, int, float), and cache the result because it is reused by
# both the training step and the per-user filter in later cells.
ratings = (
    data
    .map(lambda line: line.split("::"))
    .map(lambda fields: Rating(int(fields[0]), int(fields[1]), float(fields[2])))
    .cache()
)

Build the recommendation model using Alternating Least Squares

In [None]:
print("\nTraining recommendation model...")
rank = 10            # number of latent factors in the factorisation
numIterations = 20   # number of ALS iterations to run
# Fixed seed for the model's random initialisation, so that re-running the
# notebook does not produce a different set of recommendations each time.
randomSeed = 42
model = ALS.train(ratings, rank, numIterations, seed=randomSeed)

In [None]:
def parseUserID(argv, default=1):
    """Return the user ID given as the first command-line argument.

    When the code runs as a script, argv[1] is expected to be the user ID.
    Under a Jupyter kernel argv[1] is typically the '-f' connection-file
    flag rather than a number, and it may also be missing entirely; in both
    cases the default is returned instead of raising.

    Args:
        argv: Argument list in the shape of sys.argv.
        default: ID to use when argv[1] is absent or not an integer.

    Returns:
        int user ID.
    """
    try:
        return int(argv[1])
    except (IndexError, ValueError):
        return default


# NOTE(review): the fallback assumes user 1 exists in the ratings data;
# change the default here to inspect a different user from the notebook.
userID = parseUserID(sys.argv)

In [None]:
# List what the selected user has already rated, using movie titles from
# nameDict instead of raw movie IDs.
print("\nRatings for user ID ", userID, ":", sep="")
userRatings = ratings.filter(lambda entry: entry.user == userID)
for seen in userRatings.collect():
    print(nameDict[int(seen.product)], ": ", seen.rating, sep="")

In [None]:
# Ask the trained model for ten products for the selected user and print
# each one as "<title> score <predicted rating>".
print("\nTop 10 recommendations:")
recommendations = model.recommendProducts(userID, 10)
for suggestion in recommendations:
    print(nameDict[int(suggestion.product)], " score ", suggestion.rating, sep="")