# Importing Libraries, Loading Data

In [1]:
import org.apache.spark.mllib.recommendation._

In [2]:
// Each line of the input file holds three space-separated columns:
//   column 0 - user id
//   column 1 - artist id
//   column 2 - play count: how many times the user listened to the artist
//              (used as an implicit rating)

val rawRDD = sc.textFile(path = "user_artist_data_small.txt")
// Preview the first ten raw lines.
rawRDD.take(num = 10)

rawRDD = user_artist_data_small.txt MapPartitionsRDD[1] at textFile at <console>:35


[1059637 1000010 238, 1059637 1000049 1, 1059637 1000056 1, 1059637 1000062 11, 1059637 1000094 1, 1059637 1000112 423, 1059637 1000113 5, 1059637 1000114 2, 1059637 1000123 2, 1059637 1000130 19129]

In [3]:
// Summary statistics (count, mean, stdev, max, min) of the play-count column;
// the reported count is also the total number of rows in the table.
rawRDD.map(_.split(" ")(2).toDouble).stats()

(count: 49481, mean: 130.575797, stdev: 3034.354092, max: 439771.000000, min: 1.000000)

# User-Item Matrix

In [4]:
// Build the user-item matrix: one Rating(user, product, rating) per input line,
// where the product is the artist id and the rating is the play count.
val uiMatrix = rawRDD.map(line => line.split(" ")).map { fields =>
  Rating(user = fields(0).toInt, product = fields(1).toInt, rating = fields(2).toInt)
}

uiMatrix = MapPartitionsRDD[5] at map at <console>:33


MapPartitionsRDD[5] at map at <console>:33

In [5]:
// The user-item matrix is reused repeatedly during the recommendation process,
// so keep it cached (default storage level) instead of recomputing it each time.

uiMatrix.cache()

MapPartitionsRDD[5] at map at <console>:33

In [6]:
// number of Rating rows (user-artist pairs) in the user-item matrix;
// note this counts rows, not distinct users

uiMatrix.count()

49481

# Matrix Factorization

In [7]:
// Factorize the user-item matrix with ALS.
//
// ALS.trainImplicit(ratings, rank, iterations)
//   - trainImplicit is used because the play counts are implicit ratings
//   - ratings    : the user-item matrix, an RDD of Rating
//   - rank       : number of latent factors
//   - iterations : number of times ALS should run
//
// The result is a MatrixFactorizationModel, which holds the user-feature
// matrix and the item-feature matrix.

val model = ALS.trainImplicit(ratings = uiMatrix, rank = 10, iterations = 5)

// The call above relies on the defaults:
//   lambda - 0.01 - regularization parameter
//   alpha  - 1    - confidence parameter
// Equivalent explicit form:
// val model = ALS.trainImplicit(uiMatrix, 10, 5, 0.01, 1)

model = org.apache.spark.mllib.recommendation.MatrixFactorizationModel@baf8706


org.apache.spark.mllib.recommendation.MatrixFactorizationModel@baf8706

In [11]:
// Ask the trained model for the top 5 artist recommendations for one user.
// NOTE(review): both names stay `var` as in the original, presumably so the cell
// can be re-run or reassigned with another user id - confirm before making them `val`.
var userID = 1001440
var recommendations = model.recommendProducts(user = userID, num = 5)

userID = 1001440
recommendations = Array(Rating(1001440,1238230,2.5267438582581736), Rating(1001440,1004294,2.2814567026931103), Rating(1001440,1000418,1.4903283638602127), Rating(1001440,1006633,1.484148654312677), Rating(1001440,1010373,1.4308105224588665))


[Rating(1001440,1238230,2.5267438582581736), Rating(1001440,1004294,2.2814567026931103), Rating(1001440,1000418,1.4903283638602127), Rating(1001440,1006633,1.484148654312677), Rating(1001440,1010373,1.4308105224588665)]