# Business Recommender System using Apache Spark and Python


## Necessary Package Imports

In [80]:
import csv 
import random
import math
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

## Loading data

In [108]:
data = list()
data1 = list()
included_cols = [12, 13, 11]
with open('../Sample Data/merged_BR3.csv') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)
    for row in reader:
        if row[3] in ['Huntersville']:
            content = (int(float(row[12])), int(float(row[13])), float(row[11]))
            content1 = (int(float(row[13])), str(row[4]))
            data.append(tuple(content))
            data1.append(content1)
dataParallelized = sc.parallelize(data)
#dataParallelized.collect()

##  Splitting Data into Testing and Training Sets

In [82]:
#splitting the RDD into training and test datasets [.6, .4]
training_set, testing_set = dataParallelized.randomSplit([.6,.4], 13)
training_set.cache()
testing_set.cache()

print training_set.take(3)
print testing_set.take(3)

[(22, 3, 1.85), (23, 3, 2.0), (24, 3, 0.644444444444)]
[(26, 3, 5.0), (27, 3, 4.0), (1551, 68, 5.0)]


## Function to evaluate the model

In [78]:
def score(predict, actual):
    MSE = []
    count = 0.0
    for a in actual:
        for p in predict:
            if a[0] == p[0]:
                #print str(p[1]) + " " + str(a[1])
                count += 1
                SE = (a[1] - p[1])**2
                MSE.append(SE)
    if count == 0.0:
        return -1
    else:
        return sum(MSE)/count

def modelEval(mod, trainData, testData):
    test_userIDs = testData.map(lambda p: p[0]).distinct().collect()
    #print test_userIDs
    test_companyIDs = dataParallelized.map(lambda p: p[1]).distinct().collect()
    #print test_companyIDs
    trainSet = trainData.map(lambda x: (x[0], x[1])).filter(lambda x: x[0] in test_userIDs)
    trainSet = trainSet.groupByKey().map(lambda x: (x[0], list(x[1])))
    #print trainSet.take(3)
    #if bid not in [y[0] for y in x[1]]
    validationSet = trainSet.flatMap(lambda x: [(x[0],bid) for bid in test_companyIDs])
    #print validationSet.take(3)
    actualD = testData.map(lambda x: (x[0], (x[1], x[2]))).groupByKey()
    actualD = actualD.map(lambda x: (x[0], list(x[1]))).collectAsMap()
    #print actualD
    predictD = mod.predictAll(validationSet).map(lambda x: (x[0], (x[1], x[2])))
    predictD = predictD.groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda score: score[1], reverse=True)))
    maxList = predictD.map(lambda x: x[1][0][1])
    minList = predictD.map(lambda x: x[1][-1][1])
    maxVal = max(maxList.collect())
    minVal = min(minList.collect())
    scale = maxVal - minVal
    predictD_scale = predictD.map(lambda x: (x[0], [(y[0],((y[1]-minVal)/scale)*6) for y in x[1]]))
    
    #print predictD.take(1)
    scores = []
    for entry in predictD_scale.collect():
        score_pe = score(entry[1], actualD[entry[0]])
        #print score_pe
        if score_pe != -1:
            scores.append(score_pe)
    MSE_score = sum(scores)/float(len(scores))
    RMSE_score = math.sqrt(MSE_score)
    return RMSE_score

## Constructing model for different ranks


In [83]:
ranks = [2, 10, 20]
for r in ranks:
    model = ALS.trainImplicit(training_set, rank=r)
    print "The model score for rank %d is %f" % (r, modelEval(model, training_set, testing_set))

The model score for rank 2 is 3.059966
The model score for rank 10 is 2.479931
The model score for rank 20 is 2.851298


## Chosing the rank of the best model

In [86]:
bestModel = ALS.trainImplicit(training_set, rank=10)
modelEval(model, training_set, testing_set)

2.851298481955958

## Trying Some Business Recommendations

In [112]:
user = data[0][-2]
name = list()
for x in bestModel.recommendProducts(user, 5):
    #print x.product
    for line in data1:
        if line[0] == x.product and line[1] not in name:
            name.append(line[1])
for i in range(len(name)):
    print("business %d: %s" % (i, name[i]))

business 0: Bad Daddy's Burger Bar
business 1: Red Rocks Cafe - Birkdale
business 2: Kung Foo Noodle
business 3: Cafe 100
business 4: Pinky's Westside Grill
