# User-based CF

In [1]:
from Electronics import Electronics
from surprise import KNNBasic
import heapq
from collections import defaultdict
from operator import itemgetter

In [3]:
testSubject = "AGHZXQL9F94T9"
k = 10

# Load our data set and compute the user similarity matrix
ml = Electronics()
data = ml.loadElectronics()

trainSet = data.build_full_trainset()

sim_options = {'name': 'cosine',
               'user_based': True
               }

model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [4]:
testUserInnerID = trainSet.to_inner_uid(testSubject)
similarityRow = simsMatrix[testUserInnerID]

similarUsers = []
for innerID, score in enumerate(similarityRow):
    if (innerID != testUserInnerID):
        similarUsers.append( (innerID, score) )

kNeighbors = heapq.nlargest(k, similarUsers, key=lambda t: t[1])

In [5]:
candidates = defaultdict(float)
for similarUser in kNeighbors:
    innerID = similarUser[0]
    userSimilarityScore = similarUser[1]
    theirRatings = trainSet.ur[innerID]
    for rating in theirRatings:
        candidates[rating[0]] += (rating[1] / 5.0) * userSimilarityScore

In [6]:
purchased = {}
for itemID, rating in trainSet.ur[testUserInnerID]:
    purchased[itemID] = 1

In [8]:
pos = 0
for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
    if not itemID in purchased:
        productId = trainSet.to_raw_iid(itemID)
        print(ml.getProductName(productId), ratingSum)
        pos += 1
        if (pos > 10):
            break

B000089GN3 3.0
B003ES5ZUU 3.0
B003L1ZYZ6 3.0
B00829TIEK 2.8
B008DWCRQW 2.6
B00001P4ZH 2.6
B00452V66G 2.4
B000BKJZ9Q 2.2
B00004RC2D 2.0
B0001DBEM4 2.0
B000EVSLRO 2.0


# Item-based CF

In [9]:
testSubject = "AGHZXQL9F94T9"
k = 10

ml = Electronics()
data = ml.loadElectronics()

trainSet = data.build_full_trainset()

sim_options = {'name': 'cosine',
               'user_based': False
               }

model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

testUserInnerID = trainSet.to_inner_uid(testSubject)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [10]:
testUserRatings = trainSet.ur[testUserInnerID]
kNeighbors = heapq.nlargest(k, testUserRatings, key=lambda t: t[1])

In [11]:
candidates = defaultdict(float)
for itemID, rating in kNeighbors:
    similarityRow = simsMatrix[itemID]
    for innerID, score in enumerate(similarityRow):
        candidates[innerID] += score * (rating / 5.0)

In [12]:
purchased = {}
for itemID, rating in trainSet.ur[testUserInnerID]:
    purchased[itemID] = 1

In [14]:
# Get top-rated items from similar items:
pos = 0
for itemID, ratingSum in sorted(candidates.items(), key=itemgetter(1), reverse=True):
    if not itemID in purchased:
        productId = trainSet.to_raw_iid(itemID)
        print(ml.getProductName(productId), ratingSum)
        pos += 1
        if (pos > 10):
            break

B00004SB92 5.0
B00002SWHH 4.0
B00004RC2D 4.0
B000050NON 4.0
B00000JSGF 3.0
B00000K4KH 3.0
B000021YUO 3.0
B000023VUL 3.0
B000028F42 3.0
B000038AB2 3.0
B00003G1RG 3.0


# Compare KNN CF 

In [15]:
from Electronics import Electronics
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore
from Evaluator import Evaluator

import random
import numpy as np

In [16]:
def LoadData():
    ml = Electronics()
    print("Loading product ratings...")
    data = ml.loadElectronics()
    print("\nComputing product popularity ranks so we can measure novelty later...")
    rankings = ml.getPopularityRanks()
    return (ml, data, rankings)

np.random.seed(0)
random.seed(0)

In [17]:
# Load up common data set for the recommender algorithms
(ml, evaluationData, rankings) = LoadData()

# Construct an Evaluator to, you know, evaluate them
evaluator = Evaluator(evaluationData, rankings)

Loading product ratings...

Computing product popularity ranks so we can measure novelty later...
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [18]:
# User-based KNN
UserKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': True})
evaluator.AddAlgorithm(UserKNN, "User KNN")

In [19]:
# Item-based KNN
ItemKNN = KNNBasic(sim_options = {'name': 'cosine', 'user_based': False})
evaluator.AddAlgorithm(ItemKNN, "Item KNN")

In [20]:
kNN2 = KNNWithMeans(sim_options = {'name': 'cosine', 'user_based': True})
evaluator.AddAlgorithm(kNN2, "User KNNWithMeans")

In [21]:
kNN3 = KNNWithZScore(sim_options = {'name': 'cosine', 'user_based': True})
evaluator.AddAlgorithm(kNN3, "User KNNWithZScore")

In [22]:
evaluator.Evaluate(False)

evaluator.SampleTopNRecs(ml)

Evaluating  User KNN ...
Evaluating accuracy...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.
Evaluating  Item KNN ...
Evaluating accuracy...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.
Evaluating  User KNNWithMeans ...
Evaluating accuracy...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.
Evaluating  User KNNWithZScore ...
Evaluating accuracy...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Analysis complete.


Algorithm  RMSE       MAE       
User KNN   1.0706     0.7539    
Item KNN   1.0515     0.7462    
User KNNWithMeans 1.0155     0.7143    
User KNNWithZScore 1.0172     0.7082    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean better accuracy.

Using recommender  User KNN

Building recommendation model...
Computing the co