In [None]:
!pip install scikit-surprise
import itertools
from surprise import accuracy
from collections import defaultdict

In [None]:
# Measure accuracy of recommender
class RecommenderMetrics:
  """Accuracy and top-N helpers for evaluating a Surprise recommender.

  Every method is a static method, so it can be called either on the class
  (``RecommenderMetrics.MAE(preds)``) or on an instance.
  """

  @staticmethod
  def MAE(predictions):
    """Return the Mean Absolute Error of ``predictions``.

    Args:
      predictions: Predictions accepted by ``surprise.accuracy.mae``.

    Returns:
      The MAE computed by Surprise (nothing is printed).
    """
    return accuracy.mae(predictions, verbose=False)

  @staticmethod
  def RMSE(predictions):
    """Return the Root Mean Square Error of ``predictions``.

    Args:
      predictions: Predictions accepted by ``surprise.accuracy.rmse``.

    Returns:
      The RMSE computed by Surprise (nothing is printed).
    """
    return accuracy.rmse(predictions, verbose=False)

  @staticmethod
  def getTopN(predictions, n=10, minRating=4.0):
    """Build each user's top-N list of predicted movies.

    Args:
      predictions: Iterable of 5-element records
        ``(userID, movieID, actualRating, estimatedRating, details)``.
        User and movie IDs must be convertible with ``int()``.
      n: Maximum number of movies kept per user.
      minRating: Predictions with an estimated rating below this value
        are ignored.

    Returns:
      A ``defaultdict(list)`` mapping int user ID to a list of
      ``(int movieID, estimatedRating)`` tuples, sorted by estimated rating
      in descending order and truncated to ``n`` entries. Users without any
      prediction at or above ``minRating`` have no entry.
    """
    # defaultdict supplies an empty list the first time a user is seen
    topN = defaultdict(list)

    for userID, movieID, actualRating, estimatedRating, _ in predictions:
      if estimatedRating >= minRating:
        topN[int(userID)].append((int(movieID), estimatedRating))

    # Re-assigning the value of an existing key is safe while iterating
    for userID, ratings in topN.items():
      ratings.sort(key=lambda x: x[1], reverse=True)
      topN[userID] = ratings[:n]

    return topN

In [None]:
# Leave-One-Out Cross Validation
def hitRate(topNPredicted, leftOutPredictions):
  """Return the fraction of left-out ratings found in the user's top-N list.

  Args:
    topNPredicted: Mapping of int user ID to a list of
      ``(movieID, predictedRating)`` tuples.
    leftOutPredictions: Iterable of records whose first two elements are
      the user ID and the movie ID of the held-out rating.

  Returns:
    ``hits / total`` as a float, or ``0.0`` when ``leftOutPredictions`` is
    empty (instead of raising ``ZeroDivisionError``).
  """
  hits = 0
  total = 0

  # For each left-out rating
  for leftOut in leftOutPredictions:
    userID = leftOut[0]
    leftOutMovieID = leftOut[1]
    # .get() tolerates users without a top-N list and, unlike indexing a
    # defaultdict, does not insert empty entries into the caller's mapping.
    recommended = topNPredicted.get(int(userID), ())
    # Is it in the user's predicted top-N?
    if any(int(leftOutMovieID) == int(movieID) for movieID, _ in recommended):
      hits += 1
    total += 1

  if total == 0:
    return 0.0

  # Compute overall precision
  return hits / total

In [None]:
# Cumulative Hit Rate
def cumuativeHitRank(topNPredicted, leftOutPredictions, ratingCutoff=0):
  """Return the hit rate over left-out ratings at or above ``ratingCutoff``.

  Left-out ratings whose actual rating is below the cutoff are skipped
  entirely: they count neither as hits nor towards the total.

  Args:
    topNPredicted: Mapping of int user ID to a list of
      ``(movieID, predictedRating)`` tuples.
    leftOutPredictions: Iterable of 5-element records
      ``(userID, movieID, actualRating, estimatedRating, details)``.
    ratingCutoff: Minimum actual rating for a left-out item to be counted.

  Returns:
    ``hits / total`` as a float, or ``0.0`` when no left-out rating meets
    the cutoff (instead of raising ``ZeroDivisionError``).
  """
  hits = 0
  total = 0

  # For each left-out rating
  for userID, leftOutMovieID, actualRating, estimateRating, _ in leftOutPredictions:
    # Only look at ability to recommend what users actually liked
    if actualRating >= ratingCutoff:
      # .get() tolerates users without a top-N list and does not insert
      # empty entries into a defaultdict passed by the caller.
      recommended = topNPredicted.get(int(userID), ())
      # Is it in the user's predicted top-N?
      if any(int(leftOutMovieID) == movieID for movieID, _rating in recommended):
        hits += 1
      total += 1

  if total == 0:
    return 0.0

  # Compute overall precision
  return hits / total

In [None]:
# Rating Hit Rate
def ratingHitRate(topNPredicted,leftOutPredictions):
  """Print the hit rate broken down by the actual rating of the left-out item.

  For every distinct actual rating that produced at least one hit, one line
  ``<rating> <hits/total>`` is printed, in ascending order of rating.
  Ratings that never produced a hit are not printed. Nothing is returned.

  Args:
    topNPredicted: Mapping of int user ID to a list of
      ``(movieID, predictedRating)`` tuples; it is indexed with
      ``int(userID)`` for every left-out record.
    leftOutPredictions: Iterable of 5-element records
      ``(userID, movieID, actualRating, estimatedRating, details)``.
  """
  hitCounts = defaultdict(float)
  seenCounts = defaultdict(float)

  for userID, leftOutMovieID, actualRating, estimateRating, _ in leftOutPredictions:
    userTopN = topNPredicted[int(userID)]
    # Stops at the first matching movie, like an explicit loop with break
    wasRecommended = any(
      int(leftOutMovieID) == movieID for movieID, _score in userTopN
    )
    if wasRecommended:
      hitCounts[actualRating] += 1
    seenCounts[actualRating] += 1

  # Report one line per rating value that had at least one hit
  for rating in sorted(hitCounts):
    print(rating, hitCounts[rating]/seenCounts[rating])

In [None]:
# Average Reciprocal Hit Rate
def averageReciprocalHitRate(topNPredicted,leftOutPredictions):
  """Return the average reciprocal rank of left-out items in the top-N lists.

  A left-out movie found at 1-based position ``r`` of its user's top-N list
  contributes ``1/r``; a movie that is not in the list contributes 0.

  Args:
    topNPredicted: Mapping of int user ID to an ordered list of
      ``(movieID, predictedRating)`` tuples.
    leftOutPredictions: Iterable of 5-element records
      ``(userID, movieID, actualRating, estimatedRating, details)``.

  Returns:
    The sum of reciprocal ranks divided by the number of left-out records,
    or ``0.0`` when ``leftOutPredictions`` is empty (instead of raising
    ``ZeroDivisionError``).
  """
  summation = 0.0
  total = 0

  # For each left-out rating
  for userID, leftOutMovieID, actualRating, estimateRating, _ in leftOutPredictions:
    # .get() tolerates users without a top-N list and does not insert
    # empty entries into a defaultdict passed by the caller.
    recommended = topNPredicted.get(int(userID), ())
    for rank, (movieID, predictedRating) in enumerate(recommended, start=1):
      if int(leftOutMovieID) == movieID:
        summation += 1.0 / rank
        break

    total += 1

  if total == 0:
    return 0.0

  return summation / total

In [None]:
# What % of users have at least 1 'good' recommendation
def userCoverage(topNPredicted, numUsers, ratingThreshold=0):
  """Return the share of users with at least one 'good' recommendation.

  Args:
    topNPredicted: Mapping of user ID to a list of
      ``(movieID, predictedRating)`` tuples.
    numUsers: Total number of users used as the denominator.
    ratingThreshold: Minimum predicted rating for a recommendation to count
      as 'good'.

  Returns:
    Number of users having at least one recommendation with a predicted
    rating at or above ``ratingThreshold``, divided by ``numUsers``.

  Raises:
    ZeroDivisionError: If ``numUsers`` is 0.
  """
  hits = 0
  for userID in topNPredicted.keys():
    # Index the mapping; it is a dict-like object, not a callable.
    recommendations = topNPredicted[userID]
    if any(predictedRating >= ratingThreshold
           for movieID, predictedRating in recommendations):
      hits += 1

  return hits / numUsers

In [None]:
# How broad a variety of items the system is giving users
def Diversity(topNPredicted, simsAlgo):
  """Return 1 minus the average pairwise similarity of recommended items.

  Args:
    topNPredicted: Mapping of user ID to a list of
      ``(movieID, predictedRating)`` tuples.
    simsAlgo: Object exposing ``compute_similarities()`` (returning a
      similarity matrix indexed by inner item IDs) and a ``trainset`` with
      ``to_inner_iid(raw_id)``. Raw item IDs are looked up as ``str(movieID)``.

  Returns:
    ``1 - S`` where ``S`` is the mean similarity over every pair of items
    within each user's top-N list. Returns ``0.0`` when no user has at least
    two recommendations, because there is no pair to measure.
  """
  n = 0
  total = 0
  simsMatrix = simsAlgo.compute_similarities()
  for userID in topNPredicted.keys():
    # Every combo of item pairs in Top N
    pairs = itertools.combinations(topNPredicted[userID], 2)
    for pair in pairs:
      movie1 = pair[0][0]
      movie2 = pair[1][0]
      # Surprise maintains sequential internal ids for users and items that
      # differ from the raw user/movie IDs, so convert raw ids to inner ids
      innerID1 = simsAlgo.trainset.to_inner_iid(str(movie1))
      innerID2 = simsAlgo.trainset.to_inner_iid(str(movie2))
      similarity = simsMatrix[innerID1][innerID2]
      # Accumulate (not overwrite) so the average covers every pair
      total += similarity
      n += 1

  if n == 0:
    return 0.0

  # Sum scores, get avg and subtract from 1
  S = total / n
  return (1 - S)

In [None]:
# How popular the recommended items are (average popularity rank)
def Novelty(topNPredicted, rankings):
  """Return the average popularity rank of all recommended items.

  Args:
    topNPredicted: Mapping of user ID to a list of
      ``(movieID, predictedRating)`` tuples.
    rankings: Mapping of movie ID to its rank (presumably a popularity
      rank where 1 is the most popular item; confirm against the caller).

  Returns:
    The mean of ``rankings[movieID]`` over every recommendation of every
    user, or ``0.0`` when there are no recommendations at all (instead of
    raising ``ZeroDivisionError``).

  Raises:
    KeyError: If a recommended movie ID is missing from a dict ``rankings``.
  """
  n = 0
  total = 0
  for userID in topNPredicted.keys():
    for rating in topNPredicted[userID]:
      movieID = rating[0]
      rank = rankings[movieID]
      total += rank
      n += 1

  if n == 0:
    return 0.0

  return total / n