In [25]:
#Load up the libraries
import pandas as pd
from surprise import SVD
from surprise import KNNBaseline
from surprise.model_selection import train_test_split
from surprise.model_selection import LeaveOneOut
from surprise import Reader
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from collections import defaultdict

## Ratings Prediction

In [23]:
df = pd.read_csv('C:/Users/lianj/Desktop/Udemy/ml-100k/movies.txt', header = None, sep = '\t')
df.columns = ['userId', 'movieId', 'rating', 'timestamp']
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
trainSet, testSet = train_test_split(data, test_size=.25, random_state=0)
algo = SVD(random_state=0)
algo.fit(trainSet)
predictions = algo.test(testSet)

def MAE(predictions):
        return accuracy.mae(predictions, verbose=False)
def RMSE(predictions):
        return accuracy.rmse(predictions, verbose=False)
    
print("RMSE: ", RMSE(predictions))
print("MAE: ", MAE(predictions))

RMSE:  0.9467849999896248
MAE:  0.7464520353995886


In [26]:
# Run 5-fold cross-validation and print results.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9370  0.9333  0.9374  0.9382  0.9418  0.9375  0.0027  
MAE (testset)     0.7380  0.7384  0.7383  0.7435  0.7401  0.7397  0.0021  
Fit time          3.88    4.38    4.35    5.35    4.21    4.43    0.49    
Test time         0.18    0.12    0.11    0.11    0.14    0.13    0.03    


{'test_rmse': array([0.93696997, 0.93331873, 0.93744167, 0.93819548, 0.94175364]),
 'test_mae': array([0.73802902, 0.73837263, 0.7383334 , 0.74352991, 0.74005939]),
 'fit_time': (3.8781239986419678,
  4.382630109786987,
  4.348900079727173,
  5.347845077514648,
  4.209790468215942),
 'test_time': (0.17554354667663574,
  0.11870956420898438,
  0.10933446884155273,
  0.10870909690856934,
  0.13962674140930176)}

## Top N

In [27]:
def GetTopN(predictions, n=10, minimumRating=4.0):
    '''
    input = predictions
    output = top N predictions for a user and stored in a defalut dictionary
    '''
    topN = defaultdict(list)
    for userID, movieID, actualRating, estimatedRating, _ in predictions:
        if (estimatedRating >= minimumRating):
            topN[int(userID)].append((int(movieID), estimatedRating))

    for userID, ratings in topN.items():
        ratings.sort(key=lambda x: x[1], reverse=True)
        topN[int(userID)] = ratings[:n]

    return topN

In [28]:
LOOCV = LeaveOneOut(n_splits=1, random_state=1)

for trainSet, testSet in LOOCV.split(data):
    # Train model without left-out ratings
    algo.fit(trainSet)
    # Predicts ratings for left-out ratings only
    leftOutPredictions = algo.test(testSet)
    # Build predictions for all ratings not in the training set
    bigTestSet = trainSet.build_anti_testset()
    allPredictions = algo.test(bigTestSet)
    # Compute top 10 recs for each user
    topNPredicted = GetTopN(allPredictions, n=10)

In [33]:
#Top 10 predictions for userId 196
topNPredicted[196]

[(64, 4.516063507687135),
 (114, 4.509139907231062),
 (318, 4.492523960075983),
 (515, 4.437435258698688),
 (408, 4.4309402453055196),
 (480, 4.412107354531763),
 (50, 4.410538949398632),
 (169, 4.364397262145547),
 (483, 4.352133872803232),
 (513, 4.33617690355655)]

## Hit Rate

- We will use hit rate to evaluate how good our top N ratings

- The process of compute hit rate for a single user:
   - Find all items in this user’s history in the training data.
   - Intentionally remove one of these items ( Leave-One-Out cross-validation).
   - Use all other items to feed the recommender and ask for top 10 recommendations.
   - If the removed item appear in the top 10 recommendations, it is a hit. If not, it’s not a hit.

In [34]:
# See how often we recommended a movie the user actually rated
def HitRate(topNPredicted, leftOutPredictions):
    hits = 0
    total = 0

 # For each left-out rating
    for leftOut in leftOutPredictions:
        userID = leftOut[0]
        leftOutMovieID = leftOut[1]
        # Is it in the predicted top 10 for this user?
        hit = False
        for movieID, predictedRating in topNPredicted[int(userID)]:
            if (int(leftOutMovieID) == int(movieID)):
                hit = True
                break
        if (hit) :
            hits += 1

        total += 1

    # Compute overall precision
    return hits/total
print("\nHit Rate: ", HitRate(topNPredicted, leftOutPredictions))


Hit Rate:  0.03711558854718982


The whole hit rate of the system is the count of hits, divided by the test user count. It measures how often we are able to recommend a removed rating, higher is better.

## Hits By Ratings

- Breaking down hit rates by ratings to see which rating category is better predicted

In [35]:
def RatingHitRate(topNPredicted, leftOutPredictions):
    hits = defaultdict(float)
    total = defaultdict(float)
    # For each left-out rating
    for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
        # Is it in the predicted top N for this user?
        hit = False
        for movieID, predictedRating in topNPredicted[int(userID)]:
            if (int(leftOutMovieID) == movieID):
                hit = True
                break
        if (hit) :
            hits[actualRating] += 1
        total[actualRating] += 1

    # Compute overall precision
    for rating in sorted(hits.keys()):
        print(rating, hits[rating] / total[rating])
print("Hit Rate by Rating value: ")
RatingHitRate(topNPredicted, leftOutPredictions)

Hit Rate by Rating value: 
2.0 0.009174311926605505
3.0 0.017699115044247787
4.0 0.025477707006369428
5.0 0.09734513274336283


## Cumulative Hit Rates

- In this case we can only look at hit rates in a defined range

In [37]:
def CumulativeHitRate(topNPredicted, leftOutPredictions, ratingCutoff=0):
    hits = 0
    total = 0
    # For each left-out rating
    for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
        # Only look at ability to recommend things the users actually liked...
        if (actualRating >= ratingCutoff):
            # Is it in the predicted top 10 for this user?
            hit = False
            for movieID, predictedRating in topNPredicted[int(userID)]:
                if (int(leftOutMovieID) == movieID):
                    hit = True
                    break
            if (hit) :
                hits += 1
            total += 1

        # Compute overall precision
    return hits/total
print("Cumulative Hit Rate (rating >= 3.5): ", CumulativeHitRate(topNPredicted, leftOutPredictions, 3.5))

Cumulative Hit Rate (rating >= 3.5):  0.05555555555555555


## Average Reciprocal Hit Ranking (ARHR)

- Commonly used metric for ranking evaluation of Top-N recommender systems, that only takes into account where the first relevant result occurs. We get more credit for recommending an item in which user rated on the top of the rank than on the bottom of the rank. Higher is better.

In [38]:
# Compute ARHR
def AverageReciprocalHitRank(topNPredicted, leftOutPredictions):
    summation = 0
    total = 0
    # For each left-out rating
    for userID, leftOutMovieID, actualRating, estimatedRating, _ in leftOutPredictions:
        # Is it in the predicted top N for this user?
        hitRank = 0
        rank = 0
        for movieID, predictedRating in topNPredicted[int(userID)]:
            rank = rank + 1
            if (int(leftOutMovieID) == movieID):
                hitRank = rank
                break
        if (hitRank > 0) :
                summation += 1.0 / hitRank

        total += 1

    return summation / total

print("Average Reciprocal Hit Rank: ", AverageReciprocalHitRank(topNPredicted, leftOutPredictions))

Average Reciprocal Hit Rank:  0.013899829992088738
