In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import feather
import scipy.sparse as sp
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
book_profiles = feather.read_dataframe('./data/book_profiles').set_index('book_id')
books = feather.read_dataframe('./data/books_small').set_index('book_id')
training_ratings = feather.read_dataframe('./data/training_ratings')
testing_ratings = feather.read_dataframe('./data/testing_ratings')

In [6]:
def get_profile(book_id):
    """
    Get the top 10 features of a book given an id
    """
    profile = book_profiles.loc[book_id].sort_values(ascending=False)
    print("Book title:", books.loc[book_id].title)
    profile = profile[profile>0]
    return pd.DataFrame({
        "tag": profile.index,
        "importance": profile.values
    })
    
get_profile(2).head(10)

Book title: Harry Potter and the Sorcerer's Stone (Harry Potter, #1)


Unnamed: 0,tag,importance
0,fantasy,0.204232
1,MaryGrandPré,0.204232
2,J.K.Rowling,0.204232
3,young-adult,0.182359
4,fiction,0.180011
5,harry-potter,0.171738
6,ya,0.162564
7,series,0.162068
8,magic,0.158694
9,childrens,0.15648


In [5]:
book_sim = pd.DataFrame(
    data = cosine_similarity(book_profiles, book_profiles),
    index = book_profiles.index,
    columns = book_profiles.index
)

book_sim.head()

book_id,27,21,2,18,24,3275,3753,54,337,374,...,5884,5296,8713,7443,6428,7523,4594,9569,9580,8892
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
27,1.0,0.793039,0.967074,0.935959,0.932615,0.773161,0.826935,0.443948,0.383335,0.145548,...,0.385404,0.279734,0.220817,0.665664,0.613295,0.679846,0.121582,0.397349,0.165843,0.226996
21,0.793039,1.0,0.781584,0.756703,0.802735,0.606846,0.642205,0.371239,0.290013,0.128655,...,0.299041,0.26487,0.181788,0.4833,0.445195,0.511246,0.116972,0.341285,0.114551,0.164359
2,0.967074,0.781584,1.0,0.954254,0.95135,0.779767,0.8387,0.463165,0.400693,0.146866,...,0.399699,0.256885,0.199595,0.665316,0.612849,0.679313,0.122899,0.366076,0.148561,0.172504
18,0.935959,0.756703,0.954254,1.0,0.919456,0.750132,0.813695,0.444069,0.384745,0.159378,...,0.389971,0.264809,0.192761,0.64641,0.60713,0.660081,0.135276,0.368633,0.142581,0.166399
24,0.932615,0.802735,0.95135,0.919456,1.0,0.741594,0.802514,0.488001,0.413613,0.164158,...,0.393429,0.273214,0.19893,0.636164,0.583238,0.648697,0.13925,0.39871,0.146673,0.171259


# 1. The new evaluator

In [6]:
# Use average precision instead of precision/recall
class Evaluator():
    def __init__(self, k=10, training_set=None, testing_set=None):
        self.k = k
        if training_set is not None:
            self.training_set = training_set
            self.num_users = len(self.training_set.user_id.unique())
            self.num_books = len(self.training_set.book_id.unique())
        if testing_set is not None:
            self.testing_set = testing_set
            self.testing_idx = {}
            for user_id in testing_set.user_id.unique():
                self.testing_idx[user_id] = testing_set[testing_set.user_id==user_id].book_id.values
        self.result = {}
    
    def _average_precision(self, pred, truth):
        in_arr = np.in1d(pred, truth)
        score = 0.0
        num_hits = 0.0
        for idx, correct in enumerate(in_arr):
            if correct:
                num_hits += 1
                score += num_hits / (idx + 1)
        return score / min(len(truth), self.k)
    
    def evaluate(self, model):
        model.fit(self.training_set)
        preds = model.all_recommendation()
        book_recommended = set([])
        ap_sum = 0
        for user_id in preds.keys():
            pred = preds[user_id]
            truth = self.testing_idx[user_id]
            ap_sum += self._average_precision(pred, truth)
            for book_id in pred:
                book_recommended.add(book_id)
        
        self.result[model.name] = {}
        self.result[model.name]['Mean Average Precision'] = ap_sum / self.num_users
        self.result[model.name]['Coverage'] = len(book_recommended) / self.num_books
        
    def print_result(self):
        print(pd.DataFrame(self.result))

# 2. Random-based recommender system

In [7]:
class RandomRecommender():
    name = "Random-based RS"
    
    def fit(self, training_ratings):
        user_ids = training_ratings.user_id.unique().tolist()
        book_ids = training_ratings.book_id.unique().tolist()
        self.preds = {}
        np.random.seed(42)
        for user_id in user_ids:
            excluded_books = training_ratings[training_ratings.user_id==user_id].book_id.unique().tolist()
            recommendable = np.setdiff1d(book_ids, excluded_books)
            self.preds[user_id] = np.random.permutation(recommendable)[:10]
        
    def recommendation_for_user(self, user_id):
        if user_id not in self.preds:
            return []
        return self.preds[user_id]
    
    def all_recommendation(self):
        return self.preds

In [8]:
evl = Evaluator(k=10, training_set=training_ratings, testing_set=testing_ratings)

In [9]:
%%time
rd_rec = RandomRecommender()
evl.evaluate(rd_rec)
evl.print_result()

                        Random-based RS
Coverage                       0.646810
Mean Average Precision         0.000394
Wall time: 2.37 s


# 3. Popularity-based recommender system

In [10]:
# The previous popularity list
C = books.average_rating.mean()
m = books.ratings_count.quantile(0.95)

top_books = books[books.ratings_count > m]

def weighted_rating(x):
    v = x.ratings_count
    R = x.average_rating
    return (v/(v+m) * R) + (m/(m+v) * C)

top_books.loc[:, 'weighted_rating'] = top_books.apply(weighted_rating, axis=1)

top_books.sort_values(by=['weighted_rating'], ascending=False, inplace=True)

top_books.head(10)[['title', 'weighted_rating']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0_level_0,title,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
25,Harry Potter and the Deathly Hallows (Harry Po...,4.554188
27,Harry Potter and the Half-Blood Prince (Harry ...,4.4888
18,Harry Potter and the Prisoner of Azkaban (Harr...,4.483602
24,Harry Potter and the Goblet of Fire (Harry Pot...,4.481684
2,Harry Potter and the Sorcerer's Stone (Harry P...,4.423813
21,Harry Potter and the Order of the Phoenix (Har...,4.417691
31,The Help,4.40368
39,"A Game of Thrones (A Song of Ice and Fire, #1)",4.397096
135,"A Storm of Swords (A Song of Ice and Fire, #3)",4.39281
422,"Harry Potter Boxset (Harry Potter, #1-7)",4.384479


It doesn't make sense to recommend 7 Harry Potter books in one recommendation (or any 2 of the same franchise), even if the their weighted ratings are high. We'll make a workaround be only adding the FIRST book of the franchise to our list. We can do this by filtering the books based on their titles.

In [11]:
print(books[books.title.str.contains("#") & ~books.title.str.contains("\)")].title)
included = books[books.title.str.contains("#") & ~books.title.str.contains("\)")].index.drop(6429).tolist()

book_id
2100       Batman: The Dark Knight Returns #1
2151                                  Saga #1
2855                                #GIRLBOSS
3765         Anne McCaffrey's Dragonflight #1
6429                        Nancy Drew: #1-64
8701    Sin City: Una Dura Despedida, #1 de 3
Name: title, dtype: object


In [12]:
# Manga!
print(books[books.title.str.contains("01")].head().title)
included = np.append(included, books[books.title.str.contains("01")].index.tolist())

book_id
502              The Walking Dead, Vol. 01: Days Gone Bye
503             2001: A Space Odyssey (Space Odyssey, #1)
720                                     Bleach, Volume 01
835     Naruto, Vol. 01: The Tests of the Ninja (Narut...
1313    Band of Brothers: E Company, 506th Regiment, 1...
Name: title, dtype: object


In [13]:
print(books[books.title.str.contains("#1\)")].head().title)
print(books[books.title.str.contains("#1\)")].shape)
included = np.append(included, books[books.title.str.contains("#1\)")].index.tolist())

book_id
1               The Hunger Games (The Hunger Games, #1)
2     Harry Potter and the Sorcerer's Stone (Harry P...
3                               Twilight (Twilight, #1)
9                 Angels & Demons  (Robert Langdon, #1)
12                            Divergent (Divergent, #1)
Name: title, dtype: object
(1535, 22)


In [14]:
print(books[books.title.str.contains("#")].shape)
excluded = books[books.title.str.contains("#")].index.tolist()

(4271, 22)


In [15]:
excluded = np.setdiff1d(np.unique(excluded), np.unique(included))
print(len(excluded))

2729


In [16]:
# The new top books
filtered_books = books.drop(excluded)
C = filtered_books.average_rating.mean()
m = filtered_books.ratings_count.quantile(0.95)

top_books = filtered_books[filtered_books.ratings_count > m]

def weighted_rating(x):
    v = x.ratings_count
    R = x.average_rating
    return (v/(v+m) * R) + (m/(m+v) * C)

top_books.loc[:, 'weighted_rating'] = top_books.apply(weighted_rating, axis=1)

top_books.sort_values(by=['weighted_rating'], ascending=False, inplace=True)

top_books.head(10)[['title', 'weighted_rating']]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Unnamed: 0_level_0,title,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2,Harry Potter and the Sorcerer's Stone (Harry P...,4.419512
31,The Help,4.392109
39,"A Game of Thrones (A Song of Ice and Fire, #1)",4.384036
192,The Name of the Wind (The Kingkiller Chronicle...,4.349967
1,"The Hunger Games (The Hunger Games, #1)",4.324357
19,The Fellowship of the Ring (The Lord of the Ri...,4.300449
47,The Book Thief,4.299851
85,The Giving Tree,4.285011
267,The Nightingale,4.280549
144,"Unbroken: A World War II Story of Survival, Re...",4.269676


Looks much more reasonable!

In [17]:
top_books.shape

(343, 23)

In [18]:
class PopularityRecommender():
    name = "Popularity-based RS"
    
    def fit(self, training_ratings):
        user_ids = training_ratings.user_id.unique().tolist()
        book_ids = top_books.index.tolist()
        self.preds = {}
        for user_id in user_ids:
            excluded_books = training_ratings[training_ratings.user_id==user_id].book_id.unique().tolist()
            recommendable = np.setdiff1d(book_ids, excluded_books)
            self.preds[user_id] = recommendable[:10]
        
    def recommendation_for_user(self, user_id):
        return self.preds[user_id]
    
    def all_recommendation(self):
        return self.preds

In [19]:
%%time
pop_rec = PopularityRecommender()
evl.evaluate(pop_rec)
evl.print_result()

                        Random-based RS  Popularity-based RS
Coverage                       0.646810             0.003703
Mean Average Precision         0.000394             0.037694
Wall time: 810 ms


# 4. Measuring novelty

Paper: https://arxiv.org/pdf/0808.2670.pdf

For our application, the Novelty Score is measure by the average log inverse ratio of users who have rated the item. For that we need to create a new novelty score dataframe

In [20]:
all_ratings = pd.read_csv('./data/ratings.csv')
all_ratings.shape

(5976479, 3)

In [21]:
n_users = len(all_ratings.user_id.unique())
n_users

53424

In [22]:
rating_count = all_ratings.groupby('book_id').count()[['rating']]
rating_count.head()

Unnamed: 0_level_0,rating
book_id,Unnamed: 1_level_1
1,22806
2,21850
3,16931
4,19088
5,16604


In [23]:
rating_count.loc[:, 'novelty_score'] = np.log2(n_users / rating_count.rating)

In [24]:
sorted_scores = rating_count.sort_values(by="novelty_score", ascending=True)
sorted_scores.head()

Unnamed: 0_level_0,rating,novelty_score
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,22806,1.228075
2,21850,1.289855
4,19088,1.484822
3,16931,1.657821
5,16604,1.685957


In [25]:
sorted_scores.tail()

Unnamed: 0_level_0,rating,novelty_score
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
9315,36,10.535275
1935,33,10.660806
9486,24,11.120238
9345,11,12.245769
7803,8,12.7052


In [26]:
novelty_scores = sorted_scores[['novelty_score']]
novelty_scores.head(10)

Unnamed: 0_level_0,novelty_score
book_id,Unnamed: 1_level_1
1,1.228075
2,1.289855
4,1.484822
3,1.657821
5,1.685957
17,1.690744
20,1.74366
18,1.75255
23,1.77068
7,1.779831


And then we just need to update the evaluator with this new metric

In [27]:
class Evaluator():
    def __init__(self, k=10, training_set=None, testing_set=None):
        self.k = k
        if training_set is not None:
            self.training_set = training_set
            self.num_users = len(self.training_set.user_id.unique())
            self.num_books = len(self.training_set.book_id.unique())
        if testing_set is not None:
            self.testing_set = testing_set
            self.testing_idx = {}
            for user_id in testing_set.user_id.unique():
                self.testing_idx[user_id] = testing_set[testing_set.user_id==user_id].book_id.values
        self.result = {}
    
    def _average_precision(self, pred, truth):
        in_arr = np.in1d(pred, truth)
        score = 0.0
        num_hits = 0.0
        for idx, correct in enumerate(in_arr):
            if correct:
                num_hits += 1
                score += num_hits / (idx + 1)
        return score / min(len(truth), self.k)
    
    def _novelty_score(self, pred):
        return novelty_scores.loc[pred].novelty_score.mean()
    
    def evaluate(self, model):
        model.fit(self.training_set)
        preds = model.all_recommendation()
        book_recommended = set([])
        ap_sum = 0
        nov_score_sum = 0
        for user_id in preds.keys():
            pred = preds[user_id]
            truth = self.testing_idx[user_id]
            ap_sum += self._average_precision(pred, truth)
            nov_score_sum += self._novelty_score(pred)
            for book_id in pred:
                book_recommended.add(book_id)
        
        self.result[model.name] = {}
        self.result[model.name]['Mean Average Precision'] = "%.2f%%" % (ap_sum / self.num_users * 100)
        self.result[model.name]['Coverage'] = "%.2f%%" % (len(book_recommended) / self.num_books * 100)
        self.result[model.name]['Novelty Score'] = "%.2f" % (nov_score_sum / self.num_users)
        
    def print_result(self):
        print(pd.DataFrame(self.result))

In [28]:
%%time
evl = Evaluator(k=10, training_set=training_ratings, testing_set=testing_ratings)
evl.evaluate(rd_rec)
evl.evaluate(pop_rec)
evl.print_result()

                       Random-based RS Popularity-based RS
Coverage                        64.68%               0.37%
Mean Average Precision           0.04%               3.77%
Novelty Score                     7.35                1.84
Wall time: 4.47 s


As expected, our random-based RS has a much higher novelty score compared to the popularity-based RS, intuitively because popularity-based RS only recommend POPULAR items (which are guaranteed to have lower novelty score)

# 5. Measuring diversity

Paper: http://files.grouplens.org/papers/ziegler-www05.pdf

The metric we will use for diversity is a formula using intra list similarity (ILS). We will calculate Diversity Score = (1 - ILS) * 10 (so that the score range would range from 1-10).

In [29]:
book_sim.head()

book_id,27,21,2,18,24,3275,3753,54,337,374,...,5884,5296,8713,7443,6428,7523,4594,9569,9580,8892
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
27,1.0,0.793039,0.967074,0.935959,0.932615,0.773161,0.826935,0.443948,0.383335,0.145548,...,0.385404,0.279734,0.220817,0.665664,0.613295,0.679846,0.121582,0.397349,0.165843,0.226996
21,0.793039,1.0,0.781584,0.756703,0.802735,0.606846,0.642205,0.371239,0.290013,0.128655,...,0.299041,0.26487,0.181788,0.4833,0.445195,0.511246,0.116972,0.341285,0.114551,0.164359
2,0.967074,0.781584,1.0,0.954254,0.95135,0.779767,0.8387,0.463165,0.400693,0.146866,...,0.399699,0.256885,0.199595,0.665316,0.612849,0.679313,0.122899,0.366076,0.148561,0.172504
18,0.935959,0.756703,0.954254,1.0,0.919456,0.750132,0.813695,0.444069,0.384745,0.159378,...,0.389971,0.264809,0.192761,0.64641,0.60713,0.660081,0.135276,0.368633,0.142581,0.166399
24,0.932615,0.802735,0.95135,0.919456,1.0,0.741594,0.802514,0.488001,0.413613,0.164158,...,0.393429,0.273214,0.19893,0.636164,0.583238,0.648697,0.13925,0.39871,0.146673,0.171259


In [30]:
class Evaluator():
    def __init__(self, k=10, training_set=None, testing_set=None):
        self.k = k
        if training_set is not None:
            self.training_set = training_set
            self.num_users = len(self.training_set.user_id.unique())
            self.num_books = len(self.training_set.book_id.unique())
        if testing_set is not None:
            self.testing_set = testing_set
            self.testing_idx = {}
            for user_id in testing_set.user_id.unique():
                self.testing_idx[user_id] = testing_set[testing_set.user_id==user_id].book_id.values
        self.result = {}
    
    def _average_precision(self, pred, truth):
        in_arr = np.in1d(pred, truth)
        score = 0.0
        num_hits = 0.0
        for idx, correct in enumerate(in_arr):
            if correct:
                num_hits += 1
                score += num_hits / (idx + 1)
        return score / min(len(truth), self.k)
    
    def _novelty_score(self, pred):
        return novelty_scores.loc[pred].novelty_score.mean()
    
    def _diversity_score(self, pred):
        matrix = book_sim.loc[pred, pred].values
        ils = matrix[np.triu_indices(len(pred), k=1)].mean()
        return (1 - ils) * 10
    
    def evaluate(self, model):
        model.fit(self.training_set)
        preds = model.all_recommendation()
        book_recommended = set([])
        ap_sum = 0
        nov_score_sum = 0
        div_score_sum = 0
        for user_id in preds.keys():
            pred = preds[user_id]
            truth = self.testing_idx[user_id]
            ap_sum += self._average_precision(pred, truth)
            nov_score_sum += self._novelty_score(pred)
            div_score_sum += self._diversity_score(pred)
            for book_id in pred:
                book_recommended.add(book_id)
        
        self.result[model.name] = {}
        self.result[model.name]['Mean Average Precision'] = "%.2f%%" % (ap_sum / self.num_users * 100)
        self.result[model.name]['Coverage'] = "%.2f%%" % (len(book_recommended) / self.num_books * 100)
        self.result[model.name]['Novelty Score'] = "%.2f" % (nov_score_sum / self.num_users)
        self.result[model.name]['Diversity Score'] = "%.2f" % (div_score_sum / self.num_users)
        
    def print_result(self):
        print(pd.DataFrame(self.result))

In [31]:
%%time
evl = Evaluator(k=10, training_set=training_ratings, testing_set=testing_ratings)
evl.evaluate(rd_rec)
evl.evaluate(pop_rec)
evl.print_result()

                       Random-based RS Popularity-based RS
Coverage                        64.68%               0.37%
Diversity Score                   7.30                5.51
Mean Average Precision           0.04%               3.77%
Novelty Score                     7.35                1.84
Wall time: 10.2 s


High diversity score for Random-based RS is expected. Average diversity score for Popularity-based RS is the result of only keeping the FIRST book of a series, which lower down the similarity significantly.

# 6. Measuring Personalization

Paper: https://arxiv.org/pdf/0808.2670.pdf

Score will be calculated based on the inter-list distance

In [51]:
class Evaluator():
    def __init__(self, k=10, training_set=None, testing_set=None):
        self.k = k
        if training_set is not None:
            self.training_set = training_set
            self.num_users = len(self.training_set.user_id.unique())
            self.num_books = len(self.training_set.book_id.unique())
        if testing_set is not None:
            self.testing_set = testing_set
            self.testing_idx = {}
            for user_id in testing_set.user_id.unique():
                self.testing_idx[user_id] = testing_set[testing_set.user_id==user_id].book_id.values
        self.result = {}
    
    def _average_precision(self, pred, truth):
        in_arr = np.in1d(pred, truth)
        score = 0.0
        num_hits = 0.0
        for idx, correct in enumerate(in_arr):
            if correct:
                num_hits += 1
                score += num_hits / (idx + 1)
        return score / min(len(truth), self.k)
    
    def _novelty_score(self, pred):
        # Recommend the top 10 books in novelty score results in ~10.4
        # Crop the score to 10.0 since it won't change anything and make the score range nicer
        return min(novelty_scores.loc[pred].novelty_score.mean(), 10.0)
    
    def _diversity_score(self, pred):
        matrix = book_sim.loc[pred, pred].values
        ils = matrix[np.triu_indices(len(pred), k=1)].mean()
        return (1 - ils) * 10
    
    def _personalization_score(self, preds, user_ids, book_ids):
        df = pd.DataFrame(
            data=np.zeros([len(user_ids), len(book_ids)]),
            index=user_ids,
            columns=book_ids
        )
        for user_id in user_ids:
            df.loc[user_id, preds[user_id]] = 1

        matrix = sp.csr_matrix(df.values)

        #calculate similarity for every user's recommendation list
        similarity = cosine_similarity(X=matrix, dense_output=False)

        #get indicies for upper right triangle w/o diagonal
        upper_right = np.triu_indices(similarity.shape[0], k=1)

        #calculate average similarity
        personalization = np.mean(similarity[upper_right])
        
        return (1 - personalization) * 10
    
    def evaluate(self, model):
        model.fit(self.training_set)
        preds = model.all_recommendation()
        user_ids = list(preds.keys())
        book_ids = np.unique(np.array(list(preds.values())).flatten())
        ap_sum = 0
        nov_score_sum = 0
        div_score_sum = 0
        for user_id in preds.keys():
            pred = preds[user_id]
            truth = self.testing_idx[user_id]
            ap_sum += self._average_precision(pred, truth)
            nov_score_sum += self._novelty_score(pred)
            div_score_sum += self._diversity_score(pred)
        
        self.result[model.name] = {}
        self.result[model.name]['Mean Average Precision'] = "%.2f%%" % (ap_sum / self.num_users * 100)
        self.result[model.name]['Coverage'] = "%.2f%%" % (len(book_ids) / self.num_books * 100)
        self.result[model.name]['Novelty Score'] = "%.2f" % (nov_score_sum / self.num_users)
        self.result[model.name]['Diversity Score'] = "%.2f" % (div_score_sum / self.num_users)
        self.result[model.name]['Personalization Score'] = "%.2f" % self._personalization_score(preds, user_ids, book_ids)
        
    def print_result(self):
        print(pd.DataFrame(self.result).loc[['Mean Average Precision', 'Coverage', 'Novelty Score', 'Diversity Score', 'Personalization Score']])

In [52]:
%%time
evl = Evaluator(k=10, training_set=training_ratings, testing_set=testing_ratings)
evl.evaluate(rd_rec)
evl.evaluate(pop_rec)
evl.print_result()

                       Random-based RS Popularity-based RS
Mean Average Precision           0.04%               3.77%
Coverage                        64.68%               0.37%
Novelty Score                     7.35                1.84
Diversity Score                   7.30                5.51
Personalization Score             9.99                3.77
Wall time: 10.4 s


Of course Random-based RS will have almost max personalization score since it's totally random. The popularity-based model has a surprisingly not-amazingly-bad personalization score. Probably removing items the users have already read help with this score.

# 7. Wrap up

That's it for our updated evaluator. All of our score range from 1 to 10 and our precision and coverage are percentage-based.

The evaluator's external dependencies:
- A novelty score dataframe
- A book similarity matrix

We will make bundle the evaluator inside a module (a python file) and inject the 2 dependencies in the constructor

In [54]:
novelty_scores.head()

Unnamed: 0_level_0,novelty_score
book_id,Unnamed: 1_level_1
1,1.228075
2,1.289855
4,1.484822
3,1.657821
5,1.685957


In [55]:
top_books.head()

Unnamed: 0_level_0,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,weighted_rating
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,Harry Potter and the Sorcerer's Stone (Harry P...,...,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...,4.419512
31,4667024,4667024,4717423,183,399155341,9780399000000.0,Kathryn Stockett,2009.0,The Help,The Help,...,1603545,78204,10235,25117,134887,490754,942552,https://images.gr-assets.com/books/1346100365m...,https://images.gr-assets.com/books/1346100365s...,4.392109
39,13496,13496,1466917,101,553588486,9780554000000.0,George R.R. Martin,1996.0,A Game of Thrones,"A Game of Thrones (A Song of Ice and Fire, #1)",...,1442220,46205,19988,28983,114092,404583,874574,https://images.gr-assets.com/books/1436732693m...,https://images.gr-assets.com/books/1436732693s...,4.384036
192,186074,186074,2502879,123,075640407X,9780756000000.0,Patrick Rothfuss,2007.0,The Name of the Wind,The Name of the Wind (The Kingkiller Chronicle...,...,449372,28631,5605,8582,28480,95470,311235,https://images.gr-assets.com/books/1472068073m...,https://images.gr-assets.com/books/1472068073s...,4.349967
1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,"The Hunger Games (The Hunger Games, #1)",...,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,4.324357


Finally, we will save all the useful dataframe that we want to use later on

In [59]:
top_books.reset_index().to_feather('./data/top_books')
novelty_scores.reset_index().to_feather('./data/novelty_scores')

# 8. Test our evaluator

In [61]:
from evaluator import Evaluator

In [62]:
%%time
evl = Evaluator(
    k=10,
    training_set=training_ratings,
    testing_set=testing_ratings,
    book_sim=book_sim,
    novelty_scores=novelty_scores
)
evl.evaluate(rd_rec)
evl.evaluate(pop_rec)
evl.print_result()

                       Random-based RS Popularity-based RS
Mean Average Precision           0.04%               3.77%
Coverage                        64.68%               0.37%
Novelty Score                     7.35                1.84
Diversity Score                   7.30                5.51
Personalization Score             9.99                3.77
Wall time: 11.6 s


Perfect! We are ready to build more complicated and accurate model in the next few notebooks.