In [3]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import feather

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [82]:
# Copy from notebook 3.
class Evaluator():
    """Fit a recommender and score its top-k recommendations on held-out ratings.

    The model passed to `evaluate` must expose `fit(training_set)` and
    `recommendation()`; the latter must return a DataFrame with one column
    per user_id whose rows are the recommended book_ids, best first.

    Parameters
    ----------
    k : int
        Number of recommendations scored per user (precision@k / recall@k).
    training_set : DataFrame
        Ratings handed to `model.fit`.
    testing_set : DataFrame
        Held-out ratings with `user_id` and `book_id` columns.

    Raises
    ------
    ValueError
        If `k` is smaller than 1.
    """

    def __init__(self, k=10, training_set=None, testing_set=None):
        if k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))
        self.k = k
        self.training_set = training_set
        self.testing_set = testing_set
        # Filled in by evaluate(); defined here so the attribute always exists.
        self.preds = None
        self.result = {}

    def _hit_counts(self):
        """Return one `(hits, n_truth)` pair per recommended user.

        `hits` is how many of the user's first `k` recommendations appear in
        their held-out books; `n_truth` is how many held-out books they have.
        """
        # Group the testing set once instead of re-filtering it for every user.
        truth_by_user = dict(iter(self.testing_set.groupby('user_id')['book_id']))
        counts = []
        for user_id in self.preds.columns:
            # Only the first k rows count, so a model that returns more than
            # k items cannot push precision above 1.
            top_k = self.preds[user_id].iloc[:self.k]
            truth = truth_by_user.get(user_id)
            if truth is None:
                # User has no held-out ratings at all.
                counts.append((0, 0))
            else:
                counts.append((int(np.isin(top_k, truth).sum()), int(truth.count())))
        return counts

    def _precision(self):
        """Mean over recommended users of hits / k (0.0 if there are no users)."""
        precisions = [hits / self.k for hits, _ in self._hit_counts()]
        return np.mean(precisions) if precisions else 0.0

    def _recall(self):
        """Mean over users of hits / number of held-out books.

        Users without any held-out book are skipped: their recall is
        undefined (0/0) and would otherwise turn the whole mean into NaN.
        Returns 0.0 when no user has held-out books.
        """
        recalls = [hits / n_truth for hits, n_truth in self._hit_counts() if n_truth > 0]
        return np.mean(recalls) if recalls else 0.0

    def evaluate(self, model):
        """Fit `model` on the training set and store precision/recall in `self.result`."""
        model.fit(self.training_set)
        self.preds = model.recommendation()
        self.result['precision'] = self._precision()
        self.result['recall'] = self._recall()

    def print_result(self):
        """Print the metrics dict populated by `evaluate`."""
        print(self.result)

In [9]:
# Load the book table and the pre-split training/testing ratings from
# feather files, then show the shape of each frame.
books, training_ratings, testing_ratings = map(
    feather.read_dataframe,
    ('./data/books_small', './data/training_ratings', './data/testing_ratings'),
)
tuple(frame.shape for frame in (books, training_ratings, testing_ratings))

((9590, 23), (97889, 3), (12445, 3))

In [78]:
class RandomRecommender():
    """Baseline recommender that gives every user a random set of books.

    Parameters
    ----------
    n_recs : int
        Number of distinct books recommended to each user (default 10).
    seed : int
        Seed for the private random generator (default 42), so repeated
        fits on the same data produce the same recommendations.
    """

    def __init__(self, n_recs=10, seed=42):
        self.n_recs = n_recs
        self.seed = seed
        self.user_ids = []
        self.book_ids = []
        # Empty until fit() is called.
        self.preds = pd.DataFrame()

    def fit(self, training_ratings):
        """Draw `n_recs` books without replacement for every user in `training_ratings`.

        `training_ratings` must have `user_id` and `book_id` columns.
        Raises ValueError (from NumPy) if there are fewer than `n_recs`
        distinct books to sample from.
        """
        self.user_ids = training_ratings.user_id.unique().tolist()
        self.book_ids = training_ratings.book_id.unique().tolist()
        # A private RandomState yields the same draws as seeding the global
        # generator would, without resetting np.random for the rest of the
        # notebook.
        rng = np.random.RandomState(self.seed)
        # Build all columns first and construct the frame once; inserting
        # columns one by one fragments the DataFrame.
        self.preds = pd.DataFrame({
            user_id: rng.choice(self.book_ids, self.n_recs, replace=False)
            for user_id in self.user_ids
        })

    def recommend_to_user(self, user_id):
        """Return the recommended book_ids for one user (KeyError if unknown)."""
        return self.preds[user_id]

    def recommendation(self):
        """Return the full recommendation table, one column per user_id."""
        return self.preds

In [79]:
# Create the random baseline and generate its recommendations from the training ratings.
rd_rec = RandomRecommender()
rd_rec.fit(training_ratings)

In [84]:
# The recommendation table has one column per user_id; display the books picked for user 80.
recs = rd_rec.recommendation()
recs[80]

0    3735
1    1663
2    4384
3    7816
4    3109
5     759
6    4962
7    8813
8    3472
9    5022
Name: 80, dtype: int32

In [88]:
# Score the random baseline with k=10 against the testing ratings.
# Note: evaluate() calls model.fit(training_set) itself, so rd_rec is fitted again here.
evl = Evaluator(k=10, training_set=training_ratings, testing_set=testing_ratings)
evl.evaluate(rd_rec)

In [89]:
# Print the metrics dict that evaluate() filled in ('precision' and 'recall').
evl.print_result()

{'precision': 0.0013224821973550356, 'recall': 0.00099878000317043}


In [94]:
# Precision of the random baseline expressed as a percentage.
evl.result['precision'] * 100

0.13224821973550355